--- linux-2.6.6-rc1/arch/alpha/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/alpha/kernel/process.c 2004-04-18 22:25:54.243584200 -0700 @@ -510,12 +510,6 @@ thread_saved_pc(task_t *t) return 0; } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { @@ -534,7 +528,7 @@ get_wchan(struct task_struct *p) */ pc = thread_saved_pc(p); - if (pc >= first_sched && pc < last_sched) { + if (in_sched_functions(pc)) { schedule_frame = ((unsigned long *)p->thread_info->pcb.ksp)[6]; return ((unsigned long *)schedule_frame)[12]; } --- linux-2.6.6-rc1/arch/alpha/kernel/setup.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/alpha/kernel/setup.c 2004-04-18 22:25:35.884375224 -0700 @@ -122,7 +122,6 @@ static void get_sysnames(unsigned long, static void determine_cpu_caches (unsigned int); static char command_line[COMMAND_LINE_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; /* * The format of "screen_info" is strange, and due to early --- linux-2.6.6-rc1/arch/arm26/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/arm26/kernel/process.c 2004-04-18 22:25:54.244584048 -0700 @@ -397,12 +397,6 @@ pid_t kernel_thread(int (*fn)(void *), v return __ret; } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, lr; @@ -417,7 +411,7 @@ unsigned long get_wchan(struct task_stru if (fp < stack_page || fp > 4092+stack_page) return 0; lr = pc_pointer (((unsigned long *)fp)[-1]); - if (lr < first_sched || lr > last_sched) + if (!in_sched_functions(lr)) return lr; fp = *(unsigned long *) (fp - 12); } while (count ++ < 16); --- linux-2.6.6-rc1/arch/arm26/kernel/setup.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/arm26/kernel/setup.c 2004-04-18 22:25:35.885375072 -0700 @@ -76,7 +76,6 @@ struct processor processor; unsigned char aux_device_present; char elf_platform[ELF_PLATFORM_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; unsigned long phys_initrd_start __initdata = 0; unsigned long phys_initrd_size __initdata = 0; --- linux-2.6.6-rc1/arch/arm/boot/compressed/Makefile 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/arm/boot/compressed/Makefile 2004-04-18 22:25:24.631085984 -0700 @@ -66,7 +66,7 @@ targets := vmlinux vmlinux.lds pig EXTRA_CFLAGS := -fpic EXTRA_AFLAGS := -LDFLAGS_vmlinux := -p -X \ +LDFLAGS_vmlinux := -p --no-undefined -X \ $(shell $(CC) $(CFLAGS) --print-libgcc-file-name) -T $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \ --- linux-2.6.6-rc1/arch/arm/kernel/armksyms.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/arm/kernel/armksyms.c 2004-04-18 22:25:24.631085984 -0700 @@ -73,8 +73,6 @@ extern void fp_init(union fp_state *); * This has a special calling convention; it doesn't * modify any of the usual registers, except for LR. 
*/ -extern void __do_softirq(void); - #define EXPORT_SYMBOL_ALIAS(sym,orig) \ const struct kernel_symbol __ksymtab_##sym \ __attribute__((section("__ksymtab"))) = \ @@ -88,7 +86,6 @@ EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_ent EXPORT_SYMBOL_ALIAS(fp_printk,printk); EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig); -EXPORT_SYMBOL_NOVERS(__do_softirq); EXPORT_SYMBOL_NOVERS(__backtrace); /* platform dependent support */ --- linux-2.6.6-rc1/arch/arm/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/arm/kernel/process.c 2004-04-18 22:25:54.245583896 -0700 @@ -411,12 +411,6 @@ pid_t kernel_thread(int (*fn)(void *), v return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, lr; @@ -431,7 +425,7 @@ unsigned long get_wchan(struct task_stru if (fp < stack_page || fp > 4092+stack_page) return 0; lr = pc_pointer (((unsigned long *)fp)[-1]); - if (lr < first_sched || lr > last_sched) + if (!in_sched_functions(lr)) return lr; fp = *(unsigned long *) (fp - 12); } while (count ++ < 16); --- linux-2.6.6-rc1/arch/arm/kernel/ptrace.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/arm/kernel/ptrace.c 2004-04-18 22:25:24.632085832 -0700 @@ -526,7 +526,7 @@ core_initcall(ptrace_break_init); * actually access the pt_regs stored on the kernel stack. */ static int ptrace_read_user(struct task_struct *tsk, unsigned long off, - unsigned long *ret) + unsigned long __user *ret) { unsigned long tmp; @@ -559,7 +559,7 @@ static int ptrace_write_user(struct task /* * Get all user integer registers. 
*/ -static int ptrace_getregs(struct task_struct *tsk, void *uregs) +static int ptrace_getregs(struct task_struct *tsk, void __user *uregs) { struct pt_regs *regs = get_user_regs(tsk); @@ -569,7 +569,7 @@ static int ptrace_getregs(struct task_st /* * Set all user integer registers. */ -static int ptrace_setregs(struct task_struct *tsk, void *uregs) +static int ptrace_setregs(struct task_struct *tsk, void __user *uregs) { struct pt_regs newregs; int ret; @@ -591,7 +591,7 @@ static int ptrace_setregs(struct task_st /* * Get the child FPU state. */ -static int ptrace_getfpregs(struct task_struct *tsk, void *ufp) +static int ptrace_getfpregs(struct task_struct *tsk, void __user *ufp) { return copy_to_user(ufp, &tsk->thread_info->fpstate, sizeof(struct user_fp)) ? -EFAULT : 0; @@ -600,7 +600,7 @@ static int ptrace_getfpregs(struct task_ /* * Set the child FPU state. */ -static int ptrace_setfpregs(struct task_struct *tsk, void *ufp) +static int ptrace_setfpregs(struct task_struct *tsk, void __user *ufp) { struct thread_info *thread = tsk->thread_info; thread->used_cp[1] = thread->used_cp[2] = 1; @@ -628,7 +628,7 @@ static int do_ptrace(int request, struct break; case PTRACE_PEEKUSR: - ret = ptrace_read_user(child, addr, (unsigned long *)data); + ret = ptrace_read_user(child, addr, (unsigned long __user *)data); break; /* @@ -704,19 +704,19 @@ static int do_ptrace(int request, struct break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, (void *)data); + ret = ptrace_getregs(child, (void __user *)data); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, (void *)data); + ret = ptrace_setregs(child, (void __user *)data); break; case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, (void *)data); + ret = ptrace_getfpregs(child, (void __user *)data); break; case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, (void *)data); + ret = ptrace_setfpregs(child, (void __user *)data); break; default: --- linux-2.6.6-rc1/arch/arm/kernel/setup.c 2004-03-10 
20:41:25.000000000 -0800 +++ 25/arch/arm/kernel/setup.c 2004-04-18 22:25:35.886374920 -0700 @@ -81,7 +81,6 @@ struct cpu_cache_fns cpu_cache; unsigned char aux_device_present; char elf_platform[ELF_PLATFORM_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; unsigned long phys_initrd_start __initdata = 0; unsigned long phys_initrd_size __initdata = 0; --- linux-2.6.6-rc1/arch/arm/kernel/signal.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/arm/kernel/signal.c 2004-04-18 22:25:24.633085680 -0700 @@ -76,7 +76,7 @@ asmlinkage int sys_sigsuspend(int restar } asmlinkage int -sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs) +sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, struct pt_regs *regs) { sigset_t saveset, newset; @@ -104,8 +104,8 @@ sys_rt_sigsuspend(sigset_t *unewset, siz } asmlinkage int -sys_sigaction(int sig, const struct old_sigaction *act, - struct old_sigaction *oact) +sys_sigaction(int sig, const struct old_sigaction __user *act, + struct old_sigaction __user *oact) { struct k_sigaction new_ka, old_ka; int ret; @@ -147,15 +147,15 @@ struct sigframe struct rt_sigframe { - struct siginfo *pinfo; - void *puc; + struct siginfo __user *pinfo; + void __user *puc; struct siginfo info; struct ucontext uc; unsigned long retcode; }; static int -restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc) +restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; @@ -184,7 +184,7 @@ restore_sigcontext(struct pt_regs *regs, asmlinkage int sys_sigreturn(struct pt_regs *regs) { - struct sigframe *frame; + struct sigframe __user *frame; sigset_t set; /* Always make any pending restarted system calls return -EINTR */ @@ -198,7 +198,7 @@ asmlinkage int sys_sigreturn(struct pt_r if (regs->ARM_sp & 7) goto badframe; - frame = (struct sigframe *)regs->ARM_sp; + frame = (struct sigframe __user *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) goto badframe; @@ -232,7 +232,7 @@ 
badframe: asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) { - struct rt_sigframe *frame; + struct rt_sigframe __user *frame; sigset_t set; /* Always make any pending restarted system calls return -EINTR */ @@ -246,7 +246,7 @@ asmlinkage int sys_rt_sigreturn(struct p if (regs->ARM_sp & 7) goto badframe; - frame = (struct rt_sigframe *)regs->ARM_sp; + frame = (struct rt_sigframe __user *)regs->ARM_sp; if (verify_area(VERIFY_READ, frame, sizeof (*frame))) goto badframe; @@ -276,7 +276,7 @@ badframe: } static int -setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/ +setup_sigcontext(struct sigcontext __user *sc, /*struct _fpstate *fpstate,*/ struct pt_regs *regs, unsigned long mask) { int err = 0; @@ -307,7 +307,7 @@ setup_sigcontext(struct sigcontext *sc, return err; } -static inline void * +static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) { unsigned long sp = regs->ARM_sp; @@ -321,12 +321,12 @@ get_sigframe(struct k_sigaction *ka, str /* * ATPCS B01 mandates 8-byte alignment */ - return (void *)((sp - framesize) & ~7); + return (void __user *)((sp - framesize) & ~7); } static int setup_return(struct pt_regs *regs, struct k_sigaction *ka, - unsigned long *rc, void *frame, int usig) + unsigned long __user *rc, void __user *frame, int usig) { unsigned long handler = (unsigned long)ka->sa.sa_handler; unsigned long retcode; @@ -387,7 +387,7 @@ setup_return(struct pt_regs *regs, struc static int setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) { - struct sigframe *frame = get_sigframe(ka, regs, sizeof(*frame)); + struct sigframe __user *frame = get_sigframe(ka, regs, sizeof(*frame)); int err = 0; if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) @@ -410,7 +410,7 @@ static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe *frame = get_sigframe(ka, regs, sizeof(*frame)); + struct 
rt_sigframe __user *frame = get_sigframe(ka, regs, sizeof(*frame)); int err = 0; if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) --- linux-2.6.6-rc1/arch/arm/kernel/sys_arm.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/arm/kernel/sys_arm.c 2004-04-18 22:25:24.634085528 -0700 @@ -37,7 +37,7 @@ extern unsigned long do_mremap(unsigned * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way unix traditionally does this, though. */ -asmlinkage int sys_pipe(unsigned long * fildes) +asmlinkage int sys_pipe(unsigned long __user *fildes) { int fd[2]; int error; @@ -94,7 +94,7 @@ struct mmap_arg_struct { unsigned long offset; }; -asmlinkage int old_mmap(struct mmap_arg_struct *arg) +asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) { int error = -EFAULT; struct mmap_arg_struct a; @@ -141,11 +141,11 @@ out: struct sel_arg_struct { unsigned long n; - fd_set *inp, *outp, *exp; - struct timeval *tvp; + fd_set __user *inp, *outp, *exp; + struct timeval __user *tvp; }; -asmlinkage int old_select(struct sel_arg_struct *arg) +asmlinkage int old_select(struct sel_arg_struct __user *arg) { struct sel_arg_struct a; @@ -160,7 +160,8 @@ asmlinkage int old_select(struct sel_arg * * This is really horribly ugly. 
*/ -asmlinkage int sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth) +asmlinkage int sys_ipc(uint call, int first, int second, int third, + void __user *ptr, long fifth) { int version, ret; @@ -169,28 +170,28 @@ asmlinkage int sys_ipc (uint call, int f switch (call) { case SEMOP: - return sys_semop (first, (struct sembuf *)ptr, second); + return sys_semop(first, (struct sembuf __user *)ptr, second); case SEMGET: return sys_semget (first, second, third); case SEMCTL: { union semun fourth; if (!ptr) return -EINVAL; - if (get_user(fourth.__pad, (void **) ptr)) + if (get_user(fourth.__pad, (void __user **) ptr)) return -EFAULT; return sys_semctl (first, second, third, fourth); } case MSGSND: - return sys_msgsnd (first, (struct msgbuf *) ptr, - second, third); + return sys_msgsnd(first, (struct msgbuf __user *) ptr, + second, third); case MSGRCV: switch (version) { case 0: { struct ipc_kludge tmp; if (!ptr) return -EINVAL; - if (copy_from_user(&tmp,(struct ipc_kludge *) ptr, + if (copy_from_user(&tmp,(struct ipc_kludge __user *)ptr, sizeof (tmp))) return -EFAULT; return sys_msgrcv (first, tmp.msgp, second, @@ -198,36 +199,36 @@ asmlinkage int sys_ipc (uint call, int f } default: return sys_msgrcv (first, - (struct msgbuf *) ptr, + (struct msgbuf __user *) ptr, second, fifth, third); } case MSGGET: return sys_msgget ((key_t) first, second); case MSGCTL: - return sys_msgctl (first, second, (struct msqid_ds *) ptr); + return sys_msgctl(first, second, (struct msqid_ds __user *)ptr); case SHMAT: switch (version) { default: { ulong raddr; - ret = do_shmat (first, (char *) ptr, second, &raddr); + ret = do_shmat(first, (char __user *)ptr, second, &raddr); if (ret) return ret; - return put_user (raddr, (ulong *) third); + return put_user(raddr, (ulong __user *)third); } case 1: /* iBCS2 emulator entry point */ if (!segment_eq(get_fs(), get_ds())) return -EINVAL; - return do_shmat (first, (char *) ptr, - second, (ulong *) third); + return 
do_shmat(first, (char __user *) ptr, + second, (ulong __user *) third); } case SHMDT: - return sys_shmdt ((char *)ptr); + return sys_shmdt ((char __user *)ptr); case SHMGET: return sys_shmget (first, second, third); case SHMCTL: return sys_shmctl (first, second, - (struct shmid_ds *) ptr); + (struct shmid_ds __user *) ptr); default: return -ENOSYS; } @@ -266,7 +267,8 @@ asmlinkage int sys_vfork(struct pt_regs /* sys_execve() executes a new program. * This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(char *filenamei, char **argv, char **envp, struct pt_regs *regs) +asmlinkage int sys_execve(char __user *filenamei, char __user * __user *argv, + char __user * __user *envp, struct pt_regs *regs) { int error; char * filename; --- linux-2.6.6-rc1/arch/arm/Makefile 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/arm/Makefile 2004-04-18 22:25:24.630086136 -0700 @@ -7,7 +7,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux :=-p -X +LDFLAGS_vmlinux :=-p --no-undefined -X LDFLAGS_BLOB :=--format binary AFLAGS_vmlinux.lds.o = -DTEXTADDR=$(TEXTADDR) -DDATAADDR=$(DATAADDR) OBJCOPYFLAGS :=-O binary -R .note -R .comment -S --- linux-2.6.6-rc1/arch/arm/mm/alignment.c 2003-06-14 12:17:58.000000000 -0700 +++ 25/arch/arm/mm/alignment.c 2004-04-18 22:25:24.635085376 -0700 @@ -112,10 +112,10 @@ proc_alignment_read(char *page, char **s return len; } -static int proc_alignment_write(struct file *file, const char *buffer, +static int proc_alignment_write(struct file *file, const char __user *buffer, unsigned long count, void *data) { - int mode; + char mode; if (count > 0) { if (get_user(mode, buffer)) --- linux-2.6.6-rc1/arch/arm/mm/fault-armv.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/arm/mm/fault-armv.c 2004-04-18 22:25:24.636085224 -0700 @@ -186,19 +186,20 @@ no_pmd: void __flush_dcache_page(struct page *page) { + struct address_space *mapping = page_mapping(page); struct mm_struct *mm = current->active_mm; struct list_head *l; 
__cpuc_flush_dcache_page(page_address(page)); - if (!page_mapping(page)) + if (!mapping) return; /* * With a VIVT cache, we need to also write back * and invalidate any user data. */ - list_for_each(l, &page->mapping->i_mmap_shared) { + list_for_each(l, &mapping->i_mmap_shared) { struct vm_area_struct *mpnt; unsigned long off; @@ -224,11 +225,15 @@ void __flush_dcache_page(struct page *pa static void make_coherent(struct vm_area_struct *vma, unsigned long addr, struct page *page, int dirty) { + struct address_space *mapping = page_mapping(page); struct list_head *l; struct mm_struct *mm = vma->vm_mm; unsigned long pgoff; int aliases = 0; + if (!mapping) + return; + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); /* @@ -236,7 +241,7 @@ make_coherent(struct vm_area_struct *vma * space, then we need to handle them specially to maintain * cache coherency. */ - list_for_each(l, &page->mapping->i_mmap_shared) { + list_for_each(l, &mapping->i_mmap_shared) { struct vm_area_struct *mpnt; unsigned long off; --- linux-2.6.6-rc1/arch/arm/mm/mm-armv.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/arm/mm/mm-armv.c 2004-04-18 22:25:33.464743064 -0700 @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -232,7 +231,7 @@ void free_pgd_slow(pgd_t *pgd) pte = pmd_page(*pmd); pmd_clear(pmd); - pgtable_remove_rmap(pte); + dec_page_state(nr_page_table_pages); pte_free(pte); pmd_free(pmd); free: --- linux-2.6.6-rc1/arch/arm/tools/mach-types 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/arm/tools/mach-types 2004-04-18 22:25:24.636085224 -0700 @@ -6,7 +6,7 @@ # To add an entry into this database, please see Documentation/arm/README, # or contact rmk@arm.linux.org.uk # -# Last update: Sat Mar 13 14:36:30 2004 +# Last update: Thu Apr 15 10:14:37 2004 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -390,9 +390,9 @@ espd_4510b ARCH_ESPD_4510B ESPD_4510B mp1x ARCH_MP1X MP1X 379 at91rm9200tb ARCH_AT91RM9200TB AT91RM9200TB 380 adsvgx 
ARCH_ADSVGX ADSVGX 381 -omap1610 ARCH_OMAP1610 OMAP1610 382 +omap_h2 ARCH_OMAP_H2 OMAP_H2 382 pelee ARCH_PELEE PELEE 383 -e7xx ARCH_E7XX E7XX 384 +e740 MACH_E740 E740 384 iq80331 ARCH_IQ80331 IQ80331 385 versatile_pb ARCH_VERSATILE_PB VERSATILE_PB 387 kev7a400 MACH_KEV7A400 KEV7A400 388 @@ -485,3 +485,41 @@ phoenix MACH_PHOENIX PHOENIX 474 vr1000 MACH_VR1000 VR1000 475 deisterpxa MACH_DEISTERPXA DEISTERPXA 476 bcm1160 MACH_BCM1160 BCM1160 477 +pcm022 MACH_PCM022 PCM022 478 +adsgcx MACH_ADSGCX ADSGCX 479 +dreadnaught MACH_DREADNAUGHT DREADNAUGHT 480 +dm320 MACH_DM320 DM320 481 +markov MACH_MARKOV MARKOV 482 +cos7a400 MACH_COS7A400 COS7A400 483 +milano MACH_MILANO MILANO 484 +ue9328 MACH_UE9328 UE9328 485 +uex255 MACH_UEX255 UEX255 486 +ue2410 MACH_UE2410 UE2410 487 +a620 MACH_A620 A620 488 +ocelot MACH_OCELOT OCELOT 489 +cheetah MACH_CHEETAH CHEETAH 490 +omap_perseus2 MACH_OMAP_PERSEUS2 OMAP_PERSEUS2 491 +zvue MACH_ZVUE ZVUE 492 +roverp1 MACH_ROVERP1 ROVERP1 493 +asidial2 MACH_ASIDIAL2 ASIDIAL2 494 +s3c24a0 MACH_S3C24A0 S3C24A0 495 +e800 MACH_E800 E800 496 +e750 MACH_E750 E750 497 +s3c5500 MACH_S3C5500 S3C5500 498 +smdk5500 MACH_SMDK5500 SMDK5500 499 +signalsync MACH_SIGNALSYNC SIGNALSYNC 500 +nbc MACH_NBC NBC 501 +er4525 MACH_ER4525 ER4525 502 +netbookpro MACH_NETBOOKPRO NETBOOKPRO 503 +hw90200 MACH_HW90200 HW90200 504 +condor MACH_CONDOR CONDOR 505 +cup MACH_CUP CUP 506 +kite MACH_KITE KITE 507 +scb9328 MACH_SCB9328 SCB9328 508 +omap_h3 MACH_OMAP_H3 OMAP_H3 509 +omap_h4 MACH_OMAP_H4 OMAP_H4 510 +n10 MACH_N10 N10 511 +montajade MACH_MONTAJADE MONTAJADE 512 +sg560 MACH_SG560 SG560 513 +dp1000 MACH_DP1000 DP1000 514 +omap_osk MACH_OMAP_OSK OMAP_OSK 515 --- linux-2.6.6-rc1/arch/cris/arch-v10/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/cris/arch-v10/kernel/process.c 2004-04-18 22:25:54.245583896 -0700 @@ -217,8 +217,8 @@ asmlinkage int sys_execve(const char *fn * These bracket the sleeping functions.. 
*/ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) +#define first_sched ((unsigned long)__sched_text_start) +#define last_sched ((unsigned long)__sched_text_end) unsigned long get_wchan(struct task_struct *p) { --- linux-2.6.6-rc1/arch/cris/kernel/setup.c 2003-07-10 18:50:30.000000000 -0700 +++ 25/arch/cris/kernel/setup.c 2004-04-18 22:25:35.886374920 -0700 @@ -28,10 +28,7 @@ unsigned char aux_device_present; extern int root_mountflags; extern char _etext, _edata, _end; -#define COMMAND_LINE_SIZE 256 - static char command_line[COMMAND_LINE_SIZE] = { 0, }; - char saved_command_line[COMMAND_LINE_SIZE]; extern const unsigned long text_start, edata; /* set by the linker script */ extern unsigned long dram_start, dram_end; --- linux-2.6.6-rc1/arch/h8300/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/h8300/kernel/process.c 2004-04-18 22:25:59.640763704 -0700 @@ -261,12 +261,6 @@ out: return error; } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long thread_saved_pc(struct task_struct *tsk) { return ((struct pt_regs *)tsk->thread.esp0)->pc; @@ -283,11 +277,11 @@ unsigned long get_wchan(struct task_stru stack_page = (unsigned long)p; fp = ((struct pt_regs *)p->thread.ksp)->er6; do { - if (fp < stack_page+sizeof(struct task_struct) || + if (fp < stack_page+sizeof(struct thread_info) || fp >= 8184+stack_page) return 0; pc = ((unsigned long *)fp)[1]; - if (pc < first_sched || pc >= last_sched) + if (!in_sched_functions(pc)) return pc; fp = *(unsigned long *) fp; } while (count++ < 16); --- linux-2.6.6-rc1/arch/h8300/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/h8300/kernel/setup.c 2004-04-18 22:25:35.887374768 -0700 @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -60,8 +61,7 @@ unsigned long memory_end; struct task_struct *_current_task; -char command_line[512]; -char saved_command_line[512]; +char command_line[COMMAND_LINE_SIZE]; extern int _stext, _etext, _sdata, _edata, _sbss, _ebss, _end; extern int _ramstart, _ramend; --- linux-2.6.6-rc1/arch/i386/Kconfig 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/Kconfig 2004-04-18 22:25:47.915546208 -0700 @@ -479,6 +479,16 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say + N here. 
+ config PREEMPT bool "Preemptible Kernel" help @@ -709,7 +719,7 @@ config X86_PAE # Common NUMA Features config NUMA - bool "Numa Memory Allocation Support" + bool "Numa Memory Allocation and Scheduler Support" depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) @@ -1095,25 +1105,6 @@ config PCI_MMCONFIG select ACPI_BOOT default y -config PCI_USE_VECTOR - bool "Vector-based interrupt indexing (MSI)" - depends on X86_LOCAL_APIC && X86_IO_APIC - default n - help - This replaces the current existing IRQ-based index interrupt scheme - with the vector-base index scheme. The advantages of vector base - over IRQ base are listed below: - 1) Support MSI implementation. - 2) Support future IOxAPIC hotplug - - Note that this allows the device drivers to enable MSI, Message - Signaled Interrupt, on all MSI capable device functions detected. - Message Signal Interrupt enables an MSI-capable hardware device to - send an inbound Memory Write on its PCI bus instead of asserting - IRQ signal on device IRQ pin. - - If you don't know what to do here, say N. - source "drivers/pci/Kconfig" config ISA @@ -1264,6 +1255,15 @@ config DEBUG_PAGEALLOC This results in a large slowdown, but helps to find certain types of memory corruptions. +config SPINLINE + bool "Spinlock inlining" + depends on DEBUG_KERNEL + help + This will change spinlocks from out of line to inline, making them + account cost to the callers in readprofile, rather than the lock + itself (as ".text.lock.filename"). This can be helpful for finding + the callers of locks. + config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM @@ -1280,12 +1280,194 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. 
+config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config DEBUG_SPINLOCK_SLEEP bool "Sleep-inside-spinlock checking" help If you say Y here, various routines which may sleep will become very noisy if they are called with a spinlock held. +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. 
+ +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard addresses are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on, a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you to enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. 
This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the latest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. 
This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + config FRAME_POINTER bool "Compile the kernel with frame pointers" help @@ -1303,6 +1485,19 @@ config 4KSTACKS on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on PROC_FS + default y + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. These + stats may be useful for both tuning and debugging the scheduler. + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. 
+ config X86_FIND_SMP_CONFIG bool depends on X86_LOCAL_APIC || X86_VOYAGER @@ -1341,6 +1536,12 @@ config X86_TRAMPOLINE depends on X86_SMP || (X86_VOYAGER && SMP) default y +# std_resources is overridden for pc9800, but that's not +# a currently selectable arch choice +config X86_STD_RESOURCES + bool + default y + config PC bool depends on X86 && !EMBEDDED --- linux-2.6.6-rc1/arch/i386/kernel/acpi/boot.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/kernel/acpi/boot.c 2004-04-18 22:26:02.593314848 -0700 @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include --- linux-2.6.6-rc1/arch/i386/kernel/apic.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/apic.c 2004-04-18 22:26:02.594314696 -0700 @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c 2004-04-18 22:25:36.866225960 -0700 @@ -57,8 +57,7 @@ static int cpufreq_p4_setdc(unsigned int u32 l, h; cpumask_t cpus_allowed, affected_cpu_map; struct cpufreq_freqs freqs; - int hyperthreading = 0; - int sibling = 0; + int j; if (!cpu_online(cpu) || (newstate > DC_DISABLE) || (newstate == DC_RESV)) @@ -68,13 +67,10 @@ static int cpufreq_p4_setdc(unsigned int cpus_allowed = current->cpus_allowed; /* only run on CPU to be set, or on its sibling */ - affected_cpu_map = cpumask_of_cpu(cpu); -#ifdef CONFIG_X86_HT - hyperthreading = ((cpu_has_ht) && (smp_num_siblings == 2)); - if (hyperthreading) { - sibling = cpu_sibling_map[cpu]; - cpu_set(sibling, affected_cpu_map); - } +#ifdef CONFIG_SMP + affected_cpu_map = cpu_sibling_map[cpu]; +#else + affected_cpu_map = cpumask_of_cpu(cpu); #endif set_cpus_allowed(current, affected_cpu_map); BUG_ON(!cpu_isset(smp_processor_id(), affected_cpu_map)); @@ -97,11 +93,11 @@ static int cpufreq_p4_setdc(unsigned int /* notifiers */ freqs.old = stock_freq * l / 8; 
freqs.new = stock_freq * newstate / 8; - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - if (hyperthreading) { - freqs.cpu = sibling; - cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + } } rdmsr(MSR_IA32_THERM_STATUS, l, h); @@ -132,10 +128,11 @@ static int cpufreq_p4_setdc(unsigned int set_cpus_allowed(current, cpus_allowed); /* notifiers */ - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - if (hyperthreading) { - freqs.cpu = cpu; - cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + for_each_cpu(j) { + if (cpu_isset(j, affected_cpu_map)) { + freqs.cpu = j; + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + } } return 0; --- linux-2.6.6-rc1/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/powernow-k7.c 2004-04-18 22:25:28.895437704 -0700 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,11 @@ #include #include +#ifdef CONFIG_ACPI_PROCESSOR +#include +#include +#endif + #include "powernow-k7.h" #define DEBUG @@ -57,6 +63,17 @@ struct pst_s { u8 numpstates; }; +#ifdef CONFIG_ACPI_PROCESSOR +union powernow_acpi_control_t { + struct { + unsigned long fid:5, + vid:5, + sgtc:20, + res1:2; + } bits; + unsigned long val; +}; +#endif /* divide by 1000 to get VID. */ static int mobile_vid_table[32] = { @@ -74,6 +91,12 @@ static int fid_codes[32] = { 150, 225, 160, 165, 170, 180, -1, -1, }; +/* This parameter is used in order to force ACPI instead of legacy method for + * configuration purpose. 
+ */ + +static int powernow_acpi_force; + static struct cpufreq_frequency_table *powernow_table; static unsigned int can_scale_bus; @@ -85,6 +108,14 @@ static unsigned int fsb; static unsigned int latency; static char have_a0; +static int check_fsb(unsigned int fsbspeed) +{ + int delta; + unsigned int f = fsb / 1000; + + delta = (fsbspeed > f) ? fsbspeed - f : f - fsbspeed; + return (delta < 5); +} static int check_powernow(void) { @@ -140,7 +171,8 @@ static int check_powernow(void) static int get_ranges (unsigned char *pst) { - unsigned int j, speed; + unsigned int j; + unsigned int speed; u8 fid, vid; powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); @@ -151,12 +183,12 @@ static int get_ranges (unsigned char *ps for (j=0 ; j < number_scales; j++) { fid = *pst++; - powernow_table[j].frequency = fsb * fid_codes[fid] * 100; + powernow_table[j].frequency = (fsb * fid_codes[fid]) / 10; powernow_table[j].index = fid; /* lower 8 bits */ - speed = fsb * (fid_codes[fid]/10); + speed = powernow_table[j].frequency; + if ((fid_codes[fid] % 10)==5) { - speed += fsb/2; #if defined(CONFIG_ACPI_PROCESSOR) || defined(CONFIG_ACPI_PROCESSOR_MODULE) if (have_a0 == 1) powernow_table[j].frequency = CPUFREQ_ENTRY_INVALID; @@ -164,7 +196,7 @@ static int get_ranges (unsigned char *ps } dprintk (KERN_INFO PFX " FID: 0x%x (%d.%dx [%dMHz])\t", fid, - fid_codes[fid] / 10, fid_codes[fid] % 10, speed); + fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000); if (speed < minimum_speed) minimum_speed = speed; @@ -176,8 +208,6 @@ static int get_ranges (unsigned char *ps dprintk ("VID: 0x%x (%d.%03dV)\n", vid, mobile_vid_table[vid]/1000, mobile_vid_table[vid]%1000); } - dprintk ("\n"); - powernow_table[number_scales].frequency = CPUFREQ_TABLE_END; powernow_table[number_scales].index = 0; @@ -234,7 +264,8 @@ static void change_speed (unsigned int i rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); cfid = fidvidstatus.bits.CFID; - freqs.old = fsb * 
fid_codes[cfid] * 100; + freqs.old = fsb * fid_codes[cfid] / 10; + freqs.new = powernow_table[index].frequency; cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); @@ -262,19 +293,136 @@ static void change_speed (unsigned int i } +#ifdef CONFIG_ACPI_PROCESSOR + +struct acpi_processor_performance *acpi_processor_perf; + +static int powernow_acpi_init(void) +{ + int i; + int retval = 0; + union powernow_acpi_control_t pc; + + if (acpi_processor_perf != NULL && powernow_table != NULL) { + retval = -EINVAL; + goto err0; + } + + acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance), + GFP_KERNEL); + + if (!acpi_processor_perf) { + retval = -ENOMEM; + goto err0; + } + + memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance)); + + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { + retval = -EIO; + goto err1; + } + + if (acpi_processor_perf->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + if (acpi_processor_perf->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) { + retval = -ENODEV; + goto err2; + } + + number_scales = acpi_processor_perf->state_count; + + if (number_scales < 2) { + retval = -ENODEV; + goto err2; + } + + powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + if (!powernow_table) { + retval = -ENOMEM; + goto err2; + } + + memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table))); + + pc.val = (unsigned long) acpi_processor_perf->states[0].control; + for (i = 0; i < number_scales; i++) { + u8 fid, vid; + unsigned int speed; + + pc.val = (unsigned long) acpi_processor_perf->states[i].control; + dprintk (KERN_INFO PFX "acpi: P%d: %d MHz %d mW %d uS control %08x SGTC %d\n", + i, + (u32) acpi_processor_perf->states[i].core_frequency, + (u32) acpi_processor_perf->states[i].power, + (u32) acpi_processor_perf->states[i].transition_latency, + (u32) 
acpi_processor_perf->states[i].control, + pc.bits.sgtc); + + vid = pc.bits.vid; + fid = pc.bits.fid; + + powernow_table[i].frequency = fsb * fid_codes[fid] / 10; + powernow_table[i].index = fid; /* lower 8 bits */ + powernow_table[i].index |= (vid << 8); /* upper 8 bits */ + + speed = powernow_table[i].frequency; + + if ((fid_codes[fid] % 10)==5) { + if (have_a0 == 1) + powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; + } + + dprintk (KERN_INFO PFX " FID: 0x%x (%d.%dx [%dMHz])\t", fid, + fid_codes[fid] / 10, fid_codes[fid] % 10, speed/1000); + dprintk ("VID: 0x%x (%d.%03dV)\n", vid, mobile_vid_table[vid]/1000, + mobile_vid_table[vid]%1000); + + if (latency < pc.bits.sgtc) + latency = pc.bits.sgtc; + + if (speed < minimum_speed) + minimum_speed = speed; + if (speed > maximum_speed) + maximum_speed = speed; + } + + powernow_table[i].frequency = CPUFREQ_TABLE_END; + powernow_table[i].index = 0; + + return 0; + +err2: + acpi_processor_unregister_performance(acpi_processor_perf, 0); +err1: + kfree(acpi_processor_perf); +err0: + printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); + acpi_processor_perf = NULL; + return retval; +} +#else +static int powernow_acpi_init(void) +{ + printk(KERN_INFO PFX "no support for ACPI processor found." + " Please recompile your kernel with ACPI processor\n"); + return -EINVAL; +} +#endif + static int powernow_decode_bios (int maxfid, int startvid) { struct psb_s *psb; struct pst_s *pst; - struct cpuinfo_x86 *c = cpu_data; unsigned int i, j; unsigned char *p; unsigned int etuple; unsigned int ret; etuple = cpuid_eax(0x80000001); - etuple &= 0xf00; - etuple |= (c->x86_model<<4)|(c->x86_mask); for (i=0xC0000; i < 0xffff0 ; i+=16) { @@ -305,7 +453,6 @@ static int powernow_decode_bios (int max } dprintk (KERN_INFO PFX "Settling Time: %d microseconds.\n", psb->settlingtime); dprintk (KERN_INFO PFX "Has %d PST tables. 
(Only dumping ones relevant to this CPU).\n", psb->numpst); - latency *= 100; /* SGTC needs to be in units of 10ns */ p += sizeof (struct psb_s); @@ -315,7 +462,8 @@ static int powernow_decode_bios (int max pst = (struct pst_s *) p; number_scales = pst->numpstates; - if ((etuple == pst->cpuid) && (maxfid==pst->maxfid) && (startvid==pst->startvid)) + if ((etuple == pst->cpuid) && check_fsb(pst->fsbspeed) && + (maxfid==pst->maxfid) && (startvid==pst->startvid)) { dprintk (KERN_INFO PFX "PST:%d (@%p)\n", i, pst); dprintk (KERN_INFO PFX " cpuid: 0x%x\t", pst->cpuid); @@ -323,7 +471,6 @@ static int powernow_decode_bios (int max dprintk ("maxFID: 0x%x\t", pst->maxfid); dprintk ("startvid: 0x%x\n", pst->startvid); - fsb = pst->fsbspeed; ret = get_ranges ((char *) pst + sizeof (struct pst_s)); return ret; @@ -335,7 +482,7 @@ static int powernow_decode_bios (int max } printk (KERN_INFO PFX "No PST tables match this cpuid (0x%x)\n", etuple); printk (KERN_INFO PFX "This is indicative of a broken BIOS.\n"); - printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n"); + return -EINVAL; } p++; @@ -365,6 +512,33 @@ static int powernow_verify (struct cpufr return cpufreq_frequency_table_verify(policy, powernow_table); } +/* + * We use the fact that the bus frequency is somehow + * a multiple of 100000/3 khz, then we compute sgtc according + * to this multiple. + * That way, we match more how AMD thinks all of that work. + * We will then get the same kind of behaviour already tested under + * the "well-known" other OS. 
+ */ +static int __init fixup_sgtc(void) +{ + unsigned int sgtc; + unsigned int m; + + m = fsb / 3333; + if ((m % 10) >= 5) + m += 5; + + m /= 10; + + sgtc = 100 * m * latency; + sgtc = sgtc / 3; + if (sgtc > 0xfffff) { + printk(KERN_WARNING PFX "SGTC too large %d\n", sgtc); + sgtc = 0xfffff; + } + return sgtc; +} static int __init powernow_cpu_init (struct cpufreq_policy *policy) { @@ -376,18 +550,45 @@ static int __init powernow_cpu_init (str rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val); - result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + /* A K7 with powernow technology is set to max frequency by BIOS */ + fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID]; + if (!fsb) { + printk(KERN_WARNING PFX "can not determine bus frequency\n"); + return -EINVAL; + } + dprintk(KERN_INFO PFX "FSB: %3d.%03d MHz\n", fsb/1000, fsb%1000); + + if ((dmi_broken & BROKEN_CPUFREQ) || powernow_acpi_force) { + printk (KERN_INFO PFX "PSB/PST known to be broken. Trying ACPI instead\n"); + result = powernow_acpi_init(); + } else { + result = powernow_decode_bios(fidvidstatus.bits.MFID, fidvidstatus.bits.SVID); + if (result) { + printk (KERN_INFO PFX "Trying ACPI perflib\n"); + maximum_speed = 0; + minimum_speed = -1; + latency = 0; + result = powernow_acpi_init(); + if (result) { + printk (KERN_INFO PFX "ACPI and legacy methods failed\n"); + printk (KERN_INFO PFX "See http://www.codemonkey.org.uk/projects/cpufreq/powernow-k7.shtml\n"); + } + } else { + /* SGTC use the bus clock as timer */ + latency = fixup_sgtc(); + printk(KERN_INFO PFX "SGTC: %d\n", latency); + } + } + if (result) return result; printk (KERN_INFO PFX "Minimum speed %d MHz. 
Maximum speed %d MHz.\n", - minimum_speed, maximum_speed); + minimum_speed/1000, maximum_speed/1000); policy->governor = CPUFREQ_DEFAULT_GOVERNOR; - /* latency is in 10 ns (look for SGTC above) for each VID - * and FID transition, so multiply that value with 20 */ - policy->cpuinfo.transition_latency = latency * 20; + policy->cpuinfo.transition_latency = 20 * latency / fsb; policy->cur = maximum_speed; @@ -418,10 +619,6 @@ static struct cpufreq_driver powernow_dr static int __init powernow_init (void) { - if (dmi_broken & BROKEN_CPUFREQ) { - printk (KERN_INFO PFX "Disabled at boot time by DMI,\n"); - return -ENODEV; - } if (check_powernow()==0) return -ENODEV; return cpufreq_register_driver(&powernow_driver); @@ -430,15 +627,25 @@ static int __init powernow_init (void) static void __exit powernow_exit (void) { +#ifdef CONFIG_ACPI_PROCESSOR + if (acpi_processor_perf) { + acpi_processor_unregister_performance(acpi_processor_perf, 0); + kfree(acpi_processor_perf); + } +#endif cpufreq_unregister_driver(&powernow_driver); if (powernow_table) kfree(powernow_table); } +module_param(powernow_acpi_force, int, 0444); + +MODULE_PARM_DESC(powernow_acpi_force, "Force ACPI to be used"); + MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); MODULE_LICENSE ("GPL"); -module_init(powernow_init); +late_initcall(powernow_init); module_exit(powernow_exit); --- linux-2.6.6-rc1/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2004-04-18 22:25:28.896437552 -0700 @@ -195,21 +195,6 @@ static int centrino_cpu_init_table(struc struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; struct cpu_model *model; - if (!cpu_has(cpu, X86_FEATURE_EST)) - return -ENODEV; - - /* Only Intel Pentium M stepping 5 for now - add new CPUs as - they appear after making sure they use PERF_CTL in the same - way. 
*/ - if (cpu->x86_vendor != X86_VENDOR_INTEL || - cpu->x86 != 6 || - cpu->x86_model != 9 || - cpu->x86_mask != 5) { - printk(KERN_INFO PFX "found unsupported CPU with Enhanced SpeedStep: " - "send /proc/cpuinfo to " MAINTAINER "\n"); - return -ENODEV; - } - for(model = models; model->model_name != NULL; model++) if (strcmp(cpu->x86_model_id, model->model_name) == 0) break; @@ -361,6 +346,7 @@ static inline int centrino_cpu_init_acpi static int centrino_cpu_init(struct cpufreq_policy *policy) { + struct cpuinfo_x86 *cpu = &cpu_data[policy->cpu]; unsigned freq; unsigned l, h; int ret; @@ -368,6 +354,21 @@ static int centrino_cpu_init(struct cpuf if (policy->cpu != 0) return -ENODEV; + if (!cpu_has(cpu, X86_FEATURE_EST)) + return -ENODEV; + + /* Only Intel Pentium M stepping 5 for now - add new CPUs as + they appear after making sure they use PERF_CTL in the same + way. */ + if (cpu->x86_vendor != X86_VENDOR_INTEL || + cpu->x86 != 6 || + cpu->x86_model != 9 || + cpu->x86_mask != 5) { + printk(KERN_INFO PFX "found unsupported CPU with Enhanced SpeedStep: " + "send /proc/cpuinfo to " MAINTAINER "\n"); + return -ENODEV; + } + if (centrino_cpu_init_acpi(policy)) { if (centrino_cpu_init_table(policy)) { return -ENODEV; --- linux-2.6.6-rc1/arch/i386/kernel/dmi_scan.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/dmi_scan.c 2004-04-18 22:25:52.726814784 -0700 @@ -360,6 +360,22 @@ static __init int fix_broken_hp_bios_irq } /* + * Work around broken Acer TravelMate 360 Notebooks which assign Cardbus to + * IRQ 11 even though it is actually wired to IRQ 10 + */ +static __init int fix_acer_tm360_irqrouting(struct dmi_blacklist *d) +{ +#ifdef CONFIG_PCI + extern int acer_tm360_irqrouting; + if (acer_tm360_irqrouting == 0) + { + acer_tm360_irqrouting = 1; + printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); + } +#endif + return 0; +} +/* * Check for clue free BIOS implementations who use * the following QA technique * @@ -413,30 +429,6 @@ static 
__init int swab_apm_power_in_minu } /* - * The Intel 440GX hall of shame. - * - * On many (all we have checked) of these boxes the $PIRQ table is wrong. - * The MP1.4 table is right however and so SMP kernels tend to work. - */ - -static __init int broken_pirq(struct dmi_blacklist *d) -{ - - printk(KERN_INFO " *** Possibly defective BIOS detected (irqtable)\n"); - printk(KERN_INFO " *** Many BIOSes matching this signature have incorrect IRQ routing tables.\n"); - printk(KERN_INFO " *** If you see IRQ problems, in particular SCSI resets and hangs at boot\n"); - printk(KERN_INFO " *** contact your hardware vendor and ask about updates.\n"); - printk(KERN_INFO " *** Building an SMP kernel may evade the bug some of the time.\n"); -#ifdef CONFIG_X86_IO_APIC - { - extern int skip_ioapic_setup; - skip_ioapic_setup = 0; - } -#endif - return 0; -} - -/* * ASUS K7V-RM has broken ACPI table defining sleep modes */ @@ -815,52 +807,6 @@ static __initdata struct dmi_blacklist d NO_MATCH, NO_MATCH } }, - /* Problem Intel 440GX bioses */ - - { broken_pirq, "SABR1 Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - MATCH(DMI_BIOS_VERSION,"SABR1"), - NO_MATCH, NO_MATCH - } }, - { broken_pirq, "l44GX Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - MATCH(DMI_BIOS_VERSION,"L440GX0.86B.0094.P10"), - NO_MATCH, NO_MATCH - } }, - { broken_pirq, "l44GX Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - MATCH(DMI_BIOS_VERSION,"L440GX0.86B.0115.P12"), - NO_MATCH, NO_MATCH - } }, - { broken_pirq, "l44GX Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - MATCH(DMI_BIOS_VERSION,"L440GX0.86B.0120.P12"), - NO_MATCH, NO_MATCH - } }, - { broken_pirq, "l44GX Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - MATCH(DMI_BIOS_VERSION,"L440GX0.86B.0125.P13"), - NO_MATCH, NO_MATCH - } }, - { broken_pirq, "l44GX Bios", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "Intel Corporation"), - 
MATCH(DMI_BIOS_VERSION,"L440GX0.86B.0066.P07.9906041405"), - NO_MATCH, NO_MATCH - } }, - - { broken_pirq, "IBM xseries 370", { /* Bad $PIR */ - MATCH(DMI_BIOS_VENDOR, "IBM"), - MATCH(DMI_BIOS_VERSION,"MMKT33AUS"), - NO_MATCH, NO_MATCH - } }, - - /* Intel in disguise - In this case they can't hide and they don't run - too well either... */ - { broken_pirq, "Dell PowerEdge 8450", { /* Bad $PIR */ - MATCH(DMI_PRODUCT_NAME, "Dell PowerEdge 8450"), - NO_MATCH, NO_MATCH, NO_MATCH - } }, - { broken_acpi_Sx, "ASUS K7V-RM", { /* Bad ACPI Sx table */ MATCH(DMI_BIOS_VERSION,"ASUS K7V-RM ACPI BIOS Revision 1003A"), MATCH(DMI_BOARD_NAME, ""), @@ -894,6 +840,13 @@ static __initdata struct dmi_blacklist d MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736") } }, + + { fix_acer_tm360_irqrouting, "Acer TravelMate 36x Laptop", { + MATCH(DMI_SYS_VENDOR, "Acer"), + MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + NO_MATCH, NO_MATCH + } }, + /* @@ -1032,6 +985,12 @@ static __initdata struct dmi_blacklist d MATCH(DMI_BIOS_VERSION, "ASUS A7V ACPI BIOS Revision 1007"), NO_MATCH }}, + { disable_acpi_pci, "Acer TravelMate 36x Laptop", { + MATCH(DMI_SYS_VENDOR, "Acer"), + MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), + NO_MATCH, NO_MATCH + } }, + #endif { NULL, } --- linux-2.6.6-rc1/arch/i386/kernel/efi.c 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/i386/kernel/efi.c 2004-04-18 22:26:02.595314544 -0700 @@ -37,7 +37,6 @@ #include #include #include -#include #include #define EFI_DEBUG 0 --- linux-2.6.6-rc1/arch/i386/kernel/entry.S 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/entry.S 2004-04-18 22:25:48.561448016 -0700 @@ -48,6 +48,18 @@ #include #include #include "irq_vectors.h" + /* We do not recover from a stack overflow, but at least + * we know it happened and should be able to track it down. 
+ */ +#ifdef CONFIG_STACK_OVERFLOW_TEST +#define STACK_OVERFLOW_TEST \ + testl $(THREAD_SIZE - 512),%esp; \ + jnz 10f; \ + call stack_overflow; \ +10: +#else +#define STACK_OVERFLOW_TEST +#endif #define nr_syscalls ((syscall_table_size)/4) @@ -100,7 +112,8 @@ TSS_ESP0_OFFSET = (4 - 0x200) pushl %ebx; \ movl $(__USER_DS), %edx; \ movl %edx, %ds; \ - movl %edx, %es; + movl %edx, %es; \ + STACK_OVERFLOW_TEST #define RESTORE_INT_REGS \ popl %ebx; \ @@ -300,6 +313,19 @@ syscall_exit: testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work restore_all: +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al + testl $(VM_MASK | 3), %eax + jz resume_kernelX # returning to kernel or vm86-space + + cmpl $0,TI_PRE_COUNT(%ebx) # non-zero preempt_count ? + jz resume_kernelX + + int $3 + +resume_kernelX: +#endif RESTORE_ALL # perform work that needs to be done immediately before resumption @@ -882,9 +908,9 @@ ENTRY(sys_call_table) .long sys_utimes .long sys_fadvise64_64 .long sys_ni_syscall /* sys_vserver */ - .long sys_ni_syscall /* sys_mbind */ - .long sys_ni_syscall /* 275 sys_get_mempolicy */ - .long sys_ni_syscall /* sys_set_mempolicy */ + .long sys_mbind + .long sys_get_mempolicy + .long sys_set_mempolicy .long sys_mq_open .long sys_mq_unlink .long sys_mq_timedsend --- linux-2.6.6-rc1/arch/i386/kernel/i386_ksyms.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/kernel/i386_ksyms.c 2004-04-18 22:26:02.595314544 -0700 @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/kernel/i8259.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/i8259.c 2004-04-18 22:25:24.638084920 -0700 @@ -445,6 +445,5 @@ void __init init_IRQ(void) if (boot_cpu_data.hard_math && !cpu_has_fpu) setup_irq(FPU_IRQ, &fpu_irq); - current_thread_info()->cpu = 0; - irq_ctx_init(0); + irq_ctx_init(current_thread_info()->cpu); } --- linux-2.6.6-rc1/arch/i386/kernel/io_apic.c 
2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/io_apic.c 2004-04-18 22:25:36.868225656 -0700 @@ -317,8 +317,7 @@ struct irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) \ - ((physical_balance && i > cpu_sibling_map[i]) ? cpu_sibling_map[i] : i) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) #define MAX_BALANCED_IRQ_INTERVAL (5*HZ) #define MIN_BALANCED_IRQ_INTERVAL (HZ/2) @@ -401,6 +400,7 @@ static void do_irq_balance(void) unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); unsigned long move_this_load = 0; int max_loaded = 0, min_loaded = 0; + int load; unsigned long useful_load_threshold = balanced_irq_interval + 10; int selected_irq; int tmp_loaded, first_attempt = 1; @@ -452,7 +452,7 @@ static void do_irq_balance(void) for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { min_cpu_irq = CPU_IRQ(i); @@ -471,7 +471,7 @@ tryanothercpu: for (i = 0; i < NR_CPUS; i++) { if (!cpu_online(i)) continue; - if (physical_balance && i > cpu_sibling_map[i]) + if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) continue; @@ -551,9 +551,14 @@ tryanotherirq: * We seek the least loaded sibling by making the comparison * (A+B)/2 vs B */ - if (physical_balance && (CPU_IRQ(min_loaded) >> 1) > - CPU_IRQ(cpu_sibling_map[min_loaded])) - min_loaded = cpu_sibling_map[min_loaded]; + load = CPU_IRQ(min_loaded) >> 1; + for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + if (load > CPU_IRQ(j)) { + /* This won't change cpu_sibling_map[min_loaded] */ + load = CPU_IRQ(j); + min_loaded = j; + } + } cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); target_cpu_mask = cpumask_of_cpu(min_loaded); --- linux-2.6.6-rc1/arch/i386/kernel/irq.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/irq.c 2004-04-18 22:26:02.596314392 
-0700 @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -570,6 +569,8 @@ out: irq_exit(); + kgdb_process_breakpoint(); + return 1; } --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/kernel/kgdb_stub.c 2004-04-18 22:25:54.248583440 -0700 @@ -0,0 +1,2454 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. 
George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi, based on + * code by San Mehat. + * + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA..AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $<packet info>#<checksum>. + * + * where + * <packet info> :: <characters representing the command or response> + * <checksum> :: < two hex digits computed as modulo 256 sum of <packet info>> + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer.
+ * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES 64 +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. 
+ * Just why GDB uses a different order is a historical mystery. + */ +enum regnames { _EAX, /* 0 */ + _ECX, /* 1 */ + _EDX, /* 2 */ + _EBX, /* 3 */ + _ESP, /* 4 */ + _EBP, /* 5 */ + _ESI, /* 6 */ + _EDI, /* 7 */ + _PC /* 8 also known as eip */ , + _PS /* 9 also known as eflags */ , + _CS, /* 10 */ + _SS, /* 11 */ + _DS, /* 12 */ + _ES, /* 13 */ + _FS, /* 14 */ + _GS /* 15 */ +}; + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. + */ +#define REMOTE_DEBUG 0 /* set != to turn on printing (also available in info) */ + +#define PID_MAX PID_MAX_DEFAULT + +#ifdef CONFIG_SMP +void smp_send_nmi_allbutself(void); +#define IF_SMP(x) x +#undef MAX_NO_CPUS +#ifndef CONFIG_NO_KGDB_CPUS +#define CONFIG_NO_KGDB_CPUS 2 +#endif +#if CONFIG_NO_KGDB_CPUS > NR_CPUS +#define MAX_NO_CPUS NR_CPUS +#else +#define MAX_NO_CPUS CONFIG_NO_KGDB_CPUS +#endif +#define hold_init hold_on_sstep: 1, +#define MAX_CPU_MASK (unsigned long)((1LL << MAX_NO_CPUS) - 1LL) +#define NUM_CPUS num_online_cpus() +#else +#define IF_SMP(x) +#define hold_init +#undef MAX_NO_CPUS +#define MAX_NO_CPUS 1 +#define NUM_CPUS 1 +#endif +#define NOCPU (struct task_struct *)0xbad1fbad +/* *INDENT-OFF* */ +struct kgdb_info { + int used_malloc; + void *called_from; + long long entry_tsc; + int errcode; + int vector; + int print_debug_info; +#ifdef CONFIG_SMP + int hold_on_sstep; + struct { + volatile struct task_struct *task; + int pid; + int hold; + struct pt_regs *regs; + } cpus_waiting[MAX_NO_CPUS]; +#endif +} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1}; + +/* *INDENT-ON* */ + +#define used_m kgdb_info.used_malloc +/* + * This is 
little area we set aside to contain the stack we + * need to build to allow gdb to call functions. We use one + * per cpu to avoid locking issues. We will do all this work + * with interrupts off so that should take care of the protection + * issues. + */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned int esp; + int array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned int OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND (8-1) /* parenthesized so ~MALLOC_ROUND is ~7, not (~8)-1 */ + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing arguments, including a return address + * on the stack and then adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register.
To adjust it would then
+ * require that we move everything below EIP up or down as needed.  This
+ * will not work as we may well have stack-relative pointers on the stack
+ * (such as the pointer to regs, for example).
+
+ * So here is what we do:
+ * We detect gdb attempting to store into the stack area and instead, store
+ * into the fn_call_lookaside.array at the same relative location as if it
+ * were the area ESP pointed at.	 We also trap ESP modifications
+ * and use these to adjust fn_call_lookaside.esp.  On entry
+ * fn_call_lookaside.esp will be set to point at the last entry in
+ * fn_call_lookaside.array.  This allows us to check if it has changed, and
+ * if so, on exit, we add the registers we will use to do the move and a
+ * trap/interrupt return exit sequence.	We then adjust the eflags in the
+ * regs array (remember we now have a copy in the fn_call_lookaside.array) to
+ * kill the interrupt bit, AND we change EIP to point at our set up stub.
+ * As part of the register set up we preset the registers to point at the
+ * beginning and end of the fn_call_lookaside.array, so all the stub needs to
+ * do is move words from the array to the stack until ESP= the desired value
+ * then do the iret.  This will then transfer to the desired function with
+ * all the correct registers.  Nifty huh?
+ */
+extern asmlinkage void fn_call_stub(void);
+extern asmlinkage void fn_rtn_stub(void);
+/*
+ * fn_rtn_stub loads ESP from EAX and falls through into fn_call_stub.
+ * fn_call_stub steps EBX down one word at a time, pushing each word it
+ * reads, until ESP equals ECX; it then pops EAX, EBX and ECX and
+ * executes iret.
+ */
+/*   *INDENT-OFF*	 */
+__asm__("fn_rtn_stub:\n\t"
+	"movl %eax,%esp\n\t"
+	"fn_call_stub:\n\t"
+	"1:\n\t"
+	"addl $-4,%ebx\n\t"
+	"movl (%ebx), %eax\n\t"
+	"pushl %eax\n\t"
+	"cmpl %esp,%ecx\n\t"
+	"jne  1b\n\t"
+	"popl %eax\n\t"
+	"popl %ebx\n\t"
+	"popl %ecx\n\t"
+	"iret \n\t");
+/*   *INDENT-ON*	 */
+/* Short names for members of the global kgdb_info structure. */
+#define gdb_i386vector	kgdb_info.vector
+#define gdb_i386errcode kgdb_info.errcode
+#define waiting_cpus	kgdb_info.cpus_waiting
+#define remote_debug	kgdb_info.print_debug_info
+#define hold_cpu(cpu)	kgdb_info.cpus_waiting[cpu].hold
+/* gdb locks */
+
+#ifdef CONFIG_SMP
+/* Set to 1 by in_kgdb() every time it is called. */
+static int in_kgdb_called;
+/* One lock per cpu; in_kgdb() waits on its cpu's lock while it is held. */
+static spinlock_t waitlocks[MAX_NO_CPUS] =
+	{[0 ... MAX_NO_CPUS - 1] = SPIN_LOCK_UNLOCKED };
+/*
+ * The following array has the thread pointer of each of the "other"
+ * cpus.	 We make it global so it can be seen by gdb.
+ */
+volatile int in_kgdb_entry_log[MAX_NO_CPUS];
+volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS];
+/*
+static spinlock_t continuelocks[MAX_NO_CPUS];
+*/
+spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED;
+/* waiters on our spinlock plus us */
+static atomic_t spinlock_waiters = ATOMIC_INIT(1);
+/* Nesting depth of KGDB_SPIN_LOCK on the cpu recorded in spinlock_cpu. */
+static int spinlock_count = 0;
+static int spinlock_cpu = 0;
+/*
+ * Note we use nested spin locks to account for the case where a break
+ * point is encountered when calling a function by user direction from
+ * kgdb.	 Also there is the memory exception recursion to account for.
+ * Well, yes, but this lets other cpus thru too.	 Lets add a
+ * cpu id to the lock.
+ */
+#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \
+			      spinlock_cpu != smp_processor_id()){\
+				      atomic_inc(&spinlock_waiters); \
+				      while (!
spin_trylock(x)) {\
+					    in_kgdb(&regs);\
+				      }\
+				      atomic_dec(&spinlock_waiters); \
+				      spinlock_count = 1; \
+				      spinlock_cpu = smp_processor_id(); \
+			  }else{  \
+				      spinlock_count++; \
+			  }
+#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x)
+#else
+/* UP build: the "lock" is a plain counter that the macros step. */
+unsigned kgdb_spinlock = 0;
+#define KGDB_SPIN_LOCK(x) (--*(x))
+#define KGDB_SPIN_UNLOCK(x) (++*(x))
+#endif
+
+/*
+ * Convert one ASCII hex digit (either case) to its value 0..15.
+ * Returns -1 if ch is not a hex digit.
+ */
+int
+hex(char ch)
+{
+	if ((ch >= 'a') && (ch <= 'f'))
+		return (ch - 'a' + 10);
+	if ((ch >= '0') && (ch <= '9'))
+		return (ch - '0');
+	if ((ch >= 'A') && (ch <= 'F'))
+		return (ch - 'A' + 10);
+	return (-1);
+}
+
+/*
+ * scan for the sequence $<data>#<checksum>
+ *
+ * Reads one packet from the debug channel into buffer (which must hold
+ * BUFMAX bytes), NUL-terminates it, and acknowledges it with '+'.  A
+ * packet whose checksum does not match is answered with '-' and the
+ * read is retried.  A leading "xx:" sequence id is echoed back and
+ * stripped from the buffer.
+ */
+void
+getpacket(char *buffer)
+{
+	unsigned char checksum;
+	unsigned char xmitcsum;
+	int i;
+	int count;
+	char ch;
+
+	do {
+		/* wait around for the start character, ignore all other characters */
+		while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+		checksum = 0;
+		xmitcsum = -1;
+
+		count = 0;
+
+		/*
+		 * now, read until a # or end of buffer is found; stop one
+		 * short of BUFMAX so the terminating NUL stays in bounds
+		 */
+		while (count < BUFMAX - 1) {
+			ch = getDebugChar() & 0x7f;
+			if (ch == '#')
+				break;
+			checksum = checksum + ch;
+			buffer[count] = ch;
+			count = count + 1;
+		}
+		buffer[count] = 0;
+
+		if (ch == '#') {
+			xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+			xmitcsum += hex(getDebugChar() & 0x7f);
+			if ((remote_debug) && (checksum != xmitcsum)) {
+				printk
+				    ("bad checksum.	My count = 0x%x, sent=0x%x. buf=%s\n",
+				     checksum, xmitcsum, buffer);
+			}
+
+			if (checksum != xmitcsum)
+				putDebugChar('-');	/* failed checksum */
+			else {
+				putDebugChar('+');	/* successful transfer */
+				/*
+				 * if a sequence char is present, reply the
+				 * sequence ID; only look at buffer[2] when
+				 * this packet actually wrote it
+				 */
+				if (count > 2 && buffer[2] == ':') {
+					putDebugChar(buffer[0]);
+					putDebugChar(buffer[1]);
+					/* remove sequence chars from buffer */
+					count = strlen(buffer);
+					for (i = 3; i <= count; i++)
+						buffer[i - 3] = buffer[i];
+				}
+			}
+		}
+	} while (checksum != xmitcsum);
+
+	if (remote_debug)
+		printk("R:%s\n", buffer);
+	flushDebugChar();
+}
+
+/* send the packet in buffer.
*/
+
+/*
+ * Sends buffer (a NUL-terminated string) as $<data>#<checksum> and
+ * repeats the whole packet until the peer answers with '+'.  In kgdboe
+ * mode the payload is pushed out in pieces of at most MAX_SEND_COUNT
+ * bytes, flushing between pieces.
+ */
+void
+putpacket(char *buffer)
+{
+	unsigned char checksum;
+	int count;
+	char ch;
+
+	/*  $<packet info>#<checksum>. */
+
+	if (!kgdboe) {
+		do {
+			if (remote_debug)
+				printk("T:%s\n", buffer);
+			putDebugChar('$');
+			checksum = 0;
+			count = 0;
+
+			while ((ch = buffer[count])) {
+				putDebugChar(ch);
+				checksum += ch;
+				count += 1;
+			}
+
+			putDebugChar('#');
+			putDebugChar(hexchars[checksum >> 4]);
+			putDebugChar(hexchars[checksum % 16]);
+			flushDebugChar();
+
+		} while ((getDebugChar() & 0x7f) != '+');
+	} else {
+		/*
+		 * For udp, we can not transfer too much bytes once.
+		 * We only transfer MAX_SEND_COUNT size bytes each time
+		 */
+
+#define MAX_SEND_COUNT 30
+
+		int send_count = 0, i = 0;
+		char send_buf[MAX_SEND_COUNT];
+
+		do {
+			if (remote_debug)
+				printk("T:%s\n", buffer);
+			putDebugChar('$');
+			checksum = 0;
+			count = 0;
+			send_count = 0;
+			while ((ch = buffer[count])) {
+				if (send_count >= MAX_SEND_COUNT) {
+					/* staging buffer full: send and flush it */
+					for(i = 0; i < MAX_SEND_COUNT; i++) {
+						putDebugChar(send_buf[i]);
+					}
+					flushDebugChar();
+					send_count = 0;
+				} else {
+					send_buf[send_count] = ch;
+					checksum += ch;
+					count ++;
+					send_count++;
+				}
+			}
+			for(i = 0; i < send_count; i++)
+				putDebugChar(send_buf[i]);
+			putDebugChar('#');
+			putDebugChar(hexchars[checksum >> 4]);
+			putDebugChar(hexchars[checksum % 16]);
+			flushDebugChar();
+		} while ((getDebugChar() & 0x7f) != '+');
+	}
+}
+
+static char remcomInBuffer[BUFMAX];
+static char remcomOutBuffer[BUFMAX];
+static short error;
+
+/* printk(format, parm), but only while remote_debug is set. */
+void
+debug_error(char *format, char *parm)
+{
+	if (remote_debug)
+		printk(format, parm);
+}
+
+/*
+ * Dump the register frame with printk.	 The value shown as ESP is the
+ * address of the esp member of the frame, not its contents.
+ */
+static void
+print_regs(struct pt_regs *regs)
+{
+	printk("EAX=%08lx ", regs->eax);
+	printk("EBX=%08lx ", regs->ebx);
+	printk("ECX=%08lx ", regs->ecx);
+	printk("EDX=%08lx ", regs->edx);
+	printk("\n");
+	printk("ESI=%08lx ", regs->esi);
+	printk("EDI=%08lx ", regs->edi);
+	printk("EBP=%08lx ", regs->ebp);
+	printk("ESP=%08lx ", (long) &regs->esp);
+	printk("\n");
+	printk(" DS=%08x ", regs->xds);
+	printk(" ES=%08x ", regs->xes);
+	printk("
SS=%08x ", __KERNEL_DS);
+	printk(" FL=%08lx ", regs->eflags);
+	printk("\n");
+	printk(" CS=%08x ", regs->xcs);
+	printk(" IP=%08lx ", regs->eip);
+#if 0
+	printk(" FS=%08x ", regs->fs);
+	printk(" GS=%08x ", regs->gs);
+#endif
+	printk("\n");
+
+}				/* print_regs */
+
+/* The stack pointer value gdb sees and may change, kept per trapping cpu. */
+#define NEW_esp fn_call_lookaside[trap_cpu].esp
+
+/*
+ * Copy a pt_regs frame into the gdb register array, using the
+ * enum regnames indices.
+ */
+static void
+regs_to_gdb_regs(int *gdb_regs, struct pt_regs *regs)
+{
+	gdb_regs[_EAX] = regs->eax;
+	gdb_regs[_EBX] = regs->ebx;
+	gdb_regs[_ECX] = regs->ecx;
+	gdb_regs[_EDX] = regs->edx;
+	gdb_regs[_ESI] = regs->esi;
+	gdb_regs[_EDI] = regs->edi;
+	gdb_regs[_EBP] = regs->ebp;
+	gdb_regs[_DS] = regs->xds;
+	gdb_regs[_ES] = regs->xes;
+	gdb_regs[_PS] = regs->eflags;
+	gdb_regs[_CS] = regs->xcs;
+	gdb_regs[_PC] = regs->eip;
+	/* Note, as we are debugging the kernel, we will always
+	 * trap in kernel code, this means no privilege change,
+	 * and so the pt_regs structure is not completely valid.	 In a non
+	 * privilege change trap, only EFLAGS, CS and EIP are put on the stack,
+	 * SS and ESP are not stacked, this means that the last 2 elements of
+	 * pt_regs are not valid (they would normally refer to the user stack)
+	 * also, using regs+1 is no good because you end up with a value that is
+	 * 2 longs (8) too high.	 This used to cause stepping over functions
+	 * to fail, so my fix is to use the address of regs->esp, which
+	 * should point at the end of the stack frame.  Note I have ignored
+	 * completely exceptions that cause an error code to be stacked, such
+	 * as double fault.  Stuart Hughes, Zentropix.
+	 * original code: gdb_regs[_ESP] =  (int) (regs + 1) ;
+
+	 * this is now done on entry and moved to OLD_esp (as well as NEW_esp).
+	 */
+	gdb_regs[_ESP] = NEW_esp;
+	gdb_regs[_SS] = __KERNEL_DS;
+	/* FS and GS are not read from the frame; gdb gets a fixed 0xFFFF */
+	gdb_regs[_FS] = 0xFFFF;
+	gdb_regs[_GS] = 0xFFFF;
+}				/* regs_to_gdb_regs */
+
+/*
+ * Copy the gdb register array back into a pt_regs frame.  ESP is only
+ * recorded in NEW_esp; SS, FS and GS are not written back (see the
+ * "#if 0" section).
+ */
+static void
+gdb_regs_to_regs(int *gdb_regs, struct pt_regs *regs)
+{
+	regs->eax = gdb_regs[_EAX];
+	regs->ebx = gdb_regs[_EBX];
+	regs->ecx = gdb_regs[_ECX];
+	regs->edx = gdb_regs[_EDX];
+	regs->esi = gdb_regs[_ESI];
+	regs->edi = gdb_regs[_EDI];
+	regs->ebp = gdb_regs[_EBP];
+	regs->xds = gdb_regs[_DS];
+	regs->xes = gdb_regs[_ES];
+	regs->eflags = gdb_regs[_PS];
+	regs->xcs = gdb_regs[_CS];
+	regs->eip = gdb_regs[_PC];
+	NEW_esp = gdb_regs[_ESP];	/* keep the value */
+#if 0				/* can't change these */
+	regs->esp = gdb_regs[_ESP];
+	regs->xss = gdb_regs[_SS];
+	regs->fs = gdb_regs[_FS];
+	regs->gs = gdb_regs[_GS];
+#endif
+
+}				/* gdb_regs_to_regs */
+
+/* When zero, get_gdb_regs() skips the scheduler stack walk. */
+int thread_list = 0;
+
+/*
+ * Fill gdb_regs for task p.
+ *  - p NULL or the current task: taken from regs.
+ *  - (SMP) p is the task recorded for a cpu in cpus_waiting: taken from
+ *    the registers saved for that cpu, with ESP set to the address of
+ *    the esp member of that frame.
+ *  - any other task: everything is zeroed, then ESP and PC come from
+ *    p->thread and EBP, EDI and ESI are read from the first three words
+ *    at the saved stack pointer.
+ */
+void
+get_gdb_regs(struct task_struct *p, struct pt_regs *regs, int *gdb_regs)
+{
+	unsigned long stack_page;
+	int count = 0;
+	IF_SMP(int i);
+	if (!p || p == current) {
+		regs_to_gdb_regs(gdb_regs, regs);
+		return;
+	}
+#ifdef CONFIG_SMP
+	for (i = 0; i < MAX_NO_CPUS; i++) {
+		if (p == kgdb_info.cpus_waiting[i].task) {
+			regs_to_gdb_regs(gdb_regs,
+					 kgdb_info.cpus_waiting[i].regs);
+			gdb_regs[_ESP] =
+			    (int) &kgdb_info.cpus_waiting[i].regs->esp;
+
+			return;
+		}
+	}
+#endif
+	memset(gdb_regs, 0, NUMREGBYTES);
+	gdb_regs[_ESP] = p->thread.esp;
+	gdb_regs[_PC] = p->thread.eip;
+	gdb_regs[_EBP] = *(int *) gdb_regs[_ESP];
+	gdb_regs[_EDI] = *(int *) (gdb_regs[_ESP] + 4);
+	gdb_regs[_ESI] = *(int *) (gdb_regs[_ESP] + 8);
+
+/*
+ * This code is to give a more informative notion of where a process
+ * is waiting.  It is used only when the user asks for a thread info
+ * list.	 If he then switches to the thread, s/he will find the task
+ * is in schedule, but a back trace should show the same info we come
+ * up with.  This code was shamelessly purloined from process.c.	 It was
+ * then enhanced to provide more registers than simply the program
+ * counter.
+ */
+
+	if (!thread_list) {
+		return;
+	}
+
+	if (p->state == TASK_RUNNING)
+		return;
+	stack_page = (unsigned long) p->thread_info;
+	/* give up if the saved ESP is outside the task's stack area */
+	if (gdb_regs[_ESP] < stack_page || gdb_regs[_ESP] >
+	    THREAD_SIZE - sizeof(long) + stack_page)
+		return;
+	/* include/asm-i386/system.h:switch_to() pushes ebp last. */
+	/*
+	 * Follow the saved-EBP chain, replacing PC/ESP/EBP with the
+	 * caller's values, until PC is outside the scheduler functions,
+	 * EBP leaves the stack area, or the iteration limit is reached.
+	 */
+	do {
+		if (gdb_regs[_EBP] < stack_page ||
+		    gdb_regs[_EBP] > THREAD_SIZE - 2*sizeof(long) + stack_page)
+			return;
+		gdb_regs[_PC] = *(unsigned long *) (gdb_regs[_EBP] + 4);
+		gdb_regs[_ESP] = gdb_regs[_EBP] + 8;
+		gdb_regs[_EBP] = *(unsigned long *) gdb_regs[_EBP];
+		if (!in_sched_functions(gdb_regs[_PC]))
+			return;
+	} while (count++ < 16);
+	return;
+}
+
+/* Indicate to caller of mem2hex or hex2mem that there has been an
+   error.	 */
+static volatile int mem_err = 0;
+/* Non-zero while mem2hex/hex2mem are touching memory that may fault. */
+static volatile int mem_err_expected = 0;
+static volatile int mem_err_cnt = 0;
+static int garbage_loc = -1;
+
+/* Read one byte from addr. */
+int
+get_char(char *addr)
+{
+	return *addr;
+}
+
+/*
+ * Store the byte val at addr.  With may_fault set, an address in the
+ * window of LOOKASIDE_SIZE bytes just below OLD_esp is redirected to the
+ * same offset below END_OF_LOOKASIDE.
+ */
+void
+set_char(char *addr, int val, int may_fault)
+{
+	/*
+	 * This code traps references to the area mapped to the kernel
+	 * stack as given by the regs and, instead, stores to the
+	 * fn_call_lookaside[cpu].array
+	 */
+	if (may_fault &&
+	    (unsigned int) addr < OLD_esp &&
+	    ((unsigned int) addr > (OLD_esp - (unsigned int) LOOKASIDE_SIZE))) {
+		addr = (char *) END_OF_LOOKASIDE - ((char *) OLD_esp - addr);
+	}
+	*addr = val;
+}
+
+/* convert the memory pointed to by mem into hex, placing result in buf */
+/* return a pointer to the last char put in buf (null) */
+/* If MAY_FAULT is non-zero, then we should set mem_err in response to
+   a fault; if zero treat a fault like any other fault in the stub.
*/
+char *
+mem2hex(char *mem, char *buf, int count, int may_fault)
+{
+	int i;
+	unsigned char ch;
+
+	if (may_fault) {
+		mem_err_expected = 1;
+		mem_err = 0;
+	}
+	/* each source byte becomes two hex characters in buf */
+	for (i = 0; i < count; i++) {
+		/* printk("%lx = ", mem) ; */
+
+		ch = get_char(mem++);
+
+		/* printk("%02x\n", ch & 0xFF) ; */
+		if (may_fault && mem_err) {
+			if (remote_debug)
+				printk("Mem fault fetching from addr %lx\n",
+				       (long) (mem - 1));
+			*buf = 0;	/* truncate buffer */
+			return (buf);
+		}
+		*buf++ = hexchars[ch >> 4];
+		*buf++ = hexchars[ch % 16];
+	}
+	*buf = 0;
+	if (may_fault)
+		mem_err_expected = 0;
+	return (buf);
+}
+
+/* convert the hex array pointed to by buf into binary to be placed in mem */
+/* return a pointer to the character AFTER the last byte written */
+/* NOTE: We use the may fault flag to also indicate if the write is to
+ * the registers (0) or "other" memory (!=0)
+ */
+char *
+hex2mem(char *buf, char *mem, int count, int may_fault)
+{
+	int i;
+	unsigned char ch;
+
+	if (may_fault) {
+		mem_err_expected = 1;
+		mem_err = 0;
+	}
+	/* two hex characters from buf make one byte stored via set_char() */
+	for (i = 0; i < count; i++) {
+		ch = hex(*buf++) << 4;
+		ch = ch + hex(*buf++);
+		set_char(mem++, ch, may_fault);
+
+		if (may_fault && mem_err) {
+			if (remote_debug)
+				printk("Mem fault storing to addr %lx\n",
+				       (long) (mem - 1));
+			return (mem);
+		}
+	}
+	if (may_fault)
+		mem_err_expected = 0;
+	return (mem);
+}
+
+/**********************************************/
+/* WHILE WE FIND NICE HEX CHARS, BUILD AN INT */
+/* RETURN NUMBER OF CHARS PROCESSED	      */
+/**********************************************/
+/* *ptr is left pointing at the first character that is not a hex digit. */
+int
+hexToInt(char **ptr, int *intValue)
+{
+	int numChars = 0;
+	int hexValue;
+
+	*intValue = 0;
+
+	while (**ptr) {
+		hexValue = hex(**ptr);
+		if (hexValue >= 0) {
+			*intValue = (*intValue << 4) | hexValue;
+			numChars++;
+		} else
+			break;
+
+		(*ptr)++;
+	}
+
+	return (numChars);
+}
+
+#define stubhex(h) hex(h)
+#ifdef old_thread_list
+
+static int
+stub_unpack_int(char *buff, int fieldlength)
+{
+	int nibble;
+	int retval = 0;
+
+	while
(fieldlength) {
+		nibble = stubhex(*buff++);
+		retval |= nibble;
+		fieldlength--;
+		if (fieldlength)
+			retval = retval << 4;
+	}
+	return retval;
+}
+#endif
+/* Write byte as two hex characters at pkt; returns pkt advanced by 2. */
+static char *
+pack_hex_byte(char *pkt, int byte)
+{
+	*pkt++ = hexchars[(byte >> 4) & 0xf];
+	*pkt++ = hexchars[(byte & 0xf)];
+	return pkt;
+}
+
+/* Number of hex characters in a packed thread id. */
+#define BUF_THREAD_ID_SIZE 16
+
+/*
+ * Write the bytes of *id as BUF_THREAD_ID_SIZE hex characters at pkt;
+ * returns the position just past them.
+ */
+static char *
+pack_threadid(char *pkt, threadref * id)
+{
+	char *limit;
+	unsigned char *altid;
+
+	altid = (unsigned char *) id;
+	limit = pkt + BUF_THREAD_ID_SIZE;
+	while (pkt < limit)
+		pkt = pack_hex_byte(pkt, *altid++);
+	return pkt;
+}
+
+#ifdef old_thread_list
+/* Decode two hex characters at buf into *value; returns buf + 2. */
+static char *
+unpack_byte(char *buf, int *value)
+{
+	*value = stub_unpack_int(buf, 2);
+	return buf + 2;
+}
+
+/* Inverse of pack_threadid(): hex characters at inbuf into *id. */
+static char *
+unpack_threadid(char *inbuf, threadref * id)
+{
+	char *altref;
+	char *limit = inbuf + BUF_THREAD_ID_SIZE;
+	int x, y;
+
+	altref = (char *) id;
+
+	while (inbuf < limit) {
+		x = stubhex(*inbuf++);
+		y = stubhex(*inbuf++);
+		*altref++ = (x << 4) | y;
+	}
+	return inbuf;
+}
+#endif
+/*
+ * Store value in *id: four zero bytes followed by the four bytes of
+ * value, most significant first.
+ */
+void
+int_to_threadref(threadref * id, int value)
+{
+	unsigned char *scan;
+
+	scan = (unsigned char *) id;
+	{
+		int i = 4;
+		while (i--)
+			*scan++ = 0;
+	}
+	*scan++ = (value >> 24) & 0xff;
+	*scan++ = (value >> 16) & 0xff;
+	*scan++ = (value >> 8) & 0xff;
+	*scan++ = (value & 0xff);
+}
+/*
+ * Write value at id as hex digits without leading zeros ("0" for zero).
+ * No terminating NUL is written.  Returns the number of characters.
+ */
+int
+int_to_hex_v(unsigned char * id, int value)
+{
+	unsigned char *start = id;
+	int shift;
+	int ch;
+
+	for (shift = 28; shift >= 0; shift -= 4) {
+		/* skip zero nibbles until the first digit has been emitted */
+		if ((ch = (value >> shift) & 0xf) || (id != start)) {
+			*id = hexchars[ch];
+			id++;
+		}
+	}
+	if (id == start)
+		*id++ = '0';
+	return id - start;
+}
+#ifdef old_thread_list
+
+/* Inverse of int_to_threadref(): rebuild the int from bytes 4..7 of *ref. */
+static int
+threadref_to_int(threadref * ref)
+{
+	int i, value = 0;
+	unsigned char *scan;
+
+	scan = (char *) ref;
+	scan += 4;
+	i = 4;
+	while (i-- > 0)
+		value = (value << 8) | ((*scan++) & 0xff);
+	return value;
+}
+#endif
+/* Returns 1 if the first count bytes of s1 and s2 are equal, else 0. */
+static int
+cmp_str(char *s1, char *s2, int count)
+{
+	while (count--) {
+		if (*s1++ != *s2++)
+			return 0;
+	}
+	return 1;
+}
+
+#if 1				/* this is a hold over from 2.4 where O(1) was "sometimes" */
+extern struct task_struct *kgdb_get_idle(int cpu);
+#define idle_task(cpu) kgdb_get_idle(cpu)
+#else
+#define idle_task(cpu) init_tasks[cpu]
+#endif
+
+extern int kgdb_pid_init_done;
+
+/*
+ * Map a gdb thread id to a task.  Ids PID_MAX .. PID_MAX+MAX_NO_CPUS-1
+ * name the idle task of cpu (pid - PID_MAX); anything else is looked up
+ * as a real pid once the pid table exists.  Returns NULL if there is no
+ * such task.
+ */
+struct task_struct *
+getthread(int pid)
+{
+	struct task_struct *thread;
+	/* upper bound is exclusive: there are exactly MAX_NO_CPUS idle slots */
+	if (pid >= PID_MAX && pid < (PID_MAX + MAX_NO_CPUS)) {
+
+		return idle_task(pid - PID_MAX);
+	} else {
+		/*
+		 * find_task_by_pid is relatively safe all the time
+		 * Other pid functions require lock downs which imply
+		 * that we may be interrupting them (as we get here
+		 * in the middle of most any lock down).
+		 * Still we don't want to call until the table exists!
+		 */
+		if (kgdb_pid_init_done){
+			thread = find_task_by_pid(pid);
+			if (thread) {
+				return thread;
+			}
+		}
+	}
+	return NULL;
+}
+/*   *INDENT-OFF*	 */
+/* Requested state of the four hardware breakpoints (DR0..DR3). */
+struct hw_breakpoint {
+	unsigned enabled;
+	unsigned type;
+	unsigned len;
+	unsigned addr;
+} breakinfo[4] = { {enabled:0},
+		   {enabled:0},
+		   {enabled:0},
+		   {enabled:0}};
+/*   *INDENT-ON*	 */
+unsigned hw_breakpoint_status;
+/*
+ * Bring the debug registers in line with breakinfo[]: program the
+ * address register and DR7 enable/type/len bits for every breakpoint
+ * that is enabled in breakinfo but not in DR7, and clear the DR7 bits
+ * for every one that is enabled in DR7 but not in breakinfo.  DR7 is
+ * rewritten only if something changed.
+ */
+void
+correct_hw_break(void)
+{
+	int breakno;
+	int correctit;
+	int breakbit;
+	unsigned dr7;
+
+	asm volatile ("movl %%db7, %0\n":"=r" (dr7)
+		      :);
+	/*   *INDENT-OFF*	 */
+	do {
+		unsigned addr0, addr1, addr2, addr3;
+		asm volatile ("movl %%db0, %0\n"
+			      "movl %%db1, %1\n"
+			      "movl %%db2, %2\n"
+			      "movl %%db3, %3\n"
+			      :"=r" (addr0), "=r"(addr1),
+			      "=r"(addr2), "=r"(addr3)
+			      :);
+	} while (0);
+	/*   *INDENT-ON*	 */
+	correctit = 0;
+	/* all four breakpoints: breakinfo[] has 4 slots and case 3 exists */
+	for (breakno = 0; breakno < 4; breakno++) {
+		breakbit = 2 << (breakno << 1);
+		if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
+			correctit = 1;
+			dr7 |= breakbit;
+			/* unsigned constant: breakno 3 shifts into bit 31 */
+			dr7 &= ~(0xf0000U << (breakno << 2));
+			dr7 |= (((breakinfo[breakno].len << 2) |
+				 breakinfo[breakno].type) << 16) <<
+			    (breakno << 2);
+			switch (breakno) {
+			case 0:
+				asm volatile ("movl %0, %%dr0\n"::"r"
+					      (breakinfo[breakno].addr));
+				break;
+
+			case 1:
+				asm volatile ("movl %0, %%dr1\n"::"r" +
(breakinfo[breakno].addr));
+				break;
+
+			case 2:
+				asm volatile ("movl %0, %%dr2\n"::"r"
+					      (breakinfo[breakno].addr));
+				break;
+
+			case 3:
+				asm volatile ("movl %0, %%dr3\n"::"r"
+					      (breakinfo[breakno].addr));
+				break;
+			}
+		} else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
+			correctit = 1;
+			dr7 &= ~breakbit;
+			dr7 &= ~(0xf0000U << (breakno << 2));
+		}
+	}
+	if (correctit) {
+		asm volatile ("movl %0, %%db7\n"::"r" (dr7));
+	}
+}
+
+/*
+ * Mark hardware breakpoint breakno as no longer wanted.	 Returns 0 on
+ * success, -1 if breakno is out of range or the breakpoint was not set.
+ * Only breakinfo[] is updated here.
+ */
+int
+remove_hw_break(unsigned breakno)
+{
+	if (breakno >= sizeof(breakinfo) / sizeof(breakinfo[0])) {
+		return -1;
+	}
+	if (!breakinfo[breakno].enabled) {
+		return -1;
+	}
+	breakinfo[breakno].enabled = 0;
+	return 0;
+}
+
+/*
+ * Record a request for hardware breakpoint breakno.  Returns 0 on
+ * success, -1 if breakno is out of range or the slot is already in use.
+ * Only breakinfo[] is updated here.
+ */
+int
+set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned addr)
+{
+	if (breakno >= sizeof(breakinfo) / sizeof(breakinfo[0])) {
+		return -1;
+	}
+	if (breakinfo[breakno].enabled) {
+		return -1;
+	}
+	breakinfo[breakno].enabled = 1;
+	breakinfo[breakno].type = type;
+	breakinfo[breakno].len = len;
+	breakinfo[breakno].addr = addr;
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+static int in_kgdb_console = 0;
+
+int
+in_kgdb(struct pt_regs *regs)
+{
+	unsigned flags;
+	int cpu = smp_processor_id();
+	in_kgdb_called = 1;
+	if (!spin_is_locked(&kgdb_spinlock)) {
+		if (in_kgdb_here_log[cpu] ||	/* we are holding this cpu */
+		    in_kgdb_console) {	/* or we are doing slow i/o */
+			return 1;
+		}
+		return 0;
+	}
+
+	/* As I see it the only reason not to let all cpus spin on
+	 * the same spin_lock is to allow selected ones to proceed.
+	 * This would be a good thing, so we leave it this way.
+	 * Maybe someday....  Done !
+
+	 * in_kgdb() is called from an NMI so we don't pretend
+	 * to have any resources, like printk() for example.
+	 */
+
+	kgdb_local_irq_save(flags);	/* only local here, to avoid hanging */
+	/*
+	 * log arrival of this cpu
+	 * The NMI keeps on ticking.
Protect against recurring more
+	 * than once, and ignore the cpu that has the kgdb lock
+	 */
+	in_kgdb_entry_log[cpu]++;
+	in_kgdb_here_log[cpu] = regs;
+	if (cpu == spinlock_cpu || waiting_cpus[cpu].task)
+		goto exit_in_kgdb;
+
+	/*
+	 * For protection of the initialization of the spin locks by kgdb
+	 * it locks the kgdb spinlock before it gets the wait locks set
+	 * up. We wait here for the wait lock to be taken.  If the
+	 * kgdb lock goes away first??  Well, it could be a slow exit
+	 * sequence where the wait lock is removed prior to the kgdb lock
+	 * so if kgdb gets unlocked, we just exit.
+	 */
+
+	while (spin_is_locked(&kgdb_spinlock) &&
+	       !spin_is_locked(waitlocks + cpu)) ;
+	if (!spin_is_locked(&kgdb_spinlock))
+		goto exit_in_kgdb;
+
+	waiting_cpus[cpu].task = current;
+	waiting_cpus[cpu].pid = (current->pid) ? : (PID_MAX + cpu);
+	waiting_cpus[cpu].regs = regs;
+
+	spin_unlock_wait(waitlocks + cpu);
+
+	/*
+	 * log departure of this cpu
+	 */
+	waiting_cpus[cpu].task = 0;
+	waiting_cpus[cpu].pid = 0;
+	waiting_cpus[cpu].regs = 0;
+	correct_hw_break();
+      exit_in_kgdb:
+	in_kgdb_here_log[cpu] = 0;
+	kgdb_local_irq_restore(flags);
+	return 1;
+	/*
+	   spin_unlock(continuelocks + smp_processor_id());
+	 */
+}
+
+/*
+ * Acknowledges the APIC interrupt and enters in_kgdb() with the address
+ * of the by-value register frame.
+ */
+void
+smp__in_kgdb(struct pt_regs regs)
+{
+	ack_APIC_irq();
+	in_kgdb(&regs);
+}
+#else
+/* UP build: just report the value of the kgdb "lock" counter. */
+int
+in_kgdb(struct pt_regs *regs)
+{
+	return (kgdb_spinlock);
+}
+#endif
+
+/*
+ * Write a short description of the trap into buffer.  Only the debug
+ * exception (1) and breakpoint (3) vectors are described; for a debug
+ * exception DR6 is read to tell a single step from a hardware
+ * breakpoint.  errorcode is not used.
+ */
+void
+printexceptioninfo(int exceptionNo, int errorcode, char *buffer)
+{
+	unsigned dr6;
+	int i;
+	switch (exceptionNo) {
+	case 1:		/* debug exception */
+		break;
+	case 3:		/* breakpoint */
+		sprintf(buffer, "Software breakpoint");
+		return;
+	default:
+		sprintf(buffer, "Details not available");
+		return;
+	}
+	asm volatile ("movl %%db6, %0\n":"=r" (dr6)
+		      :);
+	if (dr6 & 0x4000) {
+		sprintf(buffer, "Single step");
+		return;
+	}
+	for (i = 0; i < 4; ++i) {
+		if (dr6 & (1 << i)) {
+			sprintf(buffer, "Hardware breakpoint %d", i);
+			return;
+		}
+	}
+	sprintf(buffer, "Unknown trap");
+	return;
+}
+
+/* +
* This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. + */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + int addr, length; + int breakno, breaktype; + char *ptr; + int newPC; + threadref thref; + int threadid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + int gdb_regs[NUMREGBYTES / 4]; + int dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) +#define NUMREGS NUMREGBYTES/4 + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->xcs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. 
+ */ + + if (kgdboe) + netpoll_set_trap(1); + + kgdb_local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time = 0, end_time, dum = 0; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! 
*/ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + 
exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movl %0,%%db7" + : /* no output */ + :"r"(0)); + + asm volatile ("movl %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented */ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. 
+ */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. + */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (int) (&linux_regs->esp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? 
"on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES, 0); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES, 0); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). These may have + * meaning for ptrace, but for us it is safe to + * ignor them. We do this by dumping them into + * _GS which we also ignor, but do have memory for. + */ + int regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToInt(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + regno = + (regno >= NUMREGS ? _GS : regno); + hex2mem(ptr, (char *) &gdb_regs[regno], + 4, 0); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && (hexToInt(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + mem2hex((char *) addr, + remcomOutBuffer, length, 1); + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. 
IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToInt(&ptr, &length)) && (*(ptr++) == ':')) { + hex2mem(ptr, (char *) addr, length, 1); + + if (mem_err) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. + */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToInt(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%x\n", addr); + + regs.eip = addr; + } + + newPC = regs.eip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. 
Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(&regs); + } + asm volatile ("movl %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movl %0, %%db6\n"::"r" (0)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? : current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? 
: current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * 
syntax.. + */ + *ptr = 0; + } + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. 
+ */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + ptr++; + hexToInt(&ptr, &breaktype); + ptr++; + hexToInt(&ptr, &length); + ptr++; + hexToInt(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToInt(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. 
We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + int *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (int); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->xcs; + *--ptr = linux_regs->eip; + *--ptr = linux_regs->ecx; + *--ptr = linux_regs->ebx; + *--ptr = linux_regs->eax; + linux_regs->ecx = NEW_esp - (sizeof (int) * 6); + linux_regs->ebx = (unsigned int) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->eip = (unsigned int) fn_call_stub; + } else { + linux_regs->eip = (unsigned int) fn_rtn_stub; + linux_regs->eax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. + * We will stay here till another cpu releases the lock for us. 
+ */ + spin_unlock_wait(waitlocks + smp_processor_id()); + kgdb_local_irq_restore(flags); + return (0); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + kgdb_local_irq_restore(flags); + return (0); +} + +/* this function is used to set up exception handlers for tracing and + * breakpoints. + * This function is not needed as the above line does all that is needed. + * We leave it for backward compatitability... + */ +void +set_debug_traps(void) +{ + /* + * linux_debug_hook is defined in traps.c. We store a pointer + * to our own exception handler into it. + + * But really folks, every hear of labeled common, an old Fortran + * concept. Lots of folks can reference it and it is define if + * anyone does. Only one can initialize it at link time. We do + * this with the hook. See the statement above. No need for any + * executable code and it is ready as soon as the kernel is + * loaded. Very desirable in kernel debugging. + + linux_debug_hook = handle_exception ; + */ + + /* In case GDB is started before us, ack any packets (presumably + "$?#xx") sitting there. + putDebugChar ('+'); + + initialized = 1; + */ +} + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. 
+ */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. 
+ * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. + */ + +static int +kgdb_consdev_open(struct inode *inode, struct file *file) +{ + return 0; +} + +static ssize_t +kgdb_consdev_write(struct file *file, const char *buf, + size_t count, loff_t * ppos) +{ + int size, ret = 0; + static char kbuf[128]; + static DECLARE_MUTEX(sem); + + /* We are not reentrant... 
*/ + if (down_interruptible(&sem)) + return -ERESTARTSYS; + + while (count > 0) { + /* need to copy the data from user space */ + size = count; + if (size > sizeof (kbuf)) + size = sizeof (kbuf); + if (copy_from_user(kbuf, buf, size)) { + ret = -EFAULT; + break;; + } + kgdb_console_write(&kgdbcons, kbuf, size); + count -= size; + ret += size; + buf += size; + } + + up(&sem); + + return ret; +} + +struct file_operations kgdb_consdev_fops = { + open:kgdb_consdev_open, + write:kgdb_consdev_write +}; +static kdev_t +kgdb_console_device(struct console *c) +{ + return MKDEV(TTYAUX_MAJOR, 1); +} + +/* + * This routine gets called from the serial stub in the i386/lib + * This is so it is done late in bring up (just before the console open). + */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int 
from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + int flags; + kgdb_local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int *) (((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + kgdb_local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... 
*/ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + BREAKPOINT; + } +} + --- linux-2.6.6-rc1/arch/i386/kernel/Makefile 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/kernel/Makefile 2004-04-18 22:25:30.316221712 -0700 @@ -14,6 +14,7 @@ obj-y += timers/ obj-$(CONFIG_ACPI_BOOT) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_CPUID) += cpuid.o obj-$(CONFIG_MICROCODE) += microcode.o @@ -31,6 +32,7 @@ obj-$(CONFIG_ACPI_SRAT) += srat.o obj-$(CONFIG_HPET_TIMER) += time_hpet.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_X86_STD_RESOURCES) += std_resources.o EXTRA_AFLAGS := -traditional --- linux-2.6.6-rc1/arch/i386/kernel/mpparse.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/kernel/mpparse.c 2004-04-18 22:26:02.597314240 -0700 @@ -28,7 +28,6 @@ #include #include #include -#include #include #include --- linux-2.6.6-rc1/arch/i386/kernel/nmi.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/nmi.c 2004-04-18 22:25:43.857163176 -0700 @@ -31,9 +31,19 @@ #include #include +#ifdef CONFIG_KGDB +#include +#ifdef CONFIG_SMP +unsigned int nmi_watchdog = NMI_IO_APIC; +#else +unsigned int nmi_watchdog = NMI_LOCAL_APIC; +#endif +#else unsigned int nmi_watchdog = NMI_NONE; +#endif static unsigned int nmi_hz = HZ; -unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ +static unsigned int nmi_p4_cccr_val; extern void show_registers(struct pt_regs *regs); /* nmi_active: @@ -66,7 +76,8 
@@ int nmi_active; #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) #define P4_ESCR_OS (1<<3) #define P4_ESCR_USR (1<<2) -#define P4_CCCR_OVF_PMI (1<<26) +#define P4_CCCR_OVF_PMI0 (1<<26) +#define P4_CCCR_OVF_PMI1 (1<<27) #define P4_CCCR_THRESHOLD(N) ((N)<<20) #define P4_CCCR_COMPLEMENT (1<<19) #define P4_CCCR_COMPARE (1<<18) @@ -79,7 +90,7 @@ int nmi_active; #define MSR_P4_IQ_COUNTER0 0x30C #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) #define P4_NMI_IQ_CCCR0 \ - (P4_CCCR_OVF_PMI|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ + (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) int __init check_nmi_watchdog (void) @@ -322,6 +333,11 @@ static int setup_p4_watchdog(void) return 0; nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; + nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; +#endif if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) clear_msr_range(0x3F1, 2); @@ -339,7 +355,7 @@ static int setup_p4_watchdog(void) Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; } @@ -408,6 +424,9 @@ void touch_nmi_watchdog (void) for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; } +#ifdef CONFIG_KGDB +int tune_watchdog = 5*HZ; +#endif void nmi_watchdog_tick (struct pt_regs * regs) { @@ -421,12 +440,24 @@ void nmi_watchdog_tick (struct pt_regs * sum = irq_stat[cpu].apic_timer_irqs; +#ifdef CONFIG_KGDB + if (! in_kgdb(regs) && last_irq_sums[cpu] == sum ) { + +#else if (last_irq_sums[cpu] == sum) { +#endif /* * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... 
*/ alert_counter[cpu]++; +#ifdef CONFIG_KGDB + if (alert_counter[cpu] == tune_watchdog) { + kgdb_handle_exception(2, SIGPWR, 0, regs); + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +#endif if (alert_counter[cpu] == 5*nmi_hz) { spin_lock(&nmi_print_lock); /* @@ -455,9 +486,16 @@ void nmi_watchdog_tick (struct pt_regs * * - LVTPC is masked on interrupt and must be * unmasked by the LVTPC handler. */ - wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0); + wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); apic_write(APIC_LVTPC, APIC_DM_NMI); } + else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) { + /* Only P6 based Pentium M need to re-unmask + * the apic vector but it doesn't hurt + * other P6 variant */ + apic_write(APIC_LVTPC, + apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + } wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); } } --- linux-2.6.6-rc1/arch/i386/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/process.c 2004-04-18 22:25:54.249583288 -0700 @@ -629,11 +629,6 @@ out: return error; } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) #define top_esp (THREAD_SIZE - sizeof(unsigned long)) #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) @@ -654,14 +649,12 @@ unsigned long get_wchan(struct task_stru if (ebp < stack_page || ebp > top_ebp+stack_page) return 0; eip = *(unsigned long *) (ebp+4); - if (eip < first_sched || eip >= last_sched) + if (!in_sched_functions(eip)) return eip; ebp = *(unsigned long *) ebp; } while (count++ < 16); return 0; } -#undef last_sched -#undef first_sched /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. 
--- linux-2.6.6-rc1/arch/i386/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/setup.c 2004-04-18 22:25:35.888374616 -0700 @@ -128,7 +128,6 @@ unsigned long saved_videomode; #define RAMDISK_LOAD_FLAG 0x4000 static char command_line[COMMAND_LINE_SIZE]; - char saved_command_line[COMMAND_LINE_SIZE]; unsigned char __initdata boot_params[PARAM_SIZE]; --- linux-2.6.6-rc1/arch/i386/kernel/signal.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/signal.c 2004-04-18 22:25:45.306942776 -0700 @@ -317,7 +317,7 @@ setup_sigcontext(struct sigcontext __use * Determine which stack to use.. */ static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka_copy, struct pt_regs * regs, size_t frame_size) { unsigned long esp; @@ -325,16 +325,16 @@ get_sigframe(struct k_sigaction *ka, str esp = regs->esp; /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { + if (ka_copy->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(esp) == 0) esp = current->sas_ss_sp + current->sas_ss_size; } /* This is the legacy signal stack switching. */ else if ((regs->xss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - esp = (unsigned long) ka->sa.sa_restorer; + !(ka_copy->sa.sa_flags & SA_RESTORER) && + ka_copy->sa.sa_restorer) { + esp = (unsigned long) ka_copy->sa.sa_restorer; } return (void __user *)((esp - frame_size) & -8ul); @@ -344,14 +344,14 @@ get_sigframe(struct k_sigaction *ka, str See vsyscall-sigreturn.S. 
*/ extern void __kernel_sigreturn, __kernel_rt_sigreturn; -static void setup_frame(int sig, struct k_sigaction *ka, +static void setup_frame(int sig, struct k_sigaction *ka_copy, sigset_t *set, struct pt_regs * regs) { void *restorer; struct sigframe __user *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -377,8 +377,8 @@ static void setup_frame(int sig, struct goto give_sigsegv; restorer = &__kernel_sigreturn; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) + restorer = ka_copy->sa.sa_restorer; /* Set up to return from userspace. */ err |= __put_user(restorer, &frame->pretcode); @@ -399,7 +399,7 @@ static void setup_frame(int sig, struct /* Set up registers for signal handler */ regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; + regs->eip = (unsigned long) ka_copy->sa.sa_handler; set_fs(USER_DS); regs->xds = __USER_DS; @@ -417,18 +417,18 @@ static void setup_frame(int sig, struct give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[sig-1].sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static void setup_rt_frame(int sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { void *restorer; struct rt_sigframe __user *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -460,8 +460,8 @@ static void setup_rt_frame(int sig, stru /* Set up to return from userspace. 
*/ restorer = &__kernel_rt_sigreturn; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) + restorer = ka_copy->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); /* @@ -480,7 +480,7 @@ static void setup_rt_frame(int sig, stru /* Set up registers for signal handler */ regs->esp = (unsigned long) frame; - regs->eip = (unsigned long) ka->sa.sa_handler; + regs->eip = (unsigned long) ka_copy->sa.sa_handler; set_fs(USER_DS); regs->xds = __USER_DS; @@ -498,7 +498,7 @@ static void setup_rt_frame(int sig, stru give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[sig-1].sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ -507,11 +507,9 @@ give_sigsegv: */ static void -handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs) +handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka_copy, + sigset_t *oldset, struct pt_regs * regs) { - struct k_sigaction *ka = &current->sighand->action[sig-1]; - /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. 
*/ @@ -522,7 +520,7 @@ handle_signal(unsigned long sig, siginfo break; case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { + if (!(ka_copy->sa.sa_flags & SA_RESTART)) { regs->eax = -EINTR; break; } @@ -534,17 +532,14 @@ handle_signal(unsigned long sig, siginfo } /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + if (ka_copy->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka_copy, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + setup_frame(sig, ka_copy, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked,&current->blocked,&ka_copy->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); @@ -560,6 +555,7 @@ int fastcall do_signal(struct pt_regs *r { siginfo_t info; int signr; + struct k_sigaction ka_copy; /* * We want the common case to go fast, which @@ -578,7 +574,7 @@ int fastcall do_signal(struct pt_regs *r if (!oldset) oldset = &current->blocked; - signr = get_signal_to_deliver(&info, regs, NULL); + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); if (signr > 0) { /* Reenable any watchpoints before delivering the * signal to user space. The processor register will @@ -588,7 +584,7 @@ int fastcall do_signal(struct pt_regs *r __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7])); /* Whee! Actually deliver the signal. 
*/ - handle_signal(signr, &info, oldset, regs); + handle_signal(signr, &info, &ka_copy, oldset, regs); return 1; } --- linux-2.6.6-rc1/arch/i386/kernel/smpboot.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/smpboot.c 2004-04-18 22:26:02.598314088 -0700 @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include #include -#include #include #include #include @@ -936,7 +936,7 @@ static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; -int cpu_sibling_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; static void __init smp_boot_cpus(unsigned int max_cpus) { @@ -955,6 +955,8 @@ static void __init smp_boot_cpus(unsigne current_thread_info()->cpu = 0; smp_tune_scheduling(); + cpus_clear(cpu_sibling_map[0]); + cpu_set(0, cpu_sibling_map[0]); /* * If we couldn't find an SMP configuration at boot time, @@ -1081,34 +1083,39 @@ static void __init smp_boot_cpus(unsigne Dprintk("Boot done.\n"); /* - * If Hyper-Threading is avaialble, construct cpu_sibling_map[], so - * that we can tell the sibling CPU efficiently. + * construct cpu_sibling_map[], so that we can tell sibling CPUs + * efficiently. 
*/ - if (cpu_has_ht && smp_num_siblings > 1) { - for (cpu = 0; cpu < NR_CPUS; cpu++) - cpu_sibling_map[cpu] = NO_PROC_ID; - - for (cpu = 0; cpu < NR_CPUS; cpu++) { - int i; - if (!cpu_isset(cpu, cpu_callout_map)) - continue; + for (cpu = 0; cpu < NR_CPUS; cpu++) + cpus_clear(cpu_sibling_map[cpu]); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + int siblings = 0; + int i; + if (!cpu_isset(cpu, cpu_callout_map)) + continue; + if (smp_num_siblings > 1) { for (i = 0; i < NR_CPUS; i++) { - if (i == cpu || !cpu_isset(i, cpu_callout_map)) + if (!cpu_isset(i, cpu_callout_map)) continue; if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_sibling_map[cpu] = i; - printk("cpu_sibling_map[%d] = %d\n", cpu, cpu_sibling_map[cpu]); - break; + siblings++; + cpu_set(i, cpu_sibling_map[cpu]); } } - if (cpu_sibling_map[cpu] == NO_PROC_ID) { - smp_num_siblings = 1; - printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu); - } + } else { + siblings++; + cpu_set(cpu, cpu_sibling_map[cpu]); } + + if (siblings != smp_num_siblings) + printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings); } + if (nmi_watchdog == NMI_LOCAL_APIC) + check_nmi_watchdog(); + smpboot_setup_io_apic(); setup_boot_APIC_clock(); @@ -1120,6 +1127,209 @@ static void __init smp_boot_cpus(unsigne synchronize_tsc_bp(); } +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = 
&per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + phys_domain->parent = node_domain; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + + *node_domain = SD_NODE_INIT; + node_domain->span = cpu_possible_map; + node_domain->groups = &sched_group_nodes[cpu_to_node(i)]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first = last = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpu->cpumask = CPU_MASK_NONE; + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + struct sched_group *node = &sched_group_nodes[i]; + cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + first = last = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* + * Make each extra sibling increase power by 10% of + * the basic CPU. This is very arbitrary. 
+ */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + node->cpu_power += cpu->cpu_power; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + /* Set up nodes */ + first = last = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpus_and(nodemask, node_to_cpumask(i), cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + /* ->cpu_power already setup */ + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} +#else /* !CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first = NULL, *last = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + + *cpu_domain = SD_SIBLING_INIT; + cpu_domain->span = cpu_sibling_map[i]; + cpu_domain->parent = phys_domain; + cpu_domain->groups = &sched_group_cpus[i]; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_possible_map; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first = last = NULL; + + if (i != first_cpu(cpu_domain->span)) + continue; + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + 
cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + } + + first = last = NULL; + /* Set up physical groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* See SMT+NUMA setup for comment */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + + if (!first) + first = cpu; + if (last) + last->next = cpu; + last = cpu; + } + last->next = first; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} +#endif /* CONFIG_NUMA */ +#endif /* CONFIG_SCHED_SMT */ + /* These are wrappers to interface to the new boot process. Someone who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ void __init smp_prepare_cpus(unsigned int max_cpus) --- linux-2.6.6-rc1/arch/i386/kernel/smp.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/smp.c 2004-04-18 22:26:02.599313936 -0700 @@ -21,7 +21,6 @@ #include #include -#include #include #include #include @@ -466,7 +465,17 @@ void flush_tlb_all(void) { on_each_cpu(do_flush_tlb_all, 0, 1, 1); } - +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif /* * this function sends a 'reschedule' IPI to another CPU. * it goes straight through and wastes no time serializing --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/kernel/std_resources.c 2004-04-18 22:25:24.640084616 -0700 @@ -0,0 +1,204 @@ +/* + * Machine specific resource allocation for generic. 
+ */ + +#include +#include +#include + +#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) + +static struct resource system_rom_resource = { + .name = "System ROM", + .start = 0xf0000, + .end = 0xfffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource extension_rom_resource = { + .name = "Extension ROM", + .start = 0xe0000, + .end = 0xeffff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource adapter_rom_resources[] = { { + .name = "Adapter ROM", + .start = 0xc8000, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}, { + .name = "Adapter ROM", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +} }; + +#define ADAPTER_ROM_RESOURCES \ + (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) + +static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, + .end = 0xc7fff, + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM +}; + +static struct resource vram_resource = { + .name = "Video RAM area", + .start = 0xa0000, + .end = 0xbffff, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource standard_io_resources[] = { { + .name = "dma1", + .start = 0x0000, + .end = 0x001f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic1", + .start = 0x0020, + .end = 0x0021, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "timer", + .start 
= 0x0040, + .end = 0x005f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "keyboard", + .start = 0x0060, + .end = 0x006f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma page reg", + .start = 0x0080, + .end = 0x008f, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "pic2", + .start = 0x00a0, + .end = 0x00a1, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "dma2", + .start = 0x00c0, + .end = 0x00df, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "fpu", + .start = 0x00f0, + .end = 0x00ff, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +} }; + +#define STANDARD_IO_RESOURCES \ + (sizeof standard_io_resources / sizeof standard_io_resources[0]) + +static int __init checksum(unsigned char *rom, unsigned long length) +{ + unsigned char *p, sum = 0; + + for (p = rom; p < rom + length; p++) + sum += *p; + return sum == 0; +} + +void __init probe_roms(void) +{ + unsigned long start, length, upper; + unsigned char *rom; + int i; + + /* video rom */ + upper = adapter_rom_resources[0].start; + for (start = video_rom_resource.start; start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + video_rom_resource.start = start; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* if checksum okay, trust length byte */ + if (length && checksum(rom, length)) + video_rom_resource.end = start + length - 1; + + request_resource(&iomem_resource, &video_rom_resource); + break; + } + + start = (video_rom_resource.end + 1 + 2047) & ~2047UL; + if (start < upper) + start = upper; + + /* system rom */ + request_resource(&iomem_resource, &system_rom_resource); + upper = system_rom_resource.start; + + /* check for extension rom (ignore length byte!) 
*/ + rom = isa_bus_to_virt(extension_rom_resource.start); + if (romsignature(rom)) { + length = extension_rom_resource.end - extension_rom_resource.start + 1; + if (checksum(rom, length)) { + request_resource(&iomem_resource, &extension_rom_resource); + upper = extension_rom_resource.start; + } + } + + /* check for adapter roms on 2k boundaries */ + for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; + + /* 0 < length <= 0x7f * 512, historically */ + length = rom[2] * 512; + + /* but accept any length that fits if checksum okay */ + if (!length || start + length > upper || !checksum(rom, length)) + continue; + + adapter_rom_resources[i].start = start; + adapter_rom_resources[i].end = start + length - 1; + request_resource(&iomem_resource, &adapter_rom_resources[i]); + + start = adapter_rom_resources[i++].end & ~2047UL; + } +} + +void __init request_graphics_resource(void) +{ + request_resource(&iomem_resource, &vram_resource); +} + +void __init request_standard_io_resources(void) +{ + int i; + + for (i = 0; i < STANDARD_IO_RESOURCES; i++) + request_resource(&ioport_resource, &standard_io_resources[i]); +} --- linux-2.6.6-rc1/arch/i386/kernel/traps.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/kernel/traps.c 2004-04-18 22:26:02.599313936 -0700 @@ -47,7 +47,6 @@ #include #include -#include #include #include @@ -92,6 +91,40 @@ asmlinkage void alignment_check(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); +#ifdef CONFIG_KGDB +extern void sysenter_entry(void); +#include +#include +void set_intr_gate(unsigned int n, void *addr); +static void set_intr_usr_gate(unsigned int n, void *addr); +/* + * Should be able to call this breakpoint() very early in + * bring up. Just hard code the call where needed. + * The breakpoint() code is here because set_?_gate() functions + * are local (static) to trap.c. 
They need be done only once, + * but it does not hurt to do them over. + */ +void breakpoint(void) +{ + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); + + BREAKPOINT; +} +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) \ + { \ + if (!user_mode(regs) ) \ + { \ + kgdb_handle_exception(trapnr, signr, error_code, regs); \ + after; \ + } else if ((trapnr == 3) && (regs->eflags &0x200)) local_irq_enable(); \ + } +#else +#define CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,after) +#endif + + static int kstack_depth_to_print = 24; void show_trace(struct task_struct *task, unsigned long * stack) @@ -184,7 +217,7 @@ void show_registers(struct pt_regs *regs ss = regs->xss & 0xffff; } print_modules(); - printk("CPU: %d\nEIP: %04x:[<%08lx>] %s\nEFLAGS: %08lx" + printk("CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\nEFLAGS: %08lx" " (%s) \n", smp_processor_id(), 0xffff & regs->xcs, regs->eip, print_tainted(), regs->eflags, UTS_RELEASE); @@ -202,23 +235,25 @@ void show_registers(struct pt_regs *regs * time of the fault.. */ if (in_kernel) { + u8 *eip; printk("\nStack: "); show_stack(NULL, (unsigned long*)esp); printk("Code: "); - if(regs->eip < PAGE_OFFSET) - goto bad; - for(i=0;i<20;i++) - { + eip = (u8 *)regs->eip - 43; + for (i = 0; i < 64; i++, eip++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->eip)[i])) { -bad: + + if (eip < (u8 *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; } - printk("%02x ", c); + if (eip == (u8 *)regs->eip) + printk("<%02x> ", c); + else + printk("%02x ", c); } } printk("\n"); @@ -286,6 +321,15 @@ void die(const char * str, struct pt_reg #endif if (nl) printk("\n"); +#ifdef CONFIG_KGDB + /* This is about the only place we want to go to kgdb even if in + * user mode. But we must go in via a trap so within kgdb we will + * always be in kernel mode. 
+ */ + if (user_mode(regs)) + BREAKPOINT; +#endif + CHK_REMOTE_DEBUG(0,SIGTRAP,err,regs,) show_registers(regs); bust_spinlocks(0); spin_unlock_irq(&die_lock); @@ -355,6 +399,7 @@ static inline void do_trap(int trapnr, i #define DO_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr,signr,error_code,regs,)\ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ } @@ -372,7 +417,9 @@ asmlinkage void do_##name(struct pt_regs #define DO_VM86_ERROR(trapnr, signr, str, name) \ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ { \ + CHK_REMOTE_DEBUG(trapnr, signr, error_code,regs, return)\ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ + return; \ } #define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ @@ -419,8 +466,10 @@ gp_in_vm86: return; gp_in_kernel: - if (!fixup_exception(regs)) + if (!fixup_exception(regs)){ + CHK_REMOTE_DEBUG(13,SIGSEGV,error_code,regs,) die("general protection fault", regs, error_code); + } } static void mem_parity_error(unsigned char reason, struct pt_regs * regs) @@ -582,8 +631,18 @@ asmlinkage void do_debug(struct pt_regs * allowing programs to debug themselves without the ptrace() * interface. */ +#ifdef CONFIG_KGDB + /* + * I think this is the only "real" case of a TF in the kernel + * that really belongs to user space. Others are + * "Ours all ours!" 
+ */ + if (((regs->xcs & 3) == 0) && ((void *)regs->eip == sysenter_entry)) + goto clear_TF_reenable; +#else if ((regs->xcs & 3) == 0) goto clear_TF_reenable; +#endif if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) goto clear_TF; } @@ -595,6 +654,17 @@ asmlinkage void do_debug(struct pt_regs info.si_errno = 0; info.si_code = TRAP_BRKPT; +#ifdef CONFIG_KGDB + /* + * If this is a kernel mode trap, we need to reset db7 to allow us + * to continue sanely ALSO skip the signal delivery + */ + if ((regs->xcs & 3) == 0) + goto clear_dr7; + + /* if not kernel, allow ints but only if they were on */ + if ( regs->eflags & 0x200) local_irq_enable(); +#endif /* If this is a kernel mode trap, save the user PC on entry to * the kernel, that's what the debugger can make sense of. */ @@ -609,6 +679,7 @@ clear_dr7: __asm__("movl %0,%%db7" : /* no output */ : "r" (0)); + CHK_REMOTE_DEBUG(1,SIGTRAP,error_code,regs,) return; debug_vm86: @@ -857,6 +928,12 @@ static void __init set_call_gate(void *a { _set_gate(a,12,3,addr,__KERNEL_CS); } +#ifdef CONFIG_KGDB +void set_intr_usr_gate(unsigned int n, void *addr) +{ + _set_gate(idt_table+n,14,3,addr,__KERNEL_CS); +} +#endif static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) { @@ -879,7 +956,11 @@ void __init trap_init(void) set_trap_gate(0,&divide_error); set_intr_gate(1,&debug); set_intr_gate(2,&nmi); +#ifndef CONFIG_KGDB set_system_gate(3,&int3); /* int3-5 can be called from all */ +#else + set_intr_usr_gate(3,&int3); /* int3-5 can be called from all */ +#endif set_system_gate(4,&overflow); set_system_gate(5,&bounds); set_trap_gate(6,&invalid_op); --- linux-2.6.6-rc1/arch/i386/kernel/vm86.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/i386/kernel/vm86.c 2004-04-18 22:26:02.600313784 -0700 @@ -44,7 +44,6 @@ #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/lib/dec_and_lock.c 2003-11-09 16:45:04.000000000 -0800 +++ 25/arch/i386/lib/dec_and_lock.c 2004-04-18 22:25:47.915546208 
-0700 @@ -10,6 +10,7 @@ #include #include +#ifndef ATOMIC_DEC_AND_LOCK int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { int counter; @@ -38,3 +39,5 @@ slow_path: spin_unlock(lock); return 0; } +#endif + --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/i386/lib/kgdb_serial.c 2004-04-18 22:25:30.861138872 -0700 @@ -0,0 +1,499 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? 
SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. On the other hand + * we can lose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. 
+ */ +static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED; +#ifdef CONFIG_SMP +extern spinlock_t kgdb_spinlock; +#endif + +static int +read_char(struct async_struct *info) +{ + int chr; + unsigned long flags; + local_irq_save(flags); +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_lock(&uart_interrupt_lock); + } +#endif + if (atomic_read(&gdb_buf_in_cnt) != 0) { /* intr routine has q'd chars */ + chr = gdb_buf[gdb_buf_out_inx++]; + gdb_buf_out_inx &= (GDB_BUF_SIZE - 1); + atomic_dec(&gdb_buf_in_cnt); + } else { + chr = read_data_bfr(info); + } +#ifdef CONFIG_SMP + if (!spin_is_locked(&kgdb_spinlock)) { + spin_unlock(&uart_interrupt_lock); + } +#endif + local_irq_restore(flags); + return (chr); +} + +/* + * Wait until the interface can accept a char, then write it. + */ +static void +write_char(struct async_struct *info, int chr) +{ + while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ; + + outb_p(chr, info->port + UART_TX); + +} /* write_char */ + +/* + * Mostly we don't need a spinlock, but since the console goes + * thru here with interrupts on, well, we need to catch those + * chars. + */ +/* + * This is the receiver interrupt routine for the GDB stub. + * It will receive a limited number of characters of input + * from the gdb host machine and save them up in a buffer. + * + * When the gdb stub routine tty_getDebugChar() is called it + * draws characters out of the buffer until it is empty and + * then reads directly from the serial port. + * + * We do not attempt to write chars from the interrupt routine + * since the stubs do all of that via tty_putDebugChar() which + * writes one byte after waiting for the interface to become + * ready. + * + * The debug stubs like to run with interrupts disabled since, + * after all, they run as a consequence of a breakpoint in + * the kernel. + * + * Perhaps someone who knows more about the tty driver than I + * care to learn can make this work for any low level serial + * driver. 
+ */ +static irqreturn_t +gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) +{ + struct async_struct *info; + unsigned long flags; + + info = gdb_async_info; + if (!info || !info->tty || irq != gdb_async_irq) + return IRQ_NONE; + + local_irq_save(flags); + spin_lock(&uart_interrupt_lock); + do { + int chr = read_data_bfr(info); + intprintk(("Debug char on int: %x hex\n", chr)); + if (chr < 0) + continue; + + if (chr == 3) { /* Ctrl-C means remote interrupt */ + BREAKPOINT; + continue; + } + + if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) { + /* buffer overflow tosses early char */ + read_char(info); + } + gdb_buf[gdb_buf_in_inx++] = chr; + gdb_buf_in_inx &= (GDB_BUF_SIZE - 1); + } while (inb_p(info->port + UART_IIR) & UART_IIR_RDI); + spin_unlock(&uart_interrupt_lock); + local_irq_restore(flags); + return IRQ_HANDLED; +} /* gdb_interrupt */ + +/* + * Just a NULL routine for testing. + */ +void +gdb_null(void) +{ +} /* gdb_null */ + +/* These structure are filled in with values defined in asm/kgdb_local.h + */ +static struct serial_state state = SB_STATE; +static struct async_struct local_info = SB_INFO; +static int ok_to_enable_ints = 0; +static void kgdb_enable_ints_now(void); + +extern char *kgdb_version; +/* + * Hook an IRQ for KGDB. + * + * This routine is called from tty_putDebugChar, below. + */ +static int ints_disabled = 1; +int +gdb_hook_interrupt(struct async_struct *info, int verb) +{ + struct serial_state *state = info->state; + unsigned long flags; + int port; +#ifdef TEST_EXISTANCE + int scratch, scratch2; +#endif + + /* The above fails if memory managment is not set up yet. + * Rather than fail the set up, just keep track of the fact + * and pick up the interrupt thing later. 
+ */ + gdb_async_info = info; + port = gdb_async_info->port; + gdb_async_irq = state->irq; + if (verb) { + printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n", + kgdb_version, + port, + gdb_async_irq, gdb_async_info->state->custom_divisor); + } + local_irq_save(flags); +#ifdef TEST_EXISTANCE + /* Existance test */ + /* Should not need all this, but just in case.... */ + + scratch = inb_p(port + UART_IER); + outb_px(port + UART_IER, 0); + outb_px(0xff, 0x080); + scratch2 = inb_p(port + UART_IER); + outb_px(port + UART_IER, scratch); + if (scratch2) { + printk + ("gdb_hook_interrupt: Could not clear IER, not a UART!\n"); + local_irq_restore(flags); + return 1; /* We failed; there's nothing here */ + } + scratch2 = inb_p(port + UART_LCR); + outb_px(port + UART_LCR, 0xBF); /* set up for StarTech test */ + outb_px(port + UART_EFR, 0); /* EFR is the same as FCR */ + outb_px(port + UART_LCR, 0); + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO); + scratch = inb_p(port + UART_IIR) >> 6; + if (scratch == 1) { + printk("gdb_hook_interrupt: Undefined UART type!" + " Not a UART! \n"); + local_irq_restore(flags); + return 1; + } else { + dbprintk(("gdb_hook_interrupt: UART type " + "is %d where 0=16450, 2=16550 3=16550A\n", scratch)); + } + scratch = inb_p(port + UART_MCR); + outb_px(port + UART_MCR, UART_MCR_LOOP | scratch); + outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A); + scratch2 = inb_p(port + UART_MSR) & 0xF0; + outb_px(port + UART_MCR, scratch); + if (scratch2 != 0x90) { + printk("gdb_hook_interrupt: " + "Loop back test failed! 
Not a UART!\n"); + local_irq_restore(flags); + return scratch2 + 1000; /* force 0 to fail */ + } +#endif /* test existance */ + program_uart(info); + local_irq_restore(flags); + + return (0); + +} /* gdb_hook_interrupt */ + +static void +program_uart(struct async_struct *info) +{ + int port = info->port; + + (void) inb_p(port + UART_RX); + outb_px(port + UART_IER, 0); + + (void) inb_p(port + UART_RX); /* serial driver comments say */ + (void) inb_p(port + UART_IIR); /* this clears the interrupt regs */ + (void) inb_p(port + UART_MSR); + outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); + outb_px(port + UART_DLL, info->state->custom_divisor & 0xff); /* LS */ + outb_px(port + UART_DLM, info->state->custom_divisor >> 8); /* MS */ + outb_px(port + UART_MCR, info->MCR); + + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR); /* set fcr */ + outb_px(port + UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ + outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1); /* set fcr */ + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + return; +} + +/* + * tty_getDebugChar + * + * This is a GDB stub routine. It waits for a character from the + * serial interface and then returns it. If there is no serial + * interface connection then it returns a bogus value which will + * almost certainly cause the system to hang. 
In the + */ +int kgdb_in_isr = 0; +int kgdb_in_lsr = 0; +extern spinlock_t kgdb_spinlock; + +/* Caller takes needed protections */ + +int +tty_getDebugChar(void) +{ + volatile int chr, dum, time, end_time; + + dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + /* + * This trick says if we wait a very long time and get + * no char, return the -1 and let the upper level deal + * with it. + */ + rdtsc(dum, time); + end_time = time + 2; + while (((chr = read_char(gdb_async_info)) == -1) && + (end_time - time) > 0) { + rdtsc(dum, time); + }; + /* + * This covers our butts if some other code messes with + * our uart, hay, it happens :o) + */ + if (chr == -1) + program_uart(gdb_async_info); + + dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' ')); + return (chr); + +} /* tty_getDebugChar */ + +static int count = 3; +static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED; + +static int __init +kgdb_enable_ints(void) +{ + if (kgdboe) { + return 0; + } + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 1); + } + ok_to_enable_ints = 1; + kgdb_enable_ints_now(); +#ifdef CONFIG_KGDB_USER_CONSOLE + kgdb_console_finit(); +#endif + return 0; +} + +#ifdef CONFIG_SERIAL_8250 +void shutdown_for_kgdb(struct async_struct *gdb_async_info); +#endif + +#ifdef CONFIG_DISCONTIGMEM +static inline int kgdb_mem_init_done(void) +{ + return highmem_start_page != NULL; +} +#else +static inline int kgdb_mem_init_done(void) +{ + return max_mapnr != 0; +} +#endif + +static void +kgdb_enable_ints_now(void) +{ + if (!spin_trylock(&one_at_atime)) + return; + if (!ints_disabled) + goto exit; + if (kgdb_mem_init_done() && + ints_disabled) { /* don't try till mem init */ +#ifdef CONFIG_SERIAL_8250 + /* + * The ifdef here allows the system to be configured + * without the serial driver. 
+ * Don't make it a module, however, it will steal the port + */ + shutdown_for_kgdb(gdb_async_info); +#endif + ints_disabled = request_irq(gdb_async_info->state->irq, + gdb_interrupt, + IRQ_T(gdb_async_info), + "KGDB-stub", NULL); + intprintk(("KGDB: request_irq returned %d\n", ints_disabled)); + } + if (!ints_disabled) { + intprintk(("KGDB: Sending %d to port %x offset %d\n", + gdb_async_info->IER, + (int) gdb_async_info->port, UART_IER)); + outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER); + } + exit: + spin_unlock(&one_at_atime); +} + +/* + * tty_putDebugChar + * + * This is a GDB stub routine. It waits until the interface is ready + * to transmit a char and then sends it. If there is no serial + * interface connection then it simply returns to its caller, having + * pretended to send the char. Caller takes needed protections. + */ +void +tty_putDebugChar(int chr) +{ + dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n", + gdb_async_info->port, + chr, + chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1)); + + if (gdb_async_info == NULL) { + gdb_hook_interrupt(&local_info, 0); + } + + write_char(gdb_async_info, chr); /* this routine will wait */ + count = (chr == '#') ? 0 : count + 1; + if ((count == 2)) { /* try to enable after */ + if (ints_disabled & ok_to_enable_ints) + kgdb_enable_ints_now(); /* try to enable after */ + + /* We do this a lot because, well we really want to get these + * interrupts. The serial driver will clear these bits when it + * initializes the chip. Every thing else it does is ok, + * but this. + */ + if (!ints_disabled) { + outb_px(gdb_async_info->port + UART_IER, + gdb_async_info->IER); + } + } + +} /* tty_putDebugChar */ + +/* + * This does nothing for the serial port, since it doesn't buffer. 
+ */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.6-rc1/arch/i386/lib/Makefile 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/lib/Makefile 2004-04-18 22:25:30.322220800 -0700 @@ -9,3 +9,4 @@ lib-y = checksum.o delay.o \ lib-$(CONFIG_X86_USE_3DNOW) += mmx.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o --- linux-2.6.6-rc1/arch/i386/mach-default/Makefile 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/mach-default/Makefile 2004-04-18 22:25:24.640084616 -0700 @@ -2,4 +2,4 @@ # Makefile for the linux kernel. # -obj-y := setup.o topology.o std_resources.o +obj-y := setup.o topology.o --- linux-2.6.6-rc1/arch/i386/mach-default/std_resources.c 2004-04-14 23:14:47.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,204 +0,0 @@ -/* - * Machine specific resource allocation for generic. - */ - -#include -#include -#include - -#define romsignature(x) (*(unsigned short *)(x) == 0xaa55) - -static struct resource system_rom_resource = { - .name = "System ROM", - .start = 0xf0000, - .end = 0xfffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource extension_rom_resource = { - .name = "Extension ROM", - .start = 0xe0000, - .end = 0xeffff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource adapter_rom_resources[] = { { - .name = "Adapter ROM", - .start = 0xc8000, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | 
IORESOURCE_READONLY | IORESOURCE_MEM -}, { - .name = "Adapter ROM", - .start = 0, - .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -} }; - -#define ADAPTER_ROM_RESOURCES \ - (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) - -static struct resource video_rom_resource = { - .name = "Video ROM", - .start = 0xc0000, - .end = 0xc7fff, - .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM -}; - -static struct resource vram_resource = { - .name = "Video RAM area", - .start = 0xa0000, - .end = 0xbffff, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM -}; - -static struct resource standard_io_resources[] = { { - .name = "dma1", - .start = 0x0000, - .end = 0x001f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic1", - .start = 0x0020, - .end = 0x0021, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "timer", - .start = 0x0040, - .end = 0x005f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "keyboard", - .start = 0x0060, - .end = 0x006f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma page reg", - .start = 0x0080, - .end = 0x008f, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "pic2", - .start = 0x00a0, - .end = 0x00a1, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "dma2", - .start = 0x00c0, - .end = 0x00df, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -}, { - .name = "fpu", - .start = 0x00f0, - .end = 0x00ff, - .flags = IORESOURCE_BUSY | IORESOURCE_IO -} }; - -#define STANDARD_IO_RESOURCES \ - (sizeof standard_io_resources / sizeof standard_io_resources[0]) - -static int __init checksum(unsigned char *rom, unsigned long length) -{ - unsigned char *p, sum = 0; - - for (p = rom; p < rom + length; p++) - sum += *p; - return sum == 0; -} - -void __init probe_roms(void) -{ - unsigned long start, length, upper; - unsigned char *rom; - int i; - - /* video rom */ - upper = adapter_rom_resources[0].start; - for (start = video_rom_resource.start; start < 
upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - video_rom_resource.start = start; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* if checksum okay, trust length byte */ - if (length && checksum(rom, length)) - video_rom_resource.end = start + length - 1; - - request_resource(&iomem_resource, &video_rom_resource); - break; - } - - start = (video_rom_resource.end + 1 + 2047) & ~2047UL; - if (start < upper) - start = upper; - - /* system rom */ - request_resource(&iomem_resource, &system_rom_resource); - upper = system_rom_resource.start; - - /* check for extension rom (ignore length byte!) */ - rom = isa_bus_to_virt(extension_rom_resource.start); - if (romsignature(rom)) { - length = extension_rom_resource.end - extension_rom_resource.start + 1; - if (checksum(rom, length)) { - request_resource(&iomem_resource, &extension_rom_resource); - upper = extension_rom_resource.start; - } - } - - /* check for adapter roms on 2k boundaries */ - for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { - rom = isa_bus_to_virt(start); - if (!romsignature(rom)) - continue; - - /* 0 < length <= 0x7f * 512, historically */ - length = rom[2] * 512; - - /* but accept any length that fits if checksum okay */ - if (!length || start + length > upper || !checksum(rom, length)) - continue; - - adapter_rom_resources[i].start = start; - adapter_rom_resources[i].end = start + length - 1; - request_resource(&iomem_resource, &adapter_rom_resources[i]); - - start = adapter_rom_resources[i++].end & ~2047UL; - } -} - -void __init request_graphics_resource(void) -{ - request_resource(&iomem_resource, &vram_resource); -} - -void __init request_standard_io_resources(void) -{ - int i; - - for (i = 0; i < STANDARD_IO_RESOURCES; i++) - request_resource(&ioport_resource, &standard_io_resources[i]); -} --- linux-2.6.6-rc1/arch/i386/mach-visws/mpparse.c 2003-09-08 13:58:55.000000000 -0700 +++ 
25/arch/i386/mach-visws/mpparse.c 2004-04-18 22:25:24.642084312 -0700 @@ -28,6 +28,7 @@ unsigned int boot_cpu_logical_apicid = - /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; +unsigned int __initdata maxcpus = NR_CPUS; /* * The Visual Workstation is Intel MP compliant in the hardware @@ -89,6 +90,9 @@ void __init find_smp_config(void) ncpus = CO_CPU_MAX; } + if (ncpus > maxcpus) + ncpus = maxcpus; + smp_found_config = 1; while (ncpus--) MP_processor_info(mp++); --- linux-2.6.6-rc1/arch/i386/mach-visws/traps.c 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/i386/mach-visws/traps.c 2004-04-18 22:26:02.600313784 -0700 @@ -8,7 +8,6 @@ #include #include -#include #include #include #include "cobalt.h" --- linux-2.6.6-rc1/arch/i386/mach-voyager/voyager_basic.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/i386/mach-voyager/voyager_basic.c 2004-04-18 22:26:02.601313632 -0700 @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/mach-voyager/voyager_smp.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/i386/mach-voyager/voyager_smp.c 2004-04-18 22:26:02.602313480 -0700 @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -599,12 +598,10 @@ do_boot_cpu(__u8 cpu) idle->thread.eip = (unsigned long) start_secondary; unhash_process(idle); /* init_tasks (in sched.c) is indexed logically */ -#if 0 - // for AC kernels - stack_start.esp = (THREAD_SIZE + (__u8 *)TSK_TO_KSTACK(idle)); -#else - stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle->thread_info); -#endif + stack_start.esp = (void *) idle->thread.esp; + + irq_ctx_init(cpu); + /* Note: Don't modify initial ss override */ VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, (unsigned long)hijack_source.val, hijack_source.idt.Segment, --- linux-2.6.6-rc1/arch/i386/mach-voyager/voyager_thread.c 2003-06-14 12:18:25.000000000 -0700 +++ 25/arch/i386/mach-voyager/voyager_thread.c 
2004-04-18 22:26:02.603313328 -0700 @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -135,7 +134,7 @@ thread(void *unused) init_timer(&wakeup_timer); sigfillset(&current->blocked); - current->tty = NULL; /* get rid of controlling tty */ + current->signal->tty = NULL; printk(KERN_NOTICE "Voyager starting monitor thread\n"); --- linux-2.6.6-rc1/arch/i386/Makefile 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/Makefile 2004-04-18 22:25:42.591355608 -0700 @@ -19,7 +19,7 @@ LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := -CFLAGS += -pipe +CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call check_gcc,-mpreferred-stack-boundary=2,) @@ -97,6 +97,9 @@ mcore-$(CONFIG_X86_ES7000) := mach-es700 # default subarch .h files mflags-y += -Iinclude/asm-i386/mach-default +mflags-$(CONFIG_KGDB) += -gdwarf-2 +mflags-$(CONFIG_KGDB_MORE) += $(shell echo $(CONFIG_KGDB_OPTIONS) | sed -e 's/"//g') + head-y := arch/i386/kernel/head.o arch/i386/kernel/init_task.o libs-y += arch/i386/lib/ --- linux-2.6.6-rc1/arch/i386/mm/fault.c 2003-12-17 21:20:01.000000000 -0800 +++ 25/arch/i386/mm/fault.c 2004-04-18 22:26:02.603313328 -0700 @@ -24,7 +24,6 @@ #include #include -#include #include #include @@ -403,6 +402,12 @@ no_context: * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. 
*/ +#ifdef CONFIG_KGDB + if (!user_mode(regs)){ + kgdb_handle_exception(14,SIGBUS, error_code, regs); + return; + } +#endif bust_spinlocks(1); --- linux-2.6.6-rc1/arch/i386/mm/hugetlbpage.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/mm/hugetlbpage.c 2004-04-18 22:26:02.604313176 -0700 @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -140,32 +139,31 @@ follow_hugetlb_page(struct mm_struct *mm #if 0 /* This is just for testing */ struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { unsigned long start = address; int length = 1; int nr; struct page *page; + struct vm_area_struct *vma; - nr = follow_hugetlb_page(mm, vma, &page, NULL, &start, &length, 0); - if (nr == 1) - return page; - return NULL; -} + if (! mm->used_hugetlb) + return ERR_PTR(-EINVAL); -/* - * If virtual address `addr' lies within a huge page, return its controlling - * VMA, else NULL. 
- */ -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - if (mm->used_hugetlb) { - struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && is_vm_hugetlb_page(vma)) - return vma; - } - return NULL; + vma = find_vma(mm, addr); + if (!vma || !is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + + pte = huge_pte_offset(mm, address); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + + return page; } int pmd_huge(pmd_t pmd) @@ -183,15 +181,9 @@ follow_huge_pmd(struct mm_struct *mm, un #else struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) @@ -206,10 +198,8 @@ follow_huge_pmd(struct mm_struct *mm, un struct page *page; page = pte_page(*(pte_t *)pmd); - if (page) { + if (page) page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - get_page(page); - } return page; } #endif --- linux-2.6.6-rc1/arch/i386/mm/init.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/mm/init.c 2004-04-18 22:26:02.604313176 -0700 @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/mm/ioremap.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/i386/mm/ioremap.c 2004-04-18 22:26:02.605313024 -0700 @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/arch/i386/oprofile/op_model_p4.c 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/i386/oprofile/op_model_p4.c 2004-04-18 22:25:36.870225352 -0700 @@ -382,11 +382,8 @@ static struct p4_event_binding p4_events static unsigned int get_stagger(void) { #ifdef CONFIG_SMP - 
int cpu; - if (smp_num_siblings > 1) { - cpu = smp_processor_id(); - return (cpu_sibling_map[cpu] > cpu) ? 0 : 1; - } + int cpu = smp_processor_id(); + return (cpu != first_cpu(cpu_sibling_map[cpu])); #endif return 0; } --- linux-2.6.6-rc1/arch/i386/pci/irq.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/i386/pci/irq.c 2004-04-18 22:25:52.727814632 -0700 @@ -23,6 +23,7 @@ #define PIRQ_VERSION 0x0100 int broken_hp_bios_irq9; +int acer_tm360_irqrouting; static struct irq_routing_table *pirq_table; @@ -453,15 +454,12 @@ static int pirq_bios_set(struct pci_dev static __init int intel_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) { -#if 0 /* Let's see what chip this is supposed to be ... */ - /* We must not touch 440GX even if we have tables. 440GX has - different IRQ routing weirdness */ + /* 440GX has a proprietary PIRQ router -- don't use it */ if ( pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_0, NULL) || pci_find_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443GX_2, NULL)) return 0; -#endif switch(device) { @@ -749,6 +747,13 @@ static int pcibios_lookup_irq(struct pci r->set(pirq_router_dev, dev, pirq, 11); } + /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ + if (acer_tm360_irqrouting && pirq == 0x63 && dev->irq == 11) { + dev->irq = 10; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, 10); + r->set(pirq_router_dev, dev, pirq, 10); + } + /* * Find the best IRQ to assign: use the one * reported by the device if possible. 
--- linux-2.6.6-rc1/arch/ia64/ia32/binfmt_elf32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/ia32/binfmt_elf32.c 2004-04-18 22:25:49.813257712 -0700 @@ -104,6 +104,7 @@ ia64_elf32_init (struct pt_regs *regs) vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); down_write(&current->mm->mmap_sem); { insert_vm_struct(current->mm, vma); @@ -190,6 +191,7 @@ ia32_setup_arg_pages (struct linux_binpr mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; mpnt->vm_private_data = 0; + mpol_set_vma_default(mpnt); insert_vm_struct(current->mm, mpnt); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; } --- linux-2.6.6-rc1/arch/ia64/ia32/ia32_entry.S 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ia64/ia32/ia32_entry.S 2004-04-18 22:25:58.841885152 -0700 @@ -350,11 +350,11 @@ ia32_syscall_table: data8 sys_setfsgid /* 16-bit version */ data8 sys_llseek /* 140 */ data8 sys32_getdents - data8 sys32_select + data8 compat_sys_select data8 sys_flock data8 sys32_msync - data8 sys32_readv /* 145 */ - data8 sys32_writev + data8 compat_sys_readv /* 145 */ + data8 compat_sys_writev data8 sys_getsid data8 sys_fdatasync data8 sys32_sysctl --- linux-2.6.6-rc1/arch/ia64/ia32/ia32_signal.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/ia32/ia32_signal.c 2004-04-18 22:25:45.448921192 -0700 @@ -1,7 +1,7 @@ /* * IA32 Architecture-specific signal handling support. * - * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co + * Copyright (C) 1999, 2001-2002, 2004 Hewlett-Packard Co * David Mosberger-Tang * Copyright (C) 1999 Arun Sharma * Copyright (C) 2000 VA Linux Co @@ -114,8 +114,8 @@ copy_siginfo_from_user32 (siginfo_t *to, err |= __get_user(to->si_band, &from->si_band); err |= __get_user(to->si_fd, &from->si_fd); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: + case __SI_RT >> 16: /* This is not generated by the kernel as of now. 
*/ + case __SI_MESGQ >> 16: err |= __get_user(to->si_pid, &from->si_pid); err |= __get_user(to->si_uid, &from->si_uid); err |= __get_user(to->si_int, &from->si_int); @@ -820,7 +820,7 @@ restore_sigcontext_ia32 (struct pt_regs * Determine which stack to use.. */ static inline void * -get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +get_sigframe (struct k_sigaction *ka_copy, struct pt_regs * regs, size_t frame_size) { unsigned long esp; @@ -828,7 +828,7 @@ get_sigframe (struct k_sigaction *ka, st esp = (unsigned int) regs->r12; /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { + if (ka_copy->sa.sa_flags & SA_ONSTACK) { if (!on_sig_stack(esp)) esp = current->sas_ss_sp + current->sas_ss_size; } @@ -837,17 +837,40 @@ get_sigframe (struct k_sigaction *ka, st return (void *)((esp - frame_size) & -8ul); } +static long +force_sigsegv (int sig) +{ + unsigned long flags; + + if (sig == SIGSEGV) { + /* + * Acquiring siglock around the sa_handler-update is almost + * certainly overkill, but this isn't a + * performance-critical path and I'd rather play it safe + * here than having to debug a nasty race if and when + * something changes in kernel/signal.c that would make it + * no longer safe to modify sa_handler without holding the + * lock. 
+ */ + spin_lock_irqsave(&current->sighand->siglock, flags); + current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; + spin_unlock_irqrestore(&current->sighand->siglock, flags); + } + force_sig(SIGSEGV, current); + return 0; +} + static int -setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs) +setup_frame_ia32 (int sig, struct k_sigaction *ka_copy, sigset_t *set, struct pt_regs * regs) { struct exec_domain *ed = current_thread_info()->exec_domain; struct sigframe_ia32 *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return force_sigsegv(sig); err |= __put_user((ed && ed->signal_invmap && sig < 32 ? (int)(ed->signal_invmap[sig]) : sig), &frame->sig); @@ -860,8 +883,8 @@ setup_frame_ia32 (int sig, struct k_siga /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - unsigned int restorer = IA32_SA_RESTORER(ka); + if (ka_copy->sa.sa_flags & SA_RESTORER) { + unsigned int restorer = IA32_SA_RESTORER(ka_copy); err |= __put_user(restorer, &frame->pretcode); } else { err |= __put_user((long)frame->retcode, &frame->pretcode); @@ -873,11 +896,11 @@ setup_frame_ia32 (int sig, struct k_siga } if (err) - goto give_sigsegv; + return force_sigsegv(sig); /* Set up registers for signal handler */ regs->r12 = (unsigned long) frame; - regs->cr_iip = IA32_SA_HANDLER(ka); + regs->cr_iip = IA32_SA_HANDLER(ka_copy); set_fs(USER_DS); @@ -885,32 +908,26 @@ setup_frame_ia32 (int sig, struct k_siga regs->eflags &= ~TF_MASK; #endif -#if 0 +#if DEBUG_SIG printk("SIG deliver (%s:%d): sig=%d sp=%p pc=%lx ra=%x\n", current->comm, current->pid, sig, (void *) frame, regs->cr_iip, frame->pretcode); #endif return 1; - - give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); - return 0; } static int 
-setup_rt_frame_ia32 (int sig, struct k_sigaction *ka, siginfo_t *info, +setup_rt_frame_ia32 (int sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct exec_domain *ed = current_thread_info()->exec_domain; struct rt_sigframe_ia32 *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return force_sigsegv(sig); err |= __put_user((ed && ed->signal_invmap && sig < 32 ? ed->signal_invmap[sig] : sig), &frame->sig); @@ -927,12 +944,12 @@ setup_rt_frame_ia32 (int sig, struct k_s err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) - goto give_sigsegv; + return force_sigsegv(sig); /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - unsigned int restorer = IA32_SA_RESTORER(ka); + if (ka_copy->sa.sa_flags & SA_RESTORER) { + unsigned int restorer = IA32_SA_RESTORER(ka_copy); err |= __put_user(restorer, &frame->pretcode); } else { err |= __put_user((long)frame->retcode, &frame->pretcode); @@ -943,11 +960,11 @@ setup_rt_frame_ia32 (int sig, struct k_s } if (err) - goto give_sigsegv; + return force_sigsegv(sig); /* Set up registers for signal handler */ regs->r12 = (unsigned long) frame; - regs->cr_iip = IA32_SA_HANDLER(ka); + regs->cr_iip = IA32_SA_HANDLER(ka_copy); set_fs(USER_DS); @@ -955,29 +972,23 @@ setup_rt_frame_ia32 (int sig, struct k_s regs->eflags &= ~TF_MASK; #endif -#if 0 +#if DEBUG_SIG printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n", current->comm, current->pid, (void *) frame, regs->cr_iip, frame->pretcode); #endif return 1; - -give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); - return 0; } int -ia32_setup_frame1 (int sig, struct k_sigaction 
*ka, siginfo_t *info, +ia32_setup_frame1 (int sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - return setup_rt_frame_ia32(sig, ka, info, set, regs); + if (ka_copy->sa.sa_flags & SA_SIGINFO) + return setup_rt_frame_ia32(sig, ka_copy, info, set, regs); else - return setup_frame_ia32(sig, ka, set, regs); + return setup_frame_ia32(sig, ka_copy, set, regs); } asmlinkage long --- linux-2.6.6-rc1/arch/ia64/ia32/sys_ia32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/ia32/sys_ia32.c 2004-04-18 22:25:59.124842136 -0700 @@ -90,58 +90,17 @@ extern unsigned long arch_get_unmapped_a /* XXX make per-mm: */ static DECLARE_MUTEX(ia32_mmap_sem); -static int -nargs (unsigned int arg, char **ap) -{ - unsigned int addr; - int n, err; - - if (!arg) - return 0; - - n = 0; - do { - err = get_user(addr, (unsigned int *)A(arg)); - if (err) - return err; - if (ap) - *ap++ = (char *) A(addr); - arg += sizeof(unsigned int); - n++; - } while (addr); - return n - 1; -} - asmlinkage long -sys32_execve (char *filename, unsigned int argv, unsigned int envp, - struct pt_regs *regs) +sys32_execve (char *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs) { + long error; + char *filename; unsigned long old_map_base, old_task_size, tssd; - char **av, **ae; - int na, ne, len; - long r; - - na = nargs(argv, NULL); - if (na < 0) - return na; - ne = nargs(envp, NULL); - if (ne < 0) - return ne; - len = (na + ne + 2) * sizeof(*av); - av = kmalloc(len, GFP_KERNEL); - if (!av) - return -ENOMEM; - ae = av + na + 1; - av[na] = NULL; - ae[ne] = NULL; - - r = nargs(argv, av); - if (r < 0) - goto out; - r = nargs(envp, ae); - if (r < 0) - goto out; + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; old_map_base = current->thread.map_base; old_task_size = current->thread.task_size; @@ -153,19 +112,18 @@ sys32_execve (char 
*filename, unsigned i ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); - set_fs(KERNEL_DS); - r = sys_execve(filename, av, ae, regs); - if (r < 0) { + error = compat_do_execve(filename, argv, envp, regs); + putname(filename); + + if (error < 0) { /* oops, execve failed, switch back to old values... */ ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE); ia64_set_kr(IA64_KR_TSSD, tssd); current->thread.map_base = old_map_base; current->thread.task_size = old_task_size; - set_fs(USER_DS); /* establish new task-size as the address-limit */ } - out: - kfree(av); - return r; + + return error; } int cp_compat_stat(struct kstat *stat, struct compat_stat *ubuf) @@ -818,110 +776,6 @@ out: return error; } -/* - * We can actually return ERESTARTSYS instead of EINTR, but I'd - * like to be certain this leads to no problems. So I return - * EINTR just for safety. - * - * Update: ERESTARTSYS breaks at least the xview clock binary, so - * I'm trying ERESTARTNOHAND which restart only when you want to. 
- */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -#define ROUND_UP_TIME(x,y) (((x)+(y)-1)/(y)) - -asmlinkage long -sys32_select (int n, fd_set *inp, fd_set *outp, fd_set *exp, struct compat_timeval *tvp32) -{ - fd_set_bits fds; - char *bits; - long timeout; - int ret, size; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp32) { - time_t sec, usec; - - ret = -EFAULT; - if (get_user(sec, &tvp32->tv_sec) || get_user(usec, &tvp32->tv_usec)) - goto out_nofds; - - ret = -EINVAL; - if (sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = ROUND_UP_TIME(usec, 1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. - */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - if ((ret = get_fd_set(n, inp, fds.in)) || - (ret = get_fd_set(n, outp, fds.out)) || - (ret = get_fd_set(n, exp, fds.ex))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp32 && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - if (put_user(sec, &tvp32->tv_sec) || put_user(usec, &tvp32->tv_usec)) { - ret = -EFAULT; - goto out; - } - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if 
(signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set(n, inp, fds.res_in); - set_fd_set(n, outp, fds.res_out); - set_fd_set(n, exp, fds.res_ex); - -out: - kfree(bits); -out_nofds: - return ret; -} - struct sel_arg_struct { unsigned int n; unsigned int inp; @@ -937,87 +791,8 @@ sys32_old_select (struct sel_arg_struct if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - return sys32_select(a.n, (fd_set *) A(a.inp), (fd_set *) A(a.outp), (fd_set *) A(a.exp), - (struct compat_timeval *) A(a.tvp)); -} - -static struct iovec * -get_compat_iovec (struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type) -{ - u32 i, buf, len; - struct iovec *ivp, *iov; - - /* Get the "struct iovec" from user memory */ - - if (!count) - return 0; - if (verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*count)) - return NULL; - if (count > UIO_MAXIOV) - return NULL; - if (count > UIO_FASTIOV) { - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - return NULL; - } else - iov = iov_buf; - - ivp = iov; - for (i = 0; i < count; i++) { - if (__get_user(len, &iov32->iov_len) || __get_user(buf, &iov32->iov_base)) { - if (iov != iov_buf) - kfree(iov); - return NULL; - } - if (verify_area(type, (void *)A(buf), len)) { - if (iov != iov_buf) - kfree(iov); - return((struct iovec *)0); - } - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - iov32++; - ivp++; - } - return iov; -} - -asmlinkage long -sys32_readv (int fd, struct compat_iovec *vector, u32 count) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov; - long ret; - mm_segment_t old_fs = get_fs(); - - iov = get_compat_iovec(vector, iovstack, count, VERIFY_WRITE); - if (!iov) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_readv(fd, iov, count); - set_fs(old_fs); - if (iov != iovstack) - kfree(iov); - return ret; -} - -asmlinkage long -sys32_writev (int fd, struct compat_iovec *vector, u32 count) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec 
*iov; - long ret; - mm_segment_t old_fs = get_fs(); - - iov = get_compat_iovec(vector, iovstack, count, VERIFY_READ); - if (!iov) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_writev(fd, iov, count); - set_fs(old_fs); - if (iov != iovstack) - kfree(iov); - return ret; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); } #define SEMOP 1 @@ -2425,176 +2200,6 @@ sys32_setresgid(compat_gid_t rgid, compa return sys_setresgid(srgid, segid, ssgid); } -/* Stuff for NFS server syscalls... */ -struct nfsctl_svc32 { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct nfsctl_client32 { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct nfsctl_export32 { - s8 ex32_client[NFSCLNT_IDMAX+1]; - s8 ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - s32 ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; -}; - -struct nfsctl_arg32 { - s32 ca32_version; /* safeguard */ - union { - struct nfsctl_svc32 u32_svc; - struct nfsctl_client32 u32_client; - struct nfsctl_export32 u32_export; - u32 u32_debug; - } u; -#define ca32_svc u.u32_svc -#define ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_debug u.u32_debug -}; - -union nfsctl_res32 { - struct knfs_fh cr32_getfh; - u32 cr32_debug; -}; - -static int -nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, - &arg32->ca32_svc.svc32_nthreads); - return err; -} - -static int -nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= 
copy_from_user(&karg->ca_client.cl_ident[0], - &arg32->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, - &arg32->ca32_client.cl32_naddr); - err |= copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg32->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg32->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg32->ca32_client.cl32_fhkeylen); - err |= copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg32->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); - return err; -} - -static int -nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_export.ex_client[0], - &arg32->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= copy_from_user(&karg->ca_export.ex_path[0], - &arg32->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg32->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg32->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg32->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg32->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg32->ca32_export.ex32_anon_gid); - return err; -} - -static int -nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) -{ - int err; - - err = copy_to_user(&res32->cr32_getfh, - &kres->cr_getfh, - sizeof(res32->cr32_getfh)); - err |= __put_user(kres->cr_debug, &res32->cr32_debug); - return err; -} - -int asmlinkage -sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) -{ - struct nfsctl_arg *karg = NULL; - union nfsctl_res *kres = NULL; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - if(!karg) - return -ENOMEM; - if(res32) { - kres 
= kmalloc(sizeof(*kres), GFP_USER); - if(!kres) { - kfree(karg); - return -ENOMEM; - } - } - switch(cmd) { - case NFSCTL_SVC: - err = nfs_svc32_trans(karg, arg32); - break; - case NFSCTL_ADDCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_DELCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_EXPORT: - err = nfs_exp32_trans(karg, arg32); - break; - default: - err = -EINVAL; - break; - } - if(err) - goto done; - oldfs = get_fs(); - set_fs(KERNEL_DS); - err = sys_nfsservctl(cmd, karg, kres); - set_fs(oldfs); - - if(!err && cmd == NFSCTL_GETFS) - err = nfs_getfh32_res_trans(kres, res32); - -done: - if(karg) - kfree(karg); - if(kres) - kfree(kres); - return err; -} - /* Handle adjtimex compatibility. */ struct timex32 { --- linux-2.6.6-rc1/arch/ia64/Kconfig 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/Kconfig 2004-04-18 22:25:47.933543472 -0700 @@ -361,16 +361,6 @@ config PCI information about which PCI hardware does work under Linux and which doesn't. -config PCI_USE_VECTOR - bool - default y if IA64 - help - This enables MSI, Message Signaled Interrupt, on specific - MSI capable device functions detected upon requests from the - device drivers. Message Signal Interrupt enables an MSI-capable - hardware device to send an inbound Memory Write on its PCI bus - instead of asserting IRQ signal on device IRQ pin. - config PCI_DOMAINS bool default PCI @@ -454,6 +444,19 @@ config MAGIC_SYSRQ keys are documented in . Don't say Y unless you really know what this hack does. +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on PROC_FS + default y + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. 
These + stats may be useful for both tuning and debugging the scheduler + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. + config DEBUG_SLAB bool "Debug memory allocations" depends on DEBUG_KERNEL @@ -503,6 +506,13 @@ config DEBUG_INFO Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. +config LOCKMETER + bool "Kernel lock metering" + depends on SMP + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + config SYSVIPC_COMPAT bool depends on COMPAT && SYSVIPC --- linux-2.6.6-rc1/arch/ia64/kernel/entry.S 2004-02-03 20:42:34.000000000 -0800 +++ 25/arch/ia64/kernel/entry.S 2004-04-18 22:25:48.698427192 -0700 @@ -1501,9 +1501,9 @@ sys_call_table: data8 sys_clock_nanosleep data8 sys_fstatfs64 data8 sys_statfs64 - data8 sys_ni_syscall - data8 sys_ni_syscall // 1260 - data8 sys_ni_syscall + data8 sys_mbind + data8 sys_get_mempolicy // 1260 + data8 sys_set_mempolicy data8 sys_ni_syscall data8 sys_ni_syscall data8 sys_ni_syscall --- linux-2.6.6-rc1/arch/ia64/kernel/perfmon.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/kernel/perfmon.c 2004-04-18 22:25:49.818256952 -0700 @@ -2308,6 +2308,7 @@ pfm_smpl_buffer_alloc(struct task_struct vma->vm_ops = NULL; vma->vm_pgoff = 0; vma->vm_file = NULL; + mpol_set_vma_default(vma); vma->vm_private_data = NULL; /* --- linux-2.6.6-rc1/arch/ia64/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/kernel/process.c 2004-04-18 22:25:54.249583288 -0700 @@ -657,11 +657,6 @@ get_wchan (struct task_struct *p) struct unw_frame_info info; unsigned long ip; int count = 0; - /* - * These bracket the sleeping functions.. 
- */ -# define first_sched ((unsigned long) scheduling_functions_start_here) -# define last_sched ((unsigned long) scheduling_functions_end_here) /* * Note: p may not be a blocked task (it could be current or @@ -676,12 +671,10 @@ get_wchan (struct task_struct *p) if (unw_unwind(&info) < 0) return 0; unw_get_ip(&info, &ip); - if (ip < first_sched || ip >= last_sched) + if (!in_sched_functions(ip)) return ip; } while (count++ < 16); return 0; -# undef first_sched -# undef last_sched } void --- linux-2.6.6-rc1/arch/ia64/kernel/setup.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ia64/kernel/setup.c 2004-04-18 22:25:35.888374616 -0700 @@ -88,10 +88,6 @@ unsigned char aux_device_present = 0xaa; unsigned long ia64_max_iommu_merge_mask = ~0UL; EXPORT_SYMBOL(ia64_max_iommu_merge_mask); -#define COMMAND_LINE_SIZE 512 - -char saved_command_line[COMMAND_LINE_SIZE]; /* used in proc filesystem */ - /* * We use a special marker for the end of memory and it uses the extra (+1) slot */ --- linux-2.6.6-rc1/arch/ia64/kernel/signal.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/ia64/kernel/signal.c 2004-04-18 22:25:45.450920888 -0700 @@ -1,7 +1,7 @@ /* * Architecture-specific signal handling support. * - * Copyright (C) 1999-2003 Hewlett-Packard Co + * Copyright (C) 1999-2004 Hewlett-Packard Co * David Mosberger-Tang * * Derived from i386 and Alpha versions. @@ -397,18 +397,47 @@ rbs_on_sig_stack (unsigned long bsp) } static long -setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, +force_sigsegv (int sig, void *addr) +{ + unsigned long flags; + struct siginfo si; + + if (sig == SIGSEGV) { + /* + * Acquiring siglock around the sa_handler-update is almost + * certainly overkill, but this isn't a + * performance-critical path and I'd rather play it safe + * here than having to debug a nasty race if and when + * something changes in kernel/signal.c that would make it + * no longer safe to modify sa_handler without holding the + * lock. 
+ */ + spin_lock_irqsave(&current->sighand->siglock, flags); + current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; + spin_unlock_irqrestore(&current->sighand->siglock, flags); + } + si.si_signo = SIGSEGV; + si.si_errno = 0; + si.si_code = SI_KERNEL; + si.si_pid = current->pid; + si.si_uid = current->uid; + si.si_addr = addr; + force_sig_info(SIGSEGV, &si, current); + return 0; +} + +static long +setup_frame (int sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *set, struct sigscratch *scr) { extern char __kernel_sigtramp[]; unsigned long tramp_addr, new_rbs = 0; struct sigframe *frame; - struct siginfo si; long err; frame = (void *) scr->pt.r12; tramp_addr = (unsigned long) __kernel_sigtramp; - if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { + if ((ka_copy->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { frame = (void *) ((current->sas_ss_sp + current->sas_ss_size) & ~(STACK_ALIGN - 1)); /* @@ -422,14 +451,14 @@ setup_frame (int sig, struct k_sigaction frame = (void *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) - goto give_sigsegv; + return force_sigsegv(sig, frame); err = __put_user(sig, &frame->arg0); err |= __put_user(&frame->info, &frame->arg1); err |= __put_user(&frame->sc, &frame->arg2); err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ - err |= __put_user(ka->sa.sa_handler, &frame->handler); + err |= __put_user(ka_copy->sa.sa_handler, &frame->handler); err |= copy_siginfo_to_user(&frame->info, info); @@ -438,8 +467,8 @@ setup_frame (int sig, struct k_sigaction err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags); err |= setup_sigcontext(&frame->sc, set, scr); - if (err) - goto give_sigsegv; + if (unlikely(err)) + return force_sigsegv(sig, frame); scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ scr->pt.ar_fpsr 
= FPSR_DEFAULT; /* reset fpsr for signal handler */ @@ -466,40 +495,25 @@ setup_frame (int sig, struct k_sigaction current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); #endif return 1; - - give_sigsegv: - if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; - si.si_signo = SIGSEGV; - si.si_errno = 0; - si.si_code = SI_KERNEL; - si.si_pid = current->pid; - si.si_uid = current->uid; - si.si_addr = frame; - force_sig_info(SIGSEGV, &si, current); - return 0; } static long -handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, +handle_signal (unsigned long sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct sigscratch *scr) { if (IS_IA32_PROCESS(&scr->pt)) { /* send signal to IA-32 process */ - if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt)) + if (!ia32_setup_frame1(sig, ka_copy, info, oldset, &scr->pt)) return 0; } else /* send signal to IA-64 process */ - if (!setup_frame(sig, ka, info, oldset, scr)) + if (!setup_frame(sig, ka_copy, info, oldset, scr)) return 0; - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; - - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); { - sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); + sigorsets(&current->blocked, &current->blocked, &ka_copy->sa.sa_mask); sigaddset(&current->blocked, sig); recalc_sigpending(); } @@ -515,7 +529,7 @@ handle_signal (unsigned long sig, struct long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) { - struct k_sigaction *ka; + struct k_sigaction ka_copy; siginfo_t info; long restart = in_syscall; long errno = scr->pt.r8; @@ -537,7 +551,7 @@ ia64_do_signal (sigset_t *oldset, struct * need to push through a forced SIGSEGV. 
*/ while (1) { - int signr = get_signal_to_deliver(&info, &scr->pt, NULL); + int signr = get_signal_to_deliver(&info, &ka_copy, &scr->pt, NULL); /* * get_signal_to_deliver() may have run a debugger (via notify_parent()) @@ -564,8 +578,6 @@ ia64_do_signal (sigset_t *oldset, struct if (signr <= 0) break; - ka = &current->sighand->action[signr - 1]; - if (unlikely(restart)) { switch (errno) { case ERESTART_RESTARTBLOCK: @@ -575,7 +587,7 @@ ia64_do_signal (sigset_t *oldset, struct break; case ERESTARTSYS: - if ((ka->sa.sa_flags & SA_RESTART) == 0) { + if ((ka_copy.sa.sa_flags & SA_RESTART) == 0) { scr->pt.r8 = ERR_CODE(EINTR); /* note: scr->pt.r10 is already -1 */ break; @@ -594,7 +606,7 @@ ia64_do_signal (sigset_t *oldset, struct * Whee! Actually deliver the signal. If the delivery failed, we need to * continue to iterate in this loop so we can deliver the SIGSEGV... */ - if (handle_signal(signr, ka, &info, oldset, scr)) + if (handle_signal(signr, &ka_copy, &info, oldset, scr)) return 1; } --- linux-2.6.6-rc1/arch/ia64/lib/dec_and_lock.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/ia64/lib/dec_and_lock.c 2004-04-18 22:25:47.933543472 -0700 @@ -13,6 +13,7 @@ #include #include +#ifndef CONFIG_LOCKMETER /* * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. 
Both of these * operations have to be done atomically, so that the count doesn't drop to zero without @@ -40,3 +41,4 @@ atomic_dec_and_lock (atomic_t *refcount, } EXPORT_SYMBOL(atomic_dec_and_lock); +#endif --- linux-2.6.6-rc1/arch/ia64/mm/hugetlbpage.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ia64/mm/hugetlbpage.c 2004-04-18 22:25:59.346808392 -0700 @@ -49,8 +49,12 @@ huge_pte_offset (struct mm_struct *mm, u pte_t *pte = NULL; pgd = pgd_offset(mm, taddr); - pmd = pmd_offset(pgd, taddr); - pte = pte_offset_map(pmd, taddr); + if (pgd_present(*pgd)) { + pmd = pmd_offset(pgd, taddr); + if (pmd_present(*pmd)) + pte = pte_offset_map(pmd, taddr); + } + return pte; } @@ -150,27 +154,21 @@ back1: return i; } -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - if (mm->used_hugetlb) { - if (REGION_NUMBER(addr) == REGION_HPAGE) { - struct vm_area_struct *vma = find_vma(mm, addr); - if (vma && is_vm_hugetlb_page(vma)) - return vma; - } - } - return NULL; -} - -struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, int write) +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write) { struct page *page; pte_t *ptep; + if (! 
mm->used_hugetlb) + return ERR_PTR(-EINVAL); + if (REGION_NUMBER(addr) != REGION_HPAGE) + return ERR_PTR(-EINVAL); + ptep = huge_pte_offset(mm, addr); + if (!ptep || pte_none(*ptep)) + return NULL; page = pte_page(*ptep); page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT); - get_page(page); return page; } int pmd_huge(pmd_t pmd) --- linux-2.6.6-rc1/arch/ia64/mm/init.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ia64/mm/init.c 2004-04-18 22:25:49.819256800 -0700 @@ -131,6 +131,7 @@ ia64_init_addr_space (void) vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); insert_vm_struct(current->mm, vma); } @@ -143,6 +144,7 @@ ia64_init_addr_space (void) vma->vm_end = PAGE_SIZE; vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; + mpol_set_vma_default(vma); insert_vm_struct(current->mm, vma); } } --- linux-2.6.6-rc1/arch/m68k/atari/stram.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68k/atari/stram.c 2004-04-18 22:25:49.820256648 -0700 @@ -752,7 +752,7 @@ static int unswap_by_read(unsigned short /* Get a page for the entry, using the existing swap cache page if there is one. Otherwise, get a clean page and read the swap into it. 
*/ - page = read_swap_cache_async(entry); + page = read_swap_cache_async(entry, NULL, 0); if (!page) { swap_free(entry); return -ENOMEM; --- linux-2.6.6-rc1/arch/m68k/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68k/kernel/process.c 2004-04-18 22:25:59.765744704 -0700 @@ -67,8 +67,7 @@ unsigned long thread_saved_pc(struct tas { struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp; /* Check whether the thread is blocked in resume() */ - if (sw->retpc > (unsigned long)scheduling_functions_start_here && - sw->retpc < (unsigned long)scheduling_functions_end_here) + if (in_sched_functions(sw->retpc)) return ((unsigned long *)sw->a6)[1]; else return sw->retpc; @@ -382,12 +381,6 @@ out: return error; } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, pc; @@ -399,11 +392,11 @@ unsigned long get_wchan(struct task_stru stack_page = (unsigned long)(p->thread_info); fp = ((struct switch_stack *)p->thread.ksp)->a6; do { - if (fp < stack_page+sizeof(struct task_struct) || + if (fp < stack_page+sizeof(struct thread_info) || fp >= 8184+stack_page) return 0; pc = ((unsigned long *)fp)[1]; - if (pc < first_sched || pc >= last_sched) + if (!in_sched_functions(pc)) return pc; fp = *(unsigned long *) fp; } while (count++ < 16); --- linux-2.6.6-rc1/arch/m68k/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68k/kernel/setup.c 2004-04-18 22:25:35.889374464 -0700 @@ -62,7 +62,6 @@ struct mem_info m68k_memory[NUM_MEMINFO] static struct mem_info m68k_ramdisk; static char m68k_command_line[CL_SIZE]; -char saved_command_line[CL_SIZE]; char m68k_debug_device[6] = ""; --- linux-2.6.6-rc1/arch/m68knommu/Kconfig 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68knommu/Kconfig 2004-04-18 22:26:01.783437968 -0700 @@ -5,6 +5,10 @@ mainmenu 
"uClinux/68k (w/o MMU) Kernel Configuration" +config M68KNOMMU + bool + default y + config MMU bool default n --- linux-2.6.6-rc1/arch/m68knommu/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68knommu/kernel/process.c 2004-04-18 22:25:59.894725096 -0700 @@ -404,12 +404,6 @@ out: return error; } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long fp, pc; @@ -421,11 +415,11 @@ unsigned long get_wchan(struct task_stru stack_page = (unsigned long)p; fp = ((struct switch_stack *)p->thread.ksp)->a6; do { - if (fp < stack_page+sizeof(struct task_struct) || + if (fp < stack_page+sizeof(struct thread_info) || fp >= 8184+stack_page) return 0; pc = ((unsigned long *)fp)[1]; - if (pc < first_sched || pc >= last_sched) + if (!in_sched_functions(pc)) return pc; fp = *(unsigned long *) fp; } while (count++ < 16); @@ -440,8 +434,7 @@ unsigned long thread_saved_pc(struct tas struct switch_stack *sw = (struct switch_stack *)tsk->thread.ksp; /* Check whether the thread is blocked in resume() */ - if (sw->retpc > (unsigned long)scheduling_functions_start_here && - sw->retpc < (unsigned long)scheduling_functions_end_here) + if (in_sched_functions(sw->retpc)) return ((unsigned long *)sw->a6)[1]; else return sw->retpc; --- linux-2.6.6-rc1/arch/m68knommu/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/m68knommu/kernel/setup.c 2004-04-18 22:25:35.889374464 -0700 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -44,8 +45,7 @@ unsigned long rom_length; unsigned long memory_start; unsigned long memory_end; -char command_line[512]; -char saved_command_line[512]; +char command_line[COMMAND_LINE_SIZE]; /* setup some dummy routines */ static void dummy_waitbut(void) --- linux-2.6.6-rc1/arch/m68k/q40/config.c 2004-04-03 
20:39:10.000000000 -0800 +++ 25/arch/m68k/q40/config.c 2004-04-18 22:25:35.890374312 -0700 @@ -64,7 +64,6 @@ void q40_set_vectors (void); extern void q40_mksound(unsigned int /*freq*/, unsigned int /*ticks*/ ); -extern char *saved_command_line; extern char m68k_debug_device[]; static void q40_mem_console_write(struct console *co, const char *b, unsigned int count); --- linux-2.6.6-rc1/arch/mips/kernel/linux32.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/mips/kernel/linux32.c 2004-04-18 22:25:58.845884544 -0700 @@ -142,228 +142,6 @@ asmlinkage int sys_ftruncate64(unsigned } /* - * count32() counts the number of arguments/envelopes - */ -static int count32(u32 * argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - u32 p; int error; - - error = get_user(p,argv); - if (error) - return error; - if (!p) - break; - argv++; - if (++i > max) - return -E2BIG; - } - } - return i; -} - - -/* - * 'copy_strings32()' copies argument/envelope strings from user - * memory to free pages in kernel mem. These are in a format ready - * to be put directly into the top of new user memory. - */ -int copy_strings32(int argc, u32 * argv, struct linux_binprm *bprm) -{ - struct page *kmapped_page = NULL; - char *kaddr = NULL; - int ret; - - while (argc-- > 0) { - u32 str; - int len; - unsigned long pos; - - if (get_user(str, argv+argc) || !str || - !(len = strnlen_user((char *)A(str), bprm->p))) { - ret = -EFAULT; - goto out; - } - - if (bprm->p < len) { - ret = -E2BIG; - goto out; - } - - bprm->p -= len; - /* XXX: add architecture specific overflow check here. 
*/ - - pos = bprm->p; - while (len > 0) { - int i, new, err; - int offset, bytes_to_copy; - struct page *page; - - offset = pos % PAGE_SIZE; - i = pos/PAGE_SIZE; - page = bprm->page[i]; - new = 0; - if (!page) { - page = alloc_page(GFP_HIGHUSER); - bprm->page[i] = page; - if (!page) { - ret = -ENOMEM; - goto out; - } - new = 1; - } - - if (page != kmapped_page) { - if (kmapped_page) - kunmap(kmapped_page); - kmapped_page = page; - kaddr = kmap(kmapped_page); - } - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - err = copy_from_user(kaddr + offset, (char *)A(str), - bytes_to_copy); - if (err) { - ret = -EFAULT; - goto out; - } - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; - } - } - ret = 0; -out: - if (kmapped_page) - kunmap(kmapped_page); - return ret; -} - -#ifdef CONFIG_MMU - -#define free_arg_pages(bprm) do { } while (0) - -#else - -static inline void free_arg_pages(struct linux_binprm *bprm) -{ - int i; - - for (i = 0; i < MAX_ARG_PAGES; i++) { - if (bprm->page[i]) - __free_page(bprm->page[i]); - bprm->page[i] = NULL; - } -} - -#endif /* CONFIG_MMU */ - -/* - * sys32_execve() executes a new program. 
- */ -static inline int -do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) -{ - struct linux_binprm bprm; - struct file * file; - int retval; - - sched_balance_exec(); - - file = open_exec(filename); - - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - bprm.security = NULL; - bprm.mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm.mm) - goto out_file; - - retval = init_new_context(current, bprm.mm); - if (retval < 0) - goto out_mm; - - bprm.argc = count32(argv, bprm.p / sizeof(u32)); - if ((retval = bprm.argc) < 0) - goto out_mm; - - bprm.envc = count32(envp, bprm.p / sizeof(u32)); - if ((retval = bprm.envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(&bprm); - if (retval) - goto out; - - retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; - - bprm.exec = bprm.p; - retval = copy_strings32(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; - - retval = copy_strings32(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(&bprm, regs); - if (retval >= 0) { - free_arg_pages(&bprm); - - /* execve success */ - security_bprm_free(&bprm); - return retval; - } - -out: - /* Something went wrong, return the inode and free the argument pages*/ - free_arg_pages(&bprm); - - if (bprm.security) - security_bprm_free(&bprm); - -out_mm: - if (bprm.mm) - mmdrop(bprm.mm); - -out_file: - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); - } - return retval; -} - -/* * sys_execve() executes a new program. 
*/ asmlinkage int sys32_execve(nabi_no_regargs struct pt_regs regs) @@ -371,12 +149,12 @@ asmlinkage int sys32_execve(nabi_no_rega int error; char * filename; - filename = getname((char *) (long)regs.regs[4]); + filename = getname(compat_ptr(regs.regs[4])); error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve32(filename, (u32 *) (long)regs.regs[5], - (u32 *) (long)regs.regs[6], &regs); + error = compat_do_execve(filename, compat_ptr(regs.regs[5]), + compat_ptr(regs.regs[6]), &regs); putname(filename); out: @@ -671,150 +449,6 @@ asmlinkage int sys32_llseek(unsigned int return sys_llseek(fd, offset_high, offset_low, result, origin); } -typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); - -static long -do_readv_writev32(int type, struct file *file, const struct compat_iovec *vector, - u32 count) -{ - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - IO_fn_t fn; - - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ - if (!count) - return 0; - if(verify_area(VERIFY_READ, vector, sizeof(struct compat_iovec)*count)) - return -EFAULT; - if (count > UIO_MAXIOV) - return -EINVAL; - if (count > UIO_FASTIOV) { - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - return -ENOMEM; - } - - tot_len = 0; - i = count; - ivp = iov; - while (i > 0) { - u32 len; - u32 buf; - - __get_user(len, &vector->iov_len); - __get_user(buf, &vector->iov_base); - tot_len += len; - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) { - if (iov != iovstack) - kfree(iov); - return retval; - } - - /* Then do the actual IO. 
Note that sockets need to be handled - * specially as they have atomicity guarantees and can handle - * iovec's natively - */ -#ifdef CONFIG_NET - if (inode->i_sock) { - int err; - err = sock_readv_writev(type, inode, file, iov, count, tot_len); - if (iov != iovstack) - kfree(iov); - return err; - } -#endif - - if (!file->f_op) { - if (iov != iovstack) - kfree(iov); - return -EINVAL; - } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = file->f_op->read; - if (type == VERIFY_READ) - fn = (IO_fn_t) file->f_op->write; - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; - ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (retval) - break; - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; - } - if (iov != iovstack) - kfree(iov); - - return retval; -} - -asmlinkage long -sys32_readv(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - ssize_t ret; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto bad_file; - if (file->f_op && (file->f_mode & FMODE_READ) && - (file->f_op->readv || file->f_op->read)) - ret = do_readv_writev32(VERIFY_WRITE, file, vector, count); - - fput(file); - -bad_file: - return ret; -} - -asmlinkage long -sys32_writev(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - ssize_t ret; - - ret = -EBADF; - file = fget(fd); - if(!file) - goto bad_file; - if (file->f_op && (file->f_mode & FMODE_WRITE) && - (file->f_op->writev || file->f_op->write)) - ret = do_readv_writev32(VERIFY_READ, file, vector, count); - fput(file); - -bad_file: - return ret; -} - /* From the Single Unix Spec: pread & pwrite act like lseek to pos + op + lseek back to original location. They fail just like lseek does on non-seekable files. */ @@ -885,167 +519,6 @@ out: bad_file: return ret; } -/* - * Ooo, nasty. We need here to frob 32-bit unsigned longs to - * 64-bit unsigned longs. 
- */ - -static inline int -get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset) -{ - if (ufdset) { - unsigned long odd; - - if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32))) - return -EFAULT; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - __get_user(l, ufdset); - __get_user(h, ufdset+1); - ufdset += 2; - *fdset++ = h << 32 | l; - n -= 2; - } - if (odd) - __get_user(*fdset, ufdset); - } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, this makes sure that - * actually happens. - */ - memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32)); - } - return 0; -} - -static inline void -set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) -{ - unsigned long odd; - - if (!ufdset) - return; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - l = *fdset++; - h = l >> 32; - __put_user(l, ufdset); - __put_user(h, ufdset+1); - ufdset += 2; - n -= 2; - } - if (odd) - __put_user(*fdset, ufdset); -} - -/* - * We can actually return ERESTARTSYS instead of EINTR, but I'd - * like to be certain this leads to no problems. So I return - * EINTR just for safety. - * - * Update: ERESTARTSYS breaks at least the xview clock binary, so - * I'm trying ERESTARTNOHAND which restart only when you want to. 
- */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - -asmlinkage int sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, struct compat_timeval *tvp) -{ - fd_set_bits fds; - char *bits; - unsigned long nn; - long timeout; - int ret, size; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp) { - time_t sec, usec; - - if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) - || (ret = __get_user(sec, &tvp->tv_sec)) - || (ret = __get_user(usec, &tvp->tv_usec))) - goto out_nofds; - - ret = -EINVAL; - if(sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. - */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); - if ((ret = get_fd_set32(nn, fds.in, inp)) || - (ret = get_fd_set32(nn, fds.out, outp)) || - (ret = get_fd_set32(nn, fds.ex, exp))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - put_user(sec, &tvp->tv_sec); - put_user(usec, &tvp->tv_usec); - } - - if (ret < 0) - goto out; - if 
(!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set32(nn, inp, fds.res_in); - set_fd_set32(nn, outp, fds.res_out); - set_fd_set32(nn, exp, fds.res_ex); - -out: - kfree(bits); -out_nofds: - return ret; -} - asmlinkage int sys32_sched_rr_get_interval(compat_pid_t pid, struct compat_timespec *interval) --- linux-2.6.6-rc1/arch/mips/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/mips/kernel/process.c 2004-04-18 22:25:54.252582832 -0700 @@ -280,12 +280,6 @@ unsigned long thread_saved_pc(struct tas return ((unsigned long *)t->reg29)[schedule_frame.pc_offset]; } -/* - * These bracket the sleeping functions.. - */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - /* get_wchan - a maintenance nightmare^W^Wpain in the ass ... */ unsigned long get_wchan(struct task_struct *p) { @@ -297,7 +291,7 @@ unsigned long get_wchan(struct task_stru if (!mips_frame_info_initialized) return 0; pc = thread_saved_pc(p); - if (pc < first_sched || pc >= last_sched) + if (!in_sched_functions(pc)) goto out; if (pc >= (unsigned long) sleep_on_timeout) @@ -331,7 +325,7 @@ schedule_timeout_caller: */ pc = ((unsigned long *)frame)[schedule_timeout_frame.pc_offset]; - if (pc >= first_sched && pc < last_sched) { + if (in_sched_functions(pc)) { /* schedule_timeout called by [interruptible_]sleep_on_timeout */ frame = ((unsigned long *)frame)[schedule_timeout_frame.frame_offset]; pc = ((unsigned long *)frame)[sleep_on_timeout_frame.pc_offset]; --- linux-2.6.6-rc1/arch/mips/kernel/scall64-n32.S 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/mips/kernel/scall64-n32.S 2004-04-18 22:25:58.846884392 -0700 @@ -128,11 +128,11 @@ EXPORT(sysn32_call_table) PTR compat_sys_ioctl /* 6015 */ PTR sys_pread64 PTR sys_pwrite64 - PTR sys32_readv - PTR sys32_writev + PTR compat_sys_readv + PTR compat_sys_writev PTR sys_access /* 6020 */ PTR sys_pipe - PTR 
sys32_select + PTR compat_sys_select PTR sys_sched_yield PTR sys_mremap PTR sys_msync /* 6025 */ --- linux-2.6.6-rc1/arch/mips/kernel/scall64-o32.S 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/mips/kernel/scall64-o32.S 2004-04-18 22:25:58.846884392 -0700 @@ -397,11 +397,11 @@ out: jr ra sys sys_setfsgid 1 sys sys32_llseek 5 /* 4140 */ sys sys32_getdents 3 - sys sys32_select 5 + sys compat_sys_select 5 sys sys_flock 2 sys sys_msync 3 - sys sys32_readv 3 /* 4145 */ - sys sys32_writev 3 + sys compat_sys_readv 3 /* 4145 */ + sys compat_sys_writev 3 sys sys_cacheflush 3 sys sys_cachectl 3 sys sys_sysmips 4 --- linux-2.6.6-rc1/arch/mips/kernel/setup.c 2004-03-10 20:41:25.000000000 -0800 +++ 25/arch/mips/kernel/setup.c 2004-04-18 22:25:35.890374312 -0700 @@ -71,7 +71,6 @@ EXPORT_SYMBOL(mips_machgroup); struct boot_mem_map boot_mem_map; static char command_line[CL_SIZE]; - char saved_command_line[CL_SIZE]; char arcs_cmdline[CL_SIZE]=CONFIG_CMDLINE; /* --- linux-2.6.6-rc1/arch/mips/kernel/signal32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/mips/kernel/signal32.c 2004-04-18 22:25:24.646083704 -0700 @@ -358,8 +358,8 @@ static int copy_siginfo_to_user32(siginf err |= __put_user(from->si_band, &to->si_band); err |= __put_user(from->si_fd, &to->si_fd); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: + case __SI_RT >> 16: /* This is not generated by the kernel as of now. 
*/ + case __SI_MESGQ >> 16: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); --- linux-2.6.6-rc1/arch/mips/mm/cache.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/mips/mm/cache.c 2004-04-18 22:25:24.647083552 -0700 @@ -55,9 +55,10 @@ asmlinkage int sys_cacheflush(void *addr void flush_dcache_page(struct page *page) { + struct address_space *mapping = page_mapping(page); unsigned long addr; - if (page_mapping(page) && !mapping_mapped(page->mapping)) { + if (mapping && !mapping_mapped(mapping)) { SetPageDcacheDirty(page); return; } --- linux-2.6.6-rc1/arch/parisc/kernel/cache.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/parisc/kernel/cache.c 2004-04-18 22:25:24.647083552 -0700 @@ -229,16 +229,17 @@ void disable_sr_hashing(void) void __flush_dcache_page(struct page *page) { + struct address_space *mapping = page_mapping(page); struct mm_struct *mm = current->active_mm; struct list_head *l; flush_kernel_dcache_page(page_address(page)); - if (!page_mapping(page)) + if (!mapping) return; /* check shared list first if it's not empty...it's usually * the shortest */ - list_for_each(l, &page->mapping->i_mmap_shared) { + list_for_each(l, &mapping->i_mmap_shared) { struct vm_area_struct *mpnt; unsigned long off; @@ -267,7 +268,7 @@ void __flush_dcache_page(struct page *pa /* then check private mapping list for read only shared mappings * which are flagged by VM_MAYSHARE */ - list_for_each(l, &page->mapping->i_mmap) { + list_for_each(l, &mapping->i_mmap) { struct vm_area_struct *mpnt; unsigned long off; --- linux-2.6.6-rc1/arch/parisc/kernel/setup.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/arch/parisc/kernel/setup.c 2004-04-18 22:25:35.891374160 -0700 @@ -45,8 +45,6 @@ #include #include -#define COMMAND_LINE_SIZE 1024 -char saved_command_line[COMMAND_LINE_SIZE]; char command_line[COMMAND_LINE_SIZE]; /* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */ --- 
linux-2.6.6-rc1/arch/parisc/kernel/syscall_table.S 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/parisc/kernel/syscall_table.S 2004-04-18 22:25:59.125841984 -0700 @@ -232,12 +232,12 @@ ENTRY_DIFF(getdents) /* it is POSSIBLE that select will be OK because even though fd_set * contains longs, the macros and sizes are clever. */ - ENTRY_DIFF(select) + ENTRY_COMP(select) ENTRY_SAME(flock) ENTRY_SAME(msync) /* struct iovec contains pointers */ - ENTRY_DIFF(readv) /* 145 */ - ENTRY_DIFF(writev) + ENTRY_COMP(readv) /* 145 */ + ENTRY_COMP(writev) ENTRY_SAME(getsid) ENTRY_SAME(fdatasync) /* struct __sysctl_args is a mess */ @@ -266,7 +266,7 @@ ENTRY_SAME(ni_syscall) /* query_module */ ENTRY_SAME(poll) /* structs contain pointers and an in_addr... */ - ENTRY_DIFF(nfsservctl) + ENTRY_COMP(nfsservctl) ENTRY_SAME(setresgid) /* 170 */ ENTRY_SAME(getresgid) ENTRY_SAME(prctl) --- linux-2.6.6-rc1/arch/parisc/kernel/sys_parisc32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/parisc/kernel/sys_parisc32.c 2004-04-18 22:25:59.126841832 -0700 @@ -65,189 +65,6 @@ #endif /* - * count32() counts the number of arguments/envelopes. It is basically - * a copy of count() from fs/exec.c, except that it works - * with 32 bit argv and envp pointers. - */ - -static int count32(u32 *argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - u32 p; - int error; - - error = get_user(p,argv); - if (error) - return error; - if (!p) - break; - argv++; - if(++i > max) - return -E2BIG; - } - } - return i; -} - - -/* - * copy_strings32() is basically a copy of copy_strings() from fs/exec.c - * except that it works with 32 bit argv and envp pointers. 
- */ - - -static int copy_strings32(int argc, u32 *argv, struct linux_binprm *bprm) -{ - while (argc-- > 0) { - u32 str; - int len; - unsigned long pos; - - if (get_user(str, argv + argc) || - !str || - !(len = strnlen_user((char *)compat_ptr(str), bprm->p))) - return -EFAULT; - - if (bprm->p < len) - return -E2BIG; - - bprm->p -= len; - - pos = bprm->p; - while (len > 0) { - char *kaddr; - int i, new, err; - struct page *page; - int offset, bytes_to_copy; - - offset = pos % PAGE_SIZE; - i = pos/PAGE_SIZE; - page = bprm->page[i]; - new = 0; - if (!page) { - page = alloc_page(GFP_HIGHUSER); - bprm->page[i] = page; - if (!page) - return -ENOMEM; - new = 1; - } - kaddr = (char *)kmap(page); - - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len); - } - err = copy_from_user(kaddr + offset, (char *)compat_ptr(str), bytes_to_copy); - flush_dcache_page(page); - kunmap(page); - - if (err) - return -EFAULT; - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; - } - } - return 0; -} - -/* - * do_execve32() is mostly a copy of do_execve(), with the exception - * that it processes 32 bit argv and envp pointers. 
- */ - -static inline int -do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) -{ - struct linux_binprm bprm; - struct file *file; - int retval; - int i; - - file = open_exec(filename); - - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); - - DBG(("do_execve32(%s, %p, %p, %p)\n", filename, argv, envp, regs)); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - - bprm.mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm.mm) - goto out_file; - - retval = init_new_context(current, bprm.mm); - if (retval < 0) - goto out_mm; - - if ((bprm.argc = count32(argv, bprm.p / sizeof(u32))) < 0) - goto out_mm; - - if ((bprm.envc = count32(envp, bprm.p / sizeof(u32))) < 0) - goto out_mm; - - retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; - - bprm.exec = bprm.p; - retval = copy_strings32(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; - - retval = copy_strings32(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(&bprm,regs); - if (retval >= 0) - /* execve success */ - return retval; - -out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0; i < MAX_ARG_PAGES; i++) { - struct page *page = bprm.page[i]; - if (page) - __free_page(page); - } - -out_mm: - mmdrop(bprm.mm); - -out_file: - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); - } - - return retval; -} - -/* * sys32_execve() executes a new program. 
*/ @@ -261,8 +78,8 @@ asmlinkage int sys32_execve(struct pt_re error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve32(filename, (u32 *) regs->gr[25], - (u32 *) regs->gr[24], regs); + error = compat_do_execve(filename, compat_ptr(regs->gr[25]), + compat_ptr(regs->gr[24]), regs); if (error == 0) current->ptrace &= ~PT_DTRACE; putname(filename); @@ -609,149 +426,6 @@ out: return error; } -/* readv/writev stolen from mips64 */ -typedef ssize_t (*IO_fn_t)(struct file *, char *, size_t, loff_t *); - -static long -do_readv_writev32(int type, struct file *file, const struct compat_iovec *vector, - u32 count) -{ - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - IO_fn_t fn; - - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ - if (!count) - return 0; - if(verify_area(VERIFY_READ, vector, sizeof(struct compat_iovec)*count)) - return -EFAULT; - if (count > UIO_MAXIOV) - return -EINVAL; - if (count > UIO_FASTIOV) { - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - return -ENOMEM; - } - - tot_len = 0; - i = count; - ivp = iov; - while (i > 0) { - u32 len; - u32 buf; - - __get_user(len, &vector->iov_len); - __get_user(buf, &vector->iov_base); - tot_len += len; - ivp->iov_base = compat_ptr(buf); - ivp->iov_len = (compat_size_t) len; - vector++; - ivp++; - i--; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) { - if (iov != iovstack) - kfree(iov); - return retval; - } - - /* Then do the actual IO. 
Note that sockets need to be handled - * specially as they have atomicity guarantees and can handle - * iovec's natively - */ - if (inode->i_sock) { - int err; - err = sock_readv_writev(type, inode, file, iov, count, tot_len); - if (iov != iovstack) - kfree(iov); - return err; - } - - if (!file->f_op) { - if (iov != iovstack) - kfree(iov); - return -EINVAL; - } - /* VERIFY_WRITE actually means a read, as we write to user space */ - fn = file->f_op->read; - if (type == VERIFY_READ) - fn = (IO_fn_t) file->f_op->write; - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; - ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (retval) - break; - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; - } - if (iov != iovstack) - kfree(iov); - - return retval; -} - -asmlinkage long -sys32_readv(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - ssize_t ret; - - ret = -EBADF; - file = fget(fd); - if (!file) - goto bad_file; - if (file->f_op && (file->f_mode & FMODE_READ) && - (file->f_op->readv || file->f_op->read)) - ret = do_readv_writev32(VERIFY_WRITE, file, vector, count); - - fput(file); - -bad_file: - return ret; -} - -asmlinkage long -sys32_writev(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - ssize_t ret; - - ret = -EBADF; - file = fget(fd); - if(!file) - goto bad_file; - if (file->f_op && (file->f_mode & FMODE_WRITE) && - (file->f_op->writev || file->f_op->write)) - ret = do_readv_writev32(VERIFY_READ, file, vector, count); - fput(file); - -bad_file: - return ret; -} - /*** copied from mips64 ***/ /* * Ooo, nasty. 
We need here to frob 32-bit unsigned longs to @@ -814,126 +488,6 @@ set_fd_set32(unsigned long n, u32 *ufdse __put_user(*fdset, ufdset); } -/*** This is a virtual copy of sys_select from fs/select.c and probably - *** should be compared to it from time to time - ***/ -static inline void *select_bits_alloc(int size) -{ - return kmalloc(6 * size, GFP_KERNEL); -} - -static inline void select_bits_free(void *bits, int size) -{ - kfree(bits); -} - -/* - * We can actually return ERESTARTSYS instead of EINTR, but I'd - * like to be certain this leads to no problems. So I return - * EINTR just for safety. - * - * Update: ERESTARTSYS breaks at least the xview clock binary, so - * I'm trying ERESTARTNOHAND which restart only when you want to. - */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -#define DIVIDE_ROUND_UP(x,y) (((x)+(y)-1)/(y)) - -asmlinkage long -sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, struct compat_timeval *tvp) -{ - fd_set_bits fds; - char *bits; - long timeout; - int ret, size, err; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp) { - struct compat_timeval tv32; - time_t sec, usec; - - if ((ret = copy_from_user(&tv32, tvp, sizeof tv32))) - goto out_nofds; - - sec = tv32.tv_sec; - usec = tv32.tv_usec; - - ret = -EINVAL; - if (sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = DIVIDE_ROUND_UP(usec, 1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. 
- */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = select_bits_alloc(size); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - if ((ret = get_fd_set32(n, inp, fds.in)) || - (ret = get_fd_set32(n, outp, fds.out)) || - (ret = get_fd_set32(n, exp, fds.ex))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - err = put_user(sec, &tvp->tv_sec); - err |= __put_user(usec, &tvp->tv_usec); - if (err) - ret = -EFAULT; - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set32(n, inp, fds.res_in); - set_fd_set32(n, outp, fds.res_out); - set_fd_set32(n, exp, fds.res_ex); - -out: - select_bits_free(bits, size); -out_nofds: - return ret; -} - struct msgbuf32 { int mtype; char mtext[1]; @@ -991,7 +545,6 @@ asmlinkage long sys32_msgrcv(int msqid, return err; } - asmlinkage int sys32_sendfile(int out_fd, int in_fd, compat_off_t *offset, s32 count) { mm_segment_t old_fs = get_fs(); @@ -1011,94 +564,6 @@ asmlinkage int sys32_sendfile(int out_fd return ret; } -/* EXPORT/UNEXPORT */ -struct nfsctl_export32 { - char ex_client[NFSCLNT_IDMAX+1]; - char ex_path[NFS_MAXPATHLEN+1]; - __kernel_old_dev_t ex_dev; - compat_ino_t ex_ino; - int ex_flags; - __kernel_uid_t ex_anon_uid; - __kernel_gid_t ex_anon_gid; -}; - -struct nfsctl_arg32 { - int ca_version; /* safeguard */ - /* wide kernel places this union on 8-byte boundary, narrow on 4 */ - union { - struct nfsctl_svc u_svc; - 
struct nfsctl_client u_client; - struct nfsctl_export32 u_export; - struct nfsctl_fdparm u_getfd; - struct nfsctl_fsparm u_getfs; - } u; -}; - -asmlinkage int sys32_nfsservctl(int cmd, void *argp, void *resp) -{ - int ret, tmp; - struct nfsctl_arg32 n32; - struct nfsctl_arg n; - - ret = copy_from_user(&n, argp, sizeof n.ca_version); - if (ret != 0) - return ret; - - /* adjust argp to point at the union inside the user's n32 struct */ - tmp = (unsigned long)&n32.u - (unsigned long)&n32; - argp = (void *)((unsigned long)argp + tmp); - switch(cmd) { - case NFSCTL_SVC: - ret = copy_from_user(&n.u, argp, sizeof n.u.u_svc); - break; - - case NFSCTL_ADDCLIENT: - case NFSCTL_DELCLIENT: - ret = copy_from_user(&n.u, argp, sizeof n.u.u_client); - break; - - case NFSCTL_GETFD: - ret = copy_from_user(&n.u, argp, sizeof n.u.u_getfd); - break; - - case NFSCTL_GETFS: - ret = copy_from_user(&n.u, argp, sizeof n.u.u_getfs); - break; - - case NFSCTL_UNEXPORT: /* nfsctl_export */ - case NFSCTL_EXPORT: /* nfsctl_export */ - ret = copy_from_user(&n32.u, argp, sizeof n32.u.u_export); -#undef CP -#define CP(x) n.u.u_export.ex_##x = n32.u.u_export.ex_##x - memcpy(n.u.u_export.ex_client, n32.u.u_export.ex_client, sizeof n32.u.u_export.ex_client); - memcpy(n.u.u_export.ex_path, n32.u.u_export.ex_path, sizeof n32.u.u_export.ex_path); - CP(dev); - CP(ino); - CP(flags); - CP(anon_uid); - CP(anon_gid); - break; - - default: - /* lockd probes for some other values (0x10000); - * so don't BUG() */ - ret = -EINVAL; - break; - } - - if (ret == 0) { - unsigned char rbuf[NFS_FHSIZE + sizeof (struct knfsd_fh)]; - KERNEL_SYSCALL(ret, sys_nfsservctl, cmd, &n, &rbuf); - if (cmd == NFSCTL_GETFD) { - ret = copy_to_user(resp, rbuf, NFS_FHSIZE); - } else if (cmd == NFSCTL_GETFS) { - ret = copy_to_user(resp, rbuf, sizeof (struct knfsd_fh)); - } - } - - return ret; -} - typedef long __kernel_loff_t32; /* move this to asm/posix_types.h? 
*/ asmlinkage int sys32_sendfile64(int out_fd, int in_fd, __kernel_loff_t32 *offset, s32 count) --- linux-2.6.6-rc1/arch/ppc64/configs/g5_defconfig 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ppc64/configs/g5_defconfig 2004-04-18 22:25:24.649083248 -0700 @@ -23,8 +23,10 @@ CONFIG_STANDALONE=y # CONFIG_SWAP=y CONFIG_SYSVIPC=y +# CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_HOTPLUG=y # CONFIG_IKCONFIG is not set @@ -35,6 +37,7 @@ CONFIG_EPOLL=y CONFIG_IOSCHED_NOOP=y CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set # @@ -58,11 +61,11 @@ CONFIG_PPC64=y CONFIG_PPC_OF=y CONFIG_ALTIVEC=y CONFIG_PPC_PMAC=y -# CONFIG_PMAC_DART is not set +CONFIG_PMAC_DART=y CONFIG_PPC_PMAC64=y CONFIG_BOOTX_TEXT=y CONFIG_POWER4_ONLY=y -# CONFIG_IOMMU_VMERGE is not set +CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_NR_CPUS=2 @@ -80,6 +83,7 @@ CONFIG_BINFMT_ELF=y # CONFIG_BINFMT_MISC is not set CONFIG_PCI_LEGACY_PROC=y CONFIG_PCI_NAMES=y +# CONFIG_HOTPLUG_CPU is not set # # PCMCIA/CardBus support @@ -224,7 +228,7 @@ CONFIG_SCSI_CONSTANTS=y # # SCSI Transport Attributes # -# CONFIG_SCSI_SPI_ATTRS is not set +CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_FC_ATTRS is not set # @@ -242,6 +246,8 @@ CONFIG_SCSI_SATA=y CONFIG_SCSI_SATA_SVW=y # CONFIG_SCSI_ATA_PIIX is not set # CONFIG_SCSI_SATA_PROMISE is not set +# CONFIG_SCSI_SATA_SIL is not set +# CONFIG_SCSI_SATA_SIS is not set # CONFIG_SCSI_SATA_VIA is not set # CONFIG_SCSI_SATA_VITESSE is not set # CONFIG_SCSI_BUSLOGIC is not set @@ -359,7 +365,6 @@ CONFIG_NET_IPIP=y # CONFIG_NET_IPGRE is not set # CONFIG_IP_MROUTE is not set # CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m @@ -431,7 +436,6 @@ CONFIG_XFRM=y # # SCTP Configuration (EXPERIMENTAL) # -CONFIG_IPV6_SCTP__=y # CONFIG_IP_SCTP is not set # CONFIG_ATM is not 
set # CONFIG_VLAN_8021Q is not set @@ -498,7 +502,6 @@ CONFIG_E1000=y # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set # CONFIG_R8169 is not set -# CONFIG_SIS190 is not set # CONFIG_SK98LIN is not set CONFIG_TIGON3=m @@ -506,6 +509,7 @@ CONFIG_TIGON3=m # Ethernet (10000 Mbit) # # CONFIG_IXGB is not set +# CONFIG_S2IO is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set # CONFIG_IBMVETH is not set @@ -675,6 +679,7 @@ CONFIG_I2C_ALGOBIT=y # I2C Hardware Bus support # # CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set # CONFIG_I2C_AMD756 is not set # CONFIG_I2C_AMD8111 is not set @@ -719,6 +724,8 @@ CONFIG_I2C_KEYWEST=y # Other I2C Chip support # # CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCF8591 is not set # CONFIG_I2C_DEBUG_CORE is not set # CONFIG_I2C_DEBUG_ALGO is not set # CONFIG_I2C_DEBUG_BUS is not set @@ -811,6 +818,7 @@ CONFIG_USB_DEVICEFS=y # CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_SPLIT_ISO is not set +# CONFIG_USB_EHCI_ROOT_HUB_TT is not set CONFIG_USB_OHCI_HCD=y # CONFIG_USB_UHCI_HCD is not set @@ -951,6 +959,7 @@ CONFIG_USB_EZUSB=y # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set # CONFIG_USB_LED is not set +# CONFIG_USB_CYTHERM is not set # CONFIG_USB_TEST is not set # @@ -1003,6 +1012,7 @@ CONFIG_VFAT_FS=y # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y # CONFIG_DEVFS_FS is not set CONFIG_DEVPTS_FS_XATTR=y # CONFIG_DEVPTS_FS_SECURITY is not set @@ -1157,9 +1167,9 @@ CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m -# CONFIG_CRYPTO_ARC4 is not set +CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_MICHAEL_MIC is not set +CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_TEST=m # --- linux-2.6.6-rc1/arch/ppc64/Kconfig 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/Kconfig 2004-04-18 22:25:42.161420968 -0700 @@ -173,6 +173,15 @@ config NUMA bool "NUMA support" depends on 
DISCONTIGMEM +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with POWER5 cpus at a cost of slightly increased + overhead in some places. If unsure say N here. + config PREEMPT bool "Preemptible Kernel" depends on BROKEN @@ -394,7 +403,20 @@ config DEBUG_INFO debugging info resulting in a larger kernel image. Say Y here only if you plan to use gdb to debug the kernel. If you don't debug the kernel, you can say N. - + +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on PROC_FS + default y + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. These + stats may be useful for both tuning and debugging the scheduler + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. + endmenu source "security/Kconfig" --- linux-2.6.6-rc1/arch/ppc64/kernel/entry.S 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/entry.S 2004-04-18 22:25:24.649083248 -0700 @@ -487,7 +487,7 @@ _GLOBAL(enter_rtas) mflr r0 std r0,16(r1) stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */ - + /* Because RTAS is running in 32b mode, it clobbers the high order half * of all registers that it saves. We therefore save those registers * RTAS might touch to the stack. (r0, r3-r13 are caller saved) @@ -512,12 +512,25 @@ _GLOBAL(enter_rtas) mfsrr1 r10 std r10,_SRR1(r1) + /* There is no way it is acceptable to get here with interrupts enabled, + * check it with the asm equivalent of WARN_ON + */ + mfmsr r6 + andi. 
r0,r6,MSR_EE +1: tdnei r0,0 +.section __bug_table,"a" + .llong 1b,__LINE__ + 0x1000000, 1f, 2f +.previous +.section .rodata,"a" +1: .asciz __FILE__ +2: .asciz "enter_rtas" +.previous + /* Unfortunately, the stack pointer and the MSR are also clobbered, * so they are saved in the PACA which allows us to restore * our original state after RTAS returns. */ std r1,PACAR1(r13) - mfmsr r6 std r6,PACASAVEDMSR(r13) /* Setup our real return addr */ --- linux-2.6.6-rc1/arch/ppc64/kernel/head.S 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/head.S 2004-04-18 22:25:35.892374008 -0700 @@ -35,6 +35,7 @@ #include #include #include +#include #ifdef CONFIG_PPC_ISERIES #define DO_SOFT_DISABLE @@ -93,8 +94,13 @@ _stext: #ifdef CONFIG_PPC_PSERIES _STATIC(__start) + /* NOP this out unconditionally */ +BEGIN_FTR_SECTION b .__start_initialization_pSeries +END_FTR_SECTION(0, 1) #endif + /* Catch branch to 0 in real mode */ + trap #ifdef CONFIG_PPC_ISERIES /* * At offset 0x20, there is a pointer to iSeries LPAR data. 
@@ -2280,4 +2286,4 @@ stab_array: */ .globl cmd_line cmd_line: - .space 512 /* COMMAND_LINE_SIZE */ + .space COMMAND_LINE_SIZE --- linux-2.6.6-rc1/arch/ppc64/kernel/misc.S 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/misc.S 2004-04-18 22:25:59.127841680 -0700 @@ -717,8 +717,8 @@ _GLOBAL(sys_call_table32) .llong .ppc32_select .llong .sys_flock .llong .sys_msync - .llong .sys32_readv /* 145 */ - .llong .sys32_writev + .llong .compat_sys_readv /* 145 */ + .llong .compat_sys_writev .llong .sys32_getsid .llong .sys_fdatasync .llong .sys32_sysctl @@ -740,7 +740,7 @@ _GLOBAL(sys_call_table32) .llong .sys_getresuid /* 165 */ .llong .sys_ni_syscall /* old query_module syscall */ .llong .sys_poll - .llong .sys32_nfsservctl + .llong .compat_sys_nfsservctl .llong .sys_setresgid .llong .sys_getresgid /* 170 */ .llong .sys32_prctl --- linux-2.6.6-rc1/arch/ppc64/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/process.c 2004-04-18 22:25:54.252582832 -0700 @@ -472,12 +472,6 @@ static inline int validate_sp(unsigned l return 1; } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched (*(unsigned long *)scheduling_functions_start_here) -#define last_sched (*(unsigned long *)scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long ip, sp; @@ -496,7 +490,7 @@ unsigned long get_wchan(struct task_stru return 0; if (count > 0) { ip = *(unsigned long *)(sp + 16); - if (ip < first_sched || ip >= last_sched) + if (!in_sched_functions(ip)) return ip; } } while (count++ < 16); --- linux-2.6.6-rc1/arch/ppc64/kernel/prom.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/prom.c 2004-04-18 22:25:24.653082640 -0700 @@ -1926,6 +1926,11 @@ finish_node(struct device_node *np, unsi np->name = get_property(np, "name", 0); np->type = get_property(np, "device_type", 0); + if (!np->name) + np->name = ""; + if (!np->type) + np->type = ""; + /* get the device addresses and interrupts */ if (ifunc != NULL) mem_start = ifunc(np, mem_start, naddrc, nsizec); --- linux-2.6.6-rc1/arch/ppc64/kernel/rtas.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/rtas.c 2004-04-18 22:25:27.832599280 -0700 @@ -68,15 +68,20 @@ char rtas_data_buf[RTAS_DATA_BUF_SIZE]__ void call_rtas_display_status(char c) { - struct rtas_args *rtas = &(get_paca()->xRtas); + struct rtas_args *args = &(get_paca()->xRtas); + unsigned long s; + + spin_lock_irqsave(&rtas.lock, s); - rtas->token = 10; - rtas->nargs = 1; - rtas->nret = 1; - rtas->rets = (rtas_arg_t *)&(rtas->args[1]); - rtas->args[0] = (int)c; + args->token = 10; + args->nargs = 1; + args->nret = 1; + args->rets = (rtas_arg_t *)&(args->args[1]); + args->args[0] = (int)c; - enter_rtas((void *)__pa((unsigned long)rtas)); + enter_rtas((void *)__pa((unsigned long)args)); + + spin_unlock_irqrestore(&rtas.lock, s); } int @@ -91,8 +96,9 @@ rtas_token(const char *service) return tokp ? 
*tokp : RTAS_UNKNOWN_SERVICE; } -void -log_rtas_error(struct rtas_args *rtas_args) + +static int +__log_rtas_error(struct rtas_args *rtas_args) { struct rtas_args err_args, temp_args; @@ -111,13 +117,24 @@ log_rtas_error(struct rtas_args *rtas_ar PPCDBG(PPCDBG_RTAS, "\tentering rtas with 0x%lx\n", (void *)__pa((unsigned long)&err_args)); enter_rtas((void *)__pa((unsigned long)&get_paca()->xRtas)); - PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n"); - + PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n"); err_args = get_paca()->xRtas; get_paca()->xRtas = temp_args; - if (err_args.rets[0] == 0) + return err_args.rets[0]; +} + +void +log_rtas_error(struct rtas_args *rtas_args) +{ + unsigned long s; + int rc; + + spin_lock_irqsave(&rtas.lock, s); + rc = __log_rtas_error(rtas_args); + spin_unlock_irqrestore(&rtas.lock, s); + if (rc == 0) log_error(rtas_err_buf, ERR_TYPE_RTAS_LOG, 0); } @@ -126,9 +143,10 @@ rtas_call(int token, int nargs, int nret unsigned long *outputs, ...) { va_list list; - int i; + int i, logit = 0; unsigned long s; struct rtas_args *rtas_args = &(get_paca()->xRtas); + long ret; PPCDBG(PPCDBG_RTAS, "Entering rtas_call\n"); PPCDBG(PPCDBG_RTAS, "\ttoken = 0x%x\n", token); @@ -138,6 +156,9 @@ rtas_call(int token, int nargs, int nret if (token == RTAS_UNKNOWN_SERVICE) return -1; + /* Gotta do something different here, use global lock for now... */ + spin_lock_irqsave(&rtas.lock, s); + rtas_args->token = token; rtas_args->nargs = nargs; rtas_args->nret = nret; @@ -150,26 +171,16 @@ rtas_call(int token, int nargs, int nret va_end(list); for (i = 0; i < nret; ++i) - rtas_args->rets[i] = 0; + rtas_args->rets[i] = 0; -#if 0 /* Gotta do something different here, use global lock for now... 
*/ - spin_lock_irqsave(&rtas_args->lock, s); -#else - spin_lock_irqsave(&rtas.lock, s); -#endif PPCDBG(PPCDBG_RTAS, "\tentering rtas with 0x%lx\n", (void *)__pa((unsigned long)rtas_args)); enter_rtas((void *)__pa((unsigned long)rtas_args)); PPCDBG(PPCDBG_RTAS, "\treturned from rtas ...\n"); if (rtas_args->rets[0] == -1) - log_rtas_error(rtas_args); + logit = (__log_rtas_error(rtas_args) == 0); -#if 0 /* Gotta do something different here, use global lock for now... */ - spin_unlock_irqrestore(&rtas_args->lock, s); -#else - spin_unlock_irqrestore(&rtas.lock, s); -#endif ifppcdebug(PPCDBG_RTAS) { for(i=0; i < nret ;i++) udbg_printf("\tnret[%d] = 0x%lx\n", i, (ulong)rtas_args->rets[i]); @@ -178,7 +189,15 @@ rtas_call(int token, int nargs, int nret if (nret > 1 && outputs != NULL) for (i = 0; i < nret-1; ++i) outputs[i] = rtas_args->rets[i+1]; - return (ulong)((nret > 0) ? rtas_args->rets[0] : 0); + ret = (ulong)((nret > 0) ? rtas_args->rets[0] : 0); + + /* Gotta do something different here, use global lock for now... */ + spin_unlock_irqrestore(&rtas.lock, s); + + if (logit) + log_error(rtas_err_buf, ERR_TYPE_RTAS_LOG, 0); + + return ret; } /* Given an RTAS status code of 990n compute the hinted delay of 10^n @@ -464,12 +483,12 @@ asmlinkage int ppc_rtas(struct rtas_args enter_rtas((void *)__pa((unsigned long)&get_paca()->xRtas)); args = get_paca()->xRtas; + spin_unlock_irqrestore(&rtas.lock, flags); + args.rets = (rtas_arg_t *)&(args.args[nargs]); if (args.rets[0] == -1) log_rtas_error(&args); - spin_unlock_irqrestore(&rtas.lock, flags); - /* Copy out args. 
*/ if (copy_to_user(uargs->args + nargs, args.args + nargs, @@ -486,6 +505,8 @@ void rtas_stop_self(void) { struct rtas_args *rtas_args = &(get_paca()->xRtas); + local_irq_disable(); + rtas_args->token = rtas_token("stop-self"); BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE); rtas_args->nargs = 0; @@ -495,6 +516,7 @@ void rtas_stop_self(void) printk("%u %u Ready to die...\n", smp_processor_id(), hard_smp_processor_id()); enter_rtas((void *)__pa(rtas_args)); + panic("Alas, I survived.\n"); } #endif /* CONFIG_HOTPLUG_CPU */ --- linux-2.6.6-rc1/arch/ppc64/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/setup.c 2004-04-18 22:25:35.893373856 -0700 @@ -82,7 +82,6 @@ unsigned long decr_overclock_proc0_set = int powersave_nap; -char saved_command_line[COMMAND_LINE_SIZE]; unsigned char aux_device_present; void parse_cmd_line(unsigned long r3, unsigned long r4, unsigned long r5, --- linux-2.6.6-rc1/arch/ppc64/kernel/signal32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/signal32.c 2004-04-18 22:25:45.878855832 -0700 @@ -660,7 +660,7 @@ int sys32_sigaltstack(u32 newstack, u32 * Set up a signal frame for a "real-time" signal handler * (one which gets siginfo). 
*/ -static void handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, +static void handle_rt_signal32(unsigned long sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs, unsigned long newsp) { @@ -706,7 +706,7 @@ static void handle_rt_signal32(unsigned regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ka_copy->sa.sa_handler; regs->link = (unsigned long) frame->tramp; regs->trap = 0; @@ -718,7 +718,7 @@ badframe: regs, frame, newsp); #endif if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ -831,7 +831,7 @@ long sys32_rt_sigreturn(int r3, int r4, /* * OK, we're invoking a handler */ -static void handle_signal32(unsigned long sig, struct k_sigaction *ka, +static void handle_signal32(unsigned long sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs, unsigned long newsp) { @@ -856,7 +856,7 @@ static void handle_signal32(unsigned lon #if _NSIG != 64 #error "Please adjust handle_signal32()" #endif - if (__put_user((u32)(u64)ka->sa.sa_handler, &sc->handler) + if (__put_user((u32)(u64)ka_copy->sa.sa_handler, &sc->handler) || __put_user(oldset->sig[0], &sc->oldmask) || __put_user((oldset->sig[0] >> 32), &sc->_unused[3]) || __put_user((u32)(u64)frame, &sc->regs) @@ -871,7 +871,7 @@ static void handle_signal32(unsigned lon regs->gpr[1] = (unsigned long) newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ka_copy->sa.sa_handler; regs->link = (unsigned long) frame->mctx.tramp; regs->trap = 0; @@ -883,7 +883,7 @@ badframe: regs, frame, *newspp); #endif if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = 
SIG_DFL; force_sig(SIGSEGV, current); } @@ -947,18 +947,16 @@ badframe: int do_signal32(sigset_t *oldset, struct pt_regs *regs) { siginfo_t info; - struct k_sigaction *ka; unsigned int frame, newsp; int signr, ret; + struct k_sigaction ka_copy; if (!oldset) oldset = &current->blocked; newsp = frame = 0; - signr = get_signal_to_deliver(&info, regs, NULL); - - ka = (signr == 0)? NULL: &current->sighand->action[signr-1]; + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); if (regs->trap == 0x0C00 /* System Call! */ && regs->ccr & 0x10000000 /* error signalled */ @@ -969,7 +967,7 @@ int do_signal32(sigset_t *oldset, struct if (signr > 0 && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK || (ret == ERESTARTSYS - && !(ka->sa.sa_flags & SA_RESTART)))) { + && !(ka_copy.sa.sa_flags & SA_RESTART)))) { /* make the system call return an EINTR error */ regs->result = -EINTR; regs->gpr[3] = EINTR; @@ -988,7 +986,7 @@ int do_signal32(sigset_t *oldset, struct if (signr == 0) return 0; /* no signals delivered */ - if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size + if ((ka_copy.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size && (!on_sig_stack(regs->gpr[1]))) newsp = (current->sas_ss_sp + current->sas_ss_size); else @@ -996,17 +994,15 @@ int do_signal32(sigset_t *oldset, struct newsp &= ~0xfUL; /* Whee! Actually deliver the signal.
*/ - if (ka->sa.sa_flags & SA_SIGINFO) - handle_rt_signal32(signr, ka, &info, oldset, regs, newsp); + if (ka_copy.sa.sa_flags & SA_SIGINFO) + handle_rt_signal32(signr, &ka_copy, &info, oldset, regs, newsp); else - handle_signal32(signr, ka, &info, oldset, regs, newsp); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + handle_signal32(signr, &ka_copy, &info, oldset, regs, newsp); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy.sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked, &current->blocked, + &ka_copy.sa.sa_mask); sigaddset(&current->blocked, signr); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); --- linux-2.6.6-rc1/arch/ppc64/kernel/signal.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/signal.c 2004-04-18 22:25:45.880855528 -0700 @@ -230,15 +230,15 @@ static long restore_sigcontext(struct pt /* * Allocate space for the signal frame */ -static inline void * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, - size_t frame_size) +static inline void * get_sigframe(struct k_sigaction *ka_copy, + struct pt_regs *regs, size_t frame_size) { unsigned long newsp; /* Default to using normal stack */ newsp = regs->gpr[1]; - if (ka->sa.sa_flags & SA_ONSTACK) { + if (ka_copy->sa.sa_flags & SA_ONSTACK) { if (! on_sig_stack(regs->gpr[1])) newsp = (current->sas_ss_sp + current->sas_ss_size); } @@ -376,8 +376,8 @@ badframe: do_exit(SIGSEGV); } -static void setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +static void setup_rt_frame(int signr, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *set, struct pt_regs *regs) { /* Handler is *really* a pointer to the function descriptor for * the signal routine.
The first entry in the function @@ -389,7 +389,7 @@ static void setup_rt_frame(int signr, st unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (verify_area(VERIFY_WRITE, frame, sizeof(*frame))) goto badframe; @@ -408,7 +408,7 @@ static void setup_rt_frame(int signr, st &frame->uc.uc_stack.ss_flags); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL, - (unsigned long)ka->sa.sa_handler); + (unsigned long)ka_copy->sa.sa_handler); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) goto badframe; @@ -418,7 +418,7 @@ static void setup_rt_frame(int signr, st if (err) goto badframe; - funct_desc_ptr = (func_descr_t *) ka->sa.sa_handler; + funct_desc_ptr = (func_descr_t *) ka_copy->sa.sa_handler; /* Allocate a dummy caller frame for the signal handler. */ newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; @@ -430,7 +430,7 @@ static void setup_rt_frame(int signr, st regs->gpr[1] = newsp; err |= get_user(regs->gpr[2], &funct_desc_ptr->toc); regs->gpr[3] = signr; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka_copy->sa.sa_flags & SA_SIGINFO) { err |= get_user(regs->gpr[4], (unsigned long *)&frame->pinfo); err |= get_user(regs->gpr[5], (unsigned long *)&frame->puc); regs->gpr[6] = (unsigned long) frame; @@ -447,33 +447,33 @@ badframe: printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", regs, frame, newsp); #endif - do_exit(SIGSEGV); + if (signr == SIGSEGV) + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; + force_sig(SIGSEGV, current); } /* * OK, we're invoking a handler */ -static void handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +static void handle_signal(unsigned long sig, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { /* Set up 
Signal Frame */ - setup_rt_frame(sig, ka, info, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + setup_rt_frame(sig, ka_copy, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked, &current->blocked, + &ka_copy->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); } - return; } -static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) +static inline void syscall_restart(struct pt_regs *regs, + struct k_sigaction *ka_copy) { switch ((int)regs->result) { case -ERESTART_RESTARTBLOCK: @@ -488,7 +488,7 @@ static inline void syscall_restart(struc /* ERESTARTSYS means to restart the syscall if there is no * handler or the handler was registered with SA_RESTART */ - if (!(ka->sa.sa_flags & SA_RESTART)) { + if (!(ka_copy->sa.sa_flags & SA_RESTART)) { regs->result = -EINTR; break; } @@ -513,6 +513,7 @@ int do_signal(sigset_t *oldset, struct p { siginfo_t info; int signr; + struct k_sigaction ka_copy; /* * If the current thread is 32 bit - invoke the @@ -524,14 +525,12 @@ int do_signal(sigset_t *oldset, struct p if (!oldset) oldset = &current->blocked; - signr = get_signal_to_deliver(&info, regs, NULL); + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); if (signr > 0) { - struct k_sigaction *ka = &current->sighand->action[signr-1]; - /* Whee! Actually deliver the signal.
*/ if (regs->trap == 0x0C00) - syscall_restart(regs, ka); - handle_signal(signr, ka, &info, oldset, regs); + syscall_restart(regs, &ka_copy); + handle_signal(signr, &ka_copy, &info, oldset, regs); return 1; } --- linux-2.6.6-rc1/arch/ppc64/kernel/smp.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/kernel/smp.c 2004-04-18 22:25:40.045742600 -0700 @@ -834,11 +834,6 @@ void __init smp_prepare_cpus(unsigned in paca[boot_cpuid].prof_counter = 1; paca[boot_cpuid].prof_multiplier = 1; - /* - * XXX very rough. - */ - cache_decay_ticks = HZ/100; - #ifndef CONFIG_PPC_ISERIES paca[boot_cpuid].next_jiffy_update_tb = tb_last_stamp = get_tb(); @@ -996,3 +991,277 @@ void __init smp_cpus_done(unsigned int m set_cpus_allowed(current, old_mask); } + +#ifdef CONFIG_SCHED_SMT +#ifdef CONFIG_NUMA +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +static DEFINE_PER_CPU(struct sched_domain, node_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + struct sched_domain *node_domain = &per_cpu(node_domains, i); + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + cpumask_t my_cpumask = cpumask_of_cpu(i); + cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1); + + *cpu_domain = SD_SIBLING_INIT; + if (__is_processor(PV_POWER5)) + cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask); + else + cpu_domain->span = my_cpumask; + cpu_domain->groups = &sched_group_cpus[i]; + cpu_domain->parent = phys_domain; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = nodemask; + // 
phys_domain->cache_hot_time = XXX; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; + phys_domain->parent = node_domain; + + *node_domain = SD_NODE_INIT; + node_domain->span = cpu_possible_map; + // node_domain->cache_hot_time = XXX; + node_domain->groups = &sched_group_nodes[node]; + } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) { + per_cpu(cpu_domains, i).flags |= SD_SHARE_CPUPOWER; + per_cpu(cpu_domains, first_cpu(cpu_domain->span)).flags |= + SD_SHARE_CPUPOWER; + continue; + } + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + for (i = 0; i < MAX_NUMNODES; i++) { + int j; + cpumask_t nodemask; + struct sched_group *node = &sched_group_nodes[i]; + cpumask_t node_cpumask = node_to_cpumask(i); + cpus_and(nodemask, node_cpumask, cpu_online_map); + + if (cpus_empty(nodemask)) + continue; + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu_mask(j, nodemask) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, j); + struct sched_group *cpu = &sched_group_phys[j]; + + if (j != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* + * Make each extra sibling increase power by 10% of + * the basic CPU. This is very arbitrary. 
+ */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + node->cpu_power += cpu->cpu_power; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + /* Set up nodes */ + first_cpu = last_cpu = NULL; + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *cpu = &sched_group_nodes[i]; + cpumask_t nodemask; + cpumask_t node_cpumask = node_to_cpumask(i); + cpus_and(nodemask, node_cpumask, cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + cpu->cpumask = nodemask; + /* ->cpu_power already setup */ + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu(i) { + int node = cpu_to_node(i); + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_domain, i); + } +} +#else /* !CONFIG_NUMA */ +static struct sched_group sched_group_cpus[NR_CPUS]; +static struct sched_group sched_group_phys[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +static DEFINE_PER_CPU(struct sched_domain, phys_domains); +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_domain *phys_domain = &per_cpu(phys_domains, i); + cpumask_t my_cpumask = cpumask_of_cpu(i); + cpumask_t sibling_cpumask = cpumask_of_cpu(i ^ 0x1); + + *cpu_domain = SD_SIBLING_INIT; + if (__is_processor(PV_POWER5)) + cpus_or(cpu_domain->span, my_cpumask, sibling_cpumask); + else + cpu_domain->span = my_cpumask; + cpu_domain->groups = &sched_group_cpus[i]; + cpu_domain->parent = phys_domain; + + *phys_domain = SD_CPU_INIT; + phys_domain->span = cpu_possible_map; + // phys_domain->cache_hot_time = XXX; + phys_domain->groups = &sched_group_phys[first_cpu(cpu_domain->span)]; 
+ } + + /* Set up CPU (sibling) groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + int j; + first_cpu = last_cpu = NULL; + + if (i != first_cpu(cpu_domain->span)) { + per_cpu(cpu_domains, i).flags |= SD_SHARE_CPUPOWER; + per_cpu(cpu_domains, first_cpu(cpu_domain->span)).flags |= + SD_SHARE_CPUPOWER; + continue; + } + + for_each_cpu_mask(j, cpu_domain->span) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + } + + first_cpu = last_cpu = NULL; + /* Set up physical groups */ + for_each_cpu(i) { + struct sched_domain *cpu_domain = &per_cpu(cpu_domains, i); + struct sched_group *cpu = &sched_group_phys[i]; + + if (i != first_cpu(cpu_domain->span)) + continue; + + cpu->cpumask = cpu_domain->span; + /* See SMT+NUMA setup for comment */ + cpu->cpu_power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE*(cpus_weight(cpu->cpumask)-1) / 10; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_sd, i); + } +} +#endif /* CONFIG_NUMA */ +#else /* !CONFIG_SCHED_SMT */ + +#ifdef CONFIG_NUMA +#error ppc64 has no NUMA scheduler defined without CONFIG_SCHED_SMT. \ + Please enable CONFIG_SCHED_SMT or bug Anton. 
+#endif + +static struct sched_group sched_group_cpus[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); + +__init void arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + + *cpu_sd = SD_CPU_INIT; + cpu_sd->span = cpu_possible_map; + // cpu_sd->cache_hot_time = XXX; + cpu_sd->groups = &sched_group_cpus[i]; + } + + /* Set up CPU groups */ + for_each_cpu_mask(i, cpu_possible_map) { + struct sched_group *cpu = &sched_group_cpus[i]; + + cpus_clear(cpu->cpumask); + cpu_set(i, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_sd, i); + } +} +#endif --- linux-2.6.6-rc1/arch/ppc64/kernel/sys_ppc32.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/ppc64/kernel/sys_ppc32.c 2004-04-18 22:25:59.129841376 -0700 @@ -78,178 +78,6 @@ #include "pci.h" -typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); -typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - -static long do_readv_writev32(int type, struct file *file, - const struct compat_iovec *vector, u32 count) -{ - compat_ssize_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - io_fn_t fn; - iov_fn_t fnv; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... 
- */ - retval = 0; - if (count == 0) - goto out; - - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ - retval = -EINVAL; - if (count > UIO_MAXIOV) - goto out; - if (!file->f_op) - goto out; - if (count > UIO_FASTIOV) { - retval = -ENOMEM; - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; - } - retval = -EFAULT; - if (verify_area(VERIFY_READ, vector, sizeof(struct compat_iovec)*count)) - goto out; - - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - i = count; - ivp = iov; - retval = -EINVAL; - while(i > 0) { - compat_ssize_t tmp = tot_len; - compat_ssize_t len; - u32 buf; - - if (__get_user(len, &vector->iov_len) || - __get_user(buf, &vector->iov_base)) { - retval = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting an compat_ssize_t .. */ - goto out; - tot_len += len; - if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ - goto out; - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - if (tot_len == 0) { - retval = 0; - goto out; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == READ - ? 
FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) - goto out; - - if (type == READ) { - fn = file->f_op->read; - fnv = file->f_op->readv; - } else { - fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - retval = fnv(file, iov, count, &file->f_pos); - goto out; - } - - /* Do it by hand, with file-ops */ - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; - ivp++; - count--; - - nr = fn(file, base, len, &file->f_pos); - - if (nr < 0) { - if (!retval) - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; - } -out: - if (iov != iovstack) - kfree(iov); - if ((retval + (type == READ)) > 0) - dnotify_parent(file->f_dentry, - (type == READ) ? DN_ACCESS : DN_MODIFY); - - return retval; -} - -asmlinkage long sys32_readv(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if (!file || !(file->f_mode & FMODE_READ)) - goto out; - - ret = -EINVAL; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) - goto out; - - ret = do_readv_writev32(READ, file, vector, count); - -out: - if (file) - fput(file); - return ret; -} - -asmlinkage long sys32_writev(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if (!file || !(file->f_mode & FMODE_WRITE)) - goto out; - - ret = -EINVAL; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) - goto out; - - ret = do_readv_writev32(WRITE, file, vector, count); - -out: - if (file) - fput(file); - return ret; -} - /* readdir & getdents */ #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) #define ROUND_UP(x) (((x)+sizeof(u32)-1) & ~(sizeof(u32)-1)) @@ -398,167 +226,12 @@ out: return error; } -/* - * Ooo, nasty. We need here to frob 32-bit unsigned longs to - * 64-bit unsigned longs. 
- */ -static inline int -get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset) -{ - if (ufdset) { - unsigned long odd; - - if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32))) - return -EFAULT; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - __get_user(l, ufdset); - __get_user(h, ufdset+1); - ufdset += 2; - *fdset++ = h << 32 | l; - n -= 2; - } - if (odd) - __get_user(*fdset, ufdset); - } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, this makes sure that - * actually happens. - */ - memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32)); - } - return 0; -} - -static inline void -set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) -{ - unsigned long odd; - - if (!ufdset) - return; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - l = *fdset++; - h = l >> 32; - __put_user(l, ufdset); - __put_user(h, ufdset+1); - ufdset += 2; - n -= 2; - } - if (odd) - __put_user(*fdset, ufdset); -} - - - -#define MAX_SELECT_SECONDS ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - -asmlinkage long sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) -{ - fd_set_bits fds; - struct compat_timeval *tvp = (struct compat_timeval *)AA(tvp_x); - char *bits; - unsigned long nn; - long timeout; - int ret, size, max_fdset; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp) { - time_t sec, usec; - if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) - || (ret = __get_user(sec, &tvp->tv_sec)) - || (ret = __get_user(usec, &tvp->tv_usec))) - goto out_nofds; - - ret = -EINVAL; - if(sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - - /* max_fdset can increase, so grab it once to avoid race */ - max_fdset = current->files->max_fdset; - if (n > max_fdset) - n = max_fdset; - - /* - * We need 6 bitmaps 
(in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. - */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); - if ((ret = get_fd_set32(nn, fds.in, inp)) || - (ret = get_fd_set32(nn, fds.out, outp)) || - (ret = get_fd_set32(nn, fds.ex, exp))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - put_user(sec, &tvp->tv_sec); - put_user(usec, &tvp->tv_usec); - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set32(nn, inp, fds.res_in); - set_fd_set32(nn, outp, fds.res_out); - set_fd_set32(nn, exp, fds.res_ex); - -out: - kfree(bits); - -out_nofds: - return ret; -} - -int ppc32_select(u32 n, u32* inp, u32* outp, u32* exp, u32 tvp_x) +asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + compat_uptr_t tvp_x) { /* sign extend n */ - return sys32_select((int)n, inp, outp, exp, tvp_x); + return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x)); } int cp_compat_stat(struct kstat *stat, struct compat_stat *statbuf) @@ -678,245 +351,6 @@ asmlinkage long sys32_adjtimex(struct ti return ret; } -/* Stuff for NFS server syscalls... 
*/ -struct nfsctl_svc32 { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct nfsctl_client32 { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct nfsctl_export32 { - s8 ex32_client[NFSCLNT_IDMAX+1]; - s8 ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - s32 ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; -}; - -struct nfsctl_fdparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_version; -}; - -struct nfsctl_fsparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_maxlen; -}; - -struct nfsctl_arg32 { - s32 ca32_version; /* safeguard */ - union { - struct nfsctl_svc32 u32_svc; - struct nfsctl_client32 u32_client; - struct nfsctl_export32 u32_export; - struct nfsctl_fdparm32 u32_getfd; - struct nfsctl_fsparm32 u32_getfs; - } u; -#define ca32_svc u.u32_svc -#define ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_getfd u.u32_getfd -#define ca32_getfs u.u32_getfs -}; - -union nfsctl_res32 { - __u8 cr32_getfh[NFS_FHSIZE]; - struct knfsd_fh cr32_getfs; -}; - -static int nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg32->ca32_svc.svc32_nthreads); - return err; -} - -static int nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_client.cl_ident[0], - &arg32->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg32->ca32_client.cl32_naddr); - err |= copy_from_user(&karg->ca_client.cl_addrlist[0], - 
&arg32->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg32->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg32->ca32_client.cl32_fhkeylen); - err |= copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg32->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); - - if(err) return -EFAULT; - return 0; -} - -static int nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_export.ex_client[0], - &arg32->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= copy_from_user(&karg->ca_export.ex_path[0], - &arg32->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg32->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg32->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg32->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg32->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg32->ca32_export.ex32_anon_gid); - karg->ca_export.ex_anon_uid = karg->ca_export.ex_anon_uid; - karg->ca_export.ex_anon_gid = karg->ca_export.ex_anon_gid; - - if(err) return -EFAULT; - return 0; -} - -static int nfs_getfd32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfd.gd_addr, - &arg32->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfd.gd_path, - &arg32->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfd.gd_version, - &arg32->ca32_getfd.gd32_version); - - if(err) return -EFAULT; - return 0; -} - -static int nfs_getfs32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = 
__get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfs.gd_addr, - &arg32->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfs.gd_path, - &arg32->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfs.gd_maxlen, - &arg32->ca32_getfs.gd32_maxlen); - - if(err) return -EFAULT; - return 0; -} - -/* This really doesn't need translations, we are only passing - * back a union which contains opaque nfs file handle data. - */ -static int nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) -{ - int err; - - err = copy_to_user(res32, kres, sizeof(*res32)); - - if(err) return -EFAULT; - return 0; -} - -/* Note: it is necessary to treat cmd_parm as an unsigned int, - * with the corresponding cast to a signed int to insure that the - * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) - * and the register representation of a signed int (msr in 64-bit mode) is performed. 
- */ -int asmlinkage sys32_nfsservctl(u32 cmd_parm, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) -{ - int cmd = (int)cmd_parm; - struct nfsctl_arg *karg = NULL; - union nfsctl_res *kres = NULL; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - if(!karg) - return -ENOMEM; - if(res32) { - kres = kmalloc(sizeof(*kres), GFP_USER); - if(!kres) { - kfree(karg); - return -ENOMEM; - } - } - switch(cmd) { - case NFSCTL_SVC: - err = nfs_svc32_trans(karg, arg32); - break; - case NFSCTL_ADDCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_DELCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_EXPORT: - case NFSCTL_UNEXPORT: - err = nfs_exp32_trans(karg, arg32); - break; - case NFSCTL_GETFD: - err = nfs_getfd32_trans(karg, arg32); - break; - case NFSCTL_GETFS: - err = nfs_getfs32_trans(karg, arg32); - break; - default: - err = -EINVAL; - break; - } - if(err) - goto done; - oldfs = get_fs(); - set_fs(KERNEL_DS); - err = sys_nfsservctl(cmd, karg, kres); - set_fs(oldfs); - - if (err) - goto done; - - if((cmd == NFSCTL_GETFD) || - (cmd == NFSCTL_GETFS)) - err = nfs_getfh32_res_trans(kres, res32); - -done: - if(karg) - kfree(karg); - if(kres) - kfree(kres); - return err; -} - - /* These are here just in case some old sparc32 binary calls it. */ asmlinkage long sys32_pause(void) @@ -1721,191 +1155,6 @@ asmlinkage int sys32_sendfile64(int out_ return ret; } -/* - * count32() counts the number of arguments/envelopes - */ -static int count32(u32 * argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - u32 p; int error; - - error = get_user(p,argv); - if (error) - return error; - if (!p) - break; - argv++; - if (++i > max) - return -E2BIG; - } - } - return i; -} - -/* - * 'copy_string32()' copies argument/envelope strings from user - * memory to free pages in kernel mem. These are in a format ready - * to be put directly into the top of new user memory. 
- */ -static int copy_strings32(int argc, u32 * argv, struct linux_binprm *bprm) -{ - while (argc-- > 0) { - u32 str; - int len; - unsigned long pos; - - if (get_user(str, argv + argc) || - !str || - !(len = strnlen_user((char *)A(str), bprm->p))) - return -EFAULT; - - if (bprm->p < len) - return -E2BIG; - - bprm->p -= len; - - pos = bprm->p; - while (len) { - char *kaddr; - struct page *page; - int offset, bytes_to_copy, new, err; - - offset = pos % PAGE_SIZE; - page = bprm->page[pos / PAGE_SIZE]; - new = 0; - if (!page) { - page = alloc_page(GFP_USER); - bprm->page[pos / PAGE_SIZE] = page; - if (!page) - return -ENOMEM; - new = 1; - } - kaddr = (char *)kmap(page); - - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - - err = copy_from_user(kaddr + offset, (char *)A(str), - bytes_to_copy); - kunmap((unsigned long)kaddr); - - if (err) - return -EFAULT; - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; - } - } - return 0; -} - -/* - * sys32_execve() executes a new program. 
- */ -static int do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) -{ - struct linux_binprm bprm; - struct file * file; - int retval; - int i; - - sched_balance_exec(); - - file = open_exec(filename); - - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - bprm.security = NULL; - bprm.mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm.mm) - goto out_file; - - retval = init_new_context(current, bprm.mm); - if (retval < 0) - goto out_mm; - - bprm.argc = count32(argv, bprm.p / sizeof(u32)); - if ((retval = bprm.argc) < 0) - goto out_mm; - - bprm.envc = count32(envp, bprm.p / sizeof(u32)); - if ((retval = bprm.envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(&bprm); - if (retval) - goto out; - - retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; - - bprm.exec = bprm.p; - retval = copy_strings32(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; - - retval = copy_strings32(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(&bprm,regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(&bprm); - return retval; - } - -out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; - if (page) - __free_page(page); - } - - if (bprm.security) - security_bprm_free(&bprm); - -out_mm: - if (bprm.mm) - mmdrop(bprm.mm); - -out_file: - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); - } - return retval; -} - long sys32_execve(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned 
long a5, struct pt_regs *regs) @@ -1924,7 +1173,7 @@ long sys32_execve(unsigned long a0, unsi giveup_altivec(current); #endif /* CONFIG_ALTIVEC */ - error = do_execve32(filename, (u32*) a1, (u32*) a2, regs); + error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs); if (error == 0) current->ptrace &= ~PT_DTRACE; --- linux-2.6.6-rc1/arch/ppc64/mm/hugetlbpage.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc64/mm/hugetlbpage.c 2004-04-18 22:25:59.346808392 -0700 @@ -25,7 +25,6 @@ #include #include #include -#include #include @@ -204,7 +203,7 @@ static int prepare_low_seg_for_htlb(stru } page = pmd_page(*pmd); pmd_clear(pmd); - pgtable_remove_rmap(page); + dec_page_state(nr_page_table_pages); pte_free_tlb(tlb, page); } tlb_finish_mmu(tlb, start, end); @@ -335,15 +334,9 @@ follow_hugetlb_page(struct mm_struct *mm } struct page * -follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) @@ -360,10 +353,8 @@ follow_huge_pmd(struct mm_struct *mm, un BUG_ON(! pmd_hugepage(*pmd)); page = hugepte_page(*(hugepte_t *)pmd); - if (page) { + if (page) page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - get_page(page); - } return page; } @@ -609,15 +600,6 @@ unsigned long hugetlb_get_unmapped_area( } } -static inline unsigned long computeHugeHptePP(unsigned int hugepte) -{ - unsigned long flags = 0x2; - - if (! (hugepte & _HUGEPAGE_RW)) - flags |= 0x1; - return flags; -} - int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local) { @@ -671,7 +653,7 @@ int hash_huge_page(struct mm_struct *mm, old_pte = *ptep; new_pte = old_pte; - hpteflags = computeHugeHptePP(hugepte_val(new_pte)); + hpteflags = 0x2 | (! 
(hugepte_val(new_pte) & _HUGEPAGE_RW)); /* Check if pte already has an hpte (case 2) */ if (unlikely(hugepte_val(old_pte) & _HUGEPAGE_HASHPTE)) { @@ -747,7 +729,7 @@ repeat: static void flush_hash_hugepage(mm_context_t context, unsigned long ea, hugepte_t pte, int local) { - unsigned long vsid, vpn, va, hash, secondary, slot; + unsigned long vsid, vpn, va, hash, slot; BUG_ON(hugepte_bad(pte)); BUG_ON(!in_hugepage_area(context, ea)); @@ -757,8 +739,7 @@ static void flush_hash_hugepage(mm_conte va = (vsid << 28) | (ea & 0x0fffffff); vpn = va >> LARGE_PAGE_SHIFT; hash = hpt_hash(vpn, 1); - secondary = !!(hugepte_val(pte) & _HUGEPAGE_SECONDARY); - if (secondary) + if (hugepte_val(pte) & _HUGEPAGE_SECONDARY) hash = ~hash; slot = (hash & htab_data.htab_hash_mask) * HPTES_PER_GROUP; slot += (hugepte_val(pte) & _HUGEPAGE_GROUP_IX) >> 5; --- linux-2.6.6-rc1/arch/ppc64/mm/tlb.c 2004-03-10 20:41:26.000000000 -0800 +++ 25/arch/ppc64/mm/tlb.c 2004-04-18 22:26:02.844276696 -0700 @@ -31,7 +31,6 @@ #include #include #include -#include DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); @@ -42,6 +41,33 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_ga DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; +void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) +{ + /* This is safe as we are holding page_table_lock */ + cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); + struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { + pte_free(ptepage); + return; + } + + if (*batchp == NULL) { + *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); + if (*batchp == NULL) { + pte_free_now(ptepage); + return; + } + (*batchp)->index = 0; + } + (*batchp)->pages[(*batchp)->index++] = ptepage; + if ((*batchp)->index == PTE_FREELIST_SIZE) { + pte_free_submit(*batchp); + *batchp = NULL; + } +} + /* * 
Update the MMU hash table to correspond with a change to * a Linux PTE. If wrprot is true, it is permissible to @@ -59,7 +85,8 @@ void hpte_update(pte_t *ptep, unsigned l ptepage = virt_to_page(ptep); mm = (struct mm_struct *) ptepage->mapping; - addr = ptep_to_address(ptep); + addr = ptepage->index + + (((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE); if (REGION_ID(addr) == USER_REGION_ID) context = mm->context.id; --- linux-2.6.6-rc1/arch/ppc/Kconfig 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ppc/Kconfig 2004-04-18 22:25:42.162420816 -0700 @@ -1209,6 +1209,19 @@ config DEBUG_INFO debug the kernel. If you don't debug the kernel, you can say N. +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on PROC_FS + default y + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. These + stats may be useful for both tuning and debugging the scheduler + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. + config BOOTX_TEXT bool "Support for early boot text console (BootX or OpenFirmware only)" depends PPC_OF --- linux-2.6.6-rc1/arch/ppc/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc/kernel/process.c 2004-04-18 22:26:00.022705640 -0700 @@ -83,7 +83,7 @@ kernel_stack_top(struct task_struct *tsk unsigned long task_top(struct task_struct *tsk) { - return ((unsigned long)tsk) + sizeof(struct task_struct); + return ((unsigned long)tsk) + sizeof(struct thread_info); } /* check to make sure the kernel stack is healthy */ @@ -658,12 +658,6 @@ void __init ll_puts(const char *s) } #endif -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long ip, sp; @@ -678,7 +672,7 @@ unsigned long get_wchan(struct task_stru return 0; if (count > 0) { ip = *(unsigned long *)(sp + 4); - if (ip < first_sched || ip >= last_sched) + if (!in_sched_functions(ip)) return ip; } } while (count++ < 16); --- linux-2.6.6-rc1/arch/ppc/kernel/setup.c 2004-03-10 20:41:26.000000000 -0800 +++ 25/arch/ppc/kernel/setup.c 2004-04-18 22:25:35.894373704 -0700 @@ -53,7 +53,6 @@ extern void ppc6xx_idle(void); extern void power4_idle(void); extern boot_infos_t *boot_infos; -char saved_command_line[COMMAND_LINE_SIZE]; unsigned char aux_device_present; struct ide_machdep_calls ppc_ide_md; char *sysmap; --- linux-2.6.6-rc1/arch/ppc/kernel/signal.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ppc/kernel/signal.c 2004-04-18 22:25:45.881855376 -0700 @@ -311,7 +311,7 @@ restore_sigmask(sigset_t *set) * (one which gets siginfo). 
*/ static void -handle_rt_signal(unsigned long sig, struct k_sigaction *ka, +handle_rt_signal(unsigned long sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs, unsigned long newsp) { @@ -354,7 +354,7 @@ handle_rt_signal(unsigned long sig, stru regs->gpr[4] = (unsigned long) &rt_sf->info; regs->gpr[5] = (unsigned long) &rt_sf->uc; regs->gpr[6] = (unsigned long) rt_sf; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ka_copy->sa.sa_handler; regs->link = (unsigned long) frame->tramp; regs->trap = 0; @@ -366,7 +366,7 @@ badframe: regs, frame, newsp); #endif if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ -466,7 +466,7 @@ int sys_rt_sigreturn(int r3, int r4, int * OK, we're invoking a handler */ static void -handle_signal(unsigned long sig, struct k_sigaction *ka, +handle_signal(unsigned long sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct pt_regs * regs, unsigned long newsp) { @@ -491,7 +491,7 @@ handle_signal(unsigned long sig, struct #if _NSIG != 64 #error "Please adjust handle_signal()" #endif - if (__put_user((unsigned long) ka->sa.sa_handler, &sc->handler) + if (__put_user((unsigned long) ka_copy->sa.sa_handler, &sc->handler) || __put_user(oldset->sig[0], &sc->oldmask) || __put_user(oldset->sig[1], &sc->_unused[3]) || __put_user((struct pt_regs *)frame, &sc->regs) @@ -506,7 +506,7 @@ handle_signal(unsigned long sig, struct regs->gpr[1] = newsp; regs->gpr[3] = sig; regs->gpr[4] = (unsigned long) sc; - regs->nip = (unsigned long) ka->sa.sa_handler; + regs->nip = (unsigned long) ka_copy->sa.sa_handler; regs->link = (unsigned long) frame->mctx.tramp; regs->trap = 0; @@ -518,7 +518,7 @@ badframe: regs, frame, *newspp); #endif if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ 
-565,18 +565,16 @@ badframe: int do_signal(sigset_t *oldset, struct pt_regs *regs) { siginfo_t info; - struct k_sigaction *ka; unsigned long frame, newsp; int signr, ret; + struct k_sigaction ka_copy; if (!oldset) oldset = &current->blocked; newsp = frame = 0; - signr = get_signal_to_deliver(&info, regs, NULL); - - ka = (signr == 0)? NULL: &current->sighand->action[signr-1]; + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); if (TRAP(regs) == 0x0C00 /* System Call! */ && regs->ccr & 0x10000000 /* error signalled */ @@ -587,7 +585,7 @@ int do_signal(sigset_t *oldset, struct p if (signr > 0 && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK || (ret == ERESTARTSYS - && !(ka->sa.sa_flags & SA_RESTART)))) { + && !(ka_copy.sa.sa_flags & SA_RESTART)))) { /* make the system call return an EINTR error */ regs->result = -EINTR; regs->gpr[3] = EINTR; @@ -606,7 +604,7 @@ int do_signal(sigset_t *oldset, struct p if (signr == 0) return 0; /* no signals delivered */ - if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size + if ((ka_copy.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size && !on_sig_stack(regs->gpr[1])) newsp = current->sas_ss_sp + current->sas_ss_size; else @@ -614,17 +612,18 @@ int do_signal(sigset_t *oldset, struct p newsp &= ~0xfUL; /* Whee! Actually deliver the signal. 
*/ - if (ka->sa.sa_flags & SA_SIGINFO) - handle_rt_signal(signr, ka, &info, oldset, regs, newsp); + if (ka_copy.sa.sa_flags & SA_SIGINFO) + handle_rt_signal(signr, &ka_copy, &info, oldset, regs, newsp); else - handle_signal(signr, ka, &info, oldset, regs, newsp); + handle_signal(signr, &ka_copy, &info, oldset, regs, newsp); - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + if (ka_copy.sa.sa_flags & SA_ONESHOT) + ka_copy.sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy.sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked, &current->blocked, + &ka_copy.sa.sa_mask); sigaddset(&current->blocked, signr); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); --- linux-2.6.6-rc1/arch/ppc/mm/pgtable.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/ppc/mm/pgtable.c 2004-04-18 22:25:33.466742760 -0700 @@ -86,9 +86,14 @@ pte_t *pte_alloc_one_kernel(struct mm_st extern int mem_init_done; extern void *early_get_page(void); - if (mem_init_done) + if (mem_init_done) { pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); - else + if (pte) { + struct page *ptepage = virt_to_page(pte); + ptepage->mapping = (void *) mm; + ptepage->index = address & PMD_MASK; + } + } else pte = (pte_t *)early_get_page(); if (pte) clear_page(pte); @@ -97,7 +102,7 @@ pte_t *pte_alloc_one_kernel(struct mm_st struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *pte; + struct page *ptepage; #ifdef CONFIG_HIGHPTE int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; @@ -105,10 +110,13 @@ struct page *pte_alloc_one(struct mm_str int flags = GFP_KERNEL | __GFP_REPEAT; #endif - pte = alloc_pages(flags, 0); - if (pte) - clear_highpage(pte); - return pte; + ptepage = alloc_pages(flags, 0); + if (ptepage) { + ptepage->mapping = (void *) mm; + ptepage->index = address & PMD_MASK; + clear_highpage(ptepage); + } + return ptepage; } void 
pte_free_kernel(pte_t *pte) @@ -116,15 +124,17 @@ void pte_free_kernel(pte_t *pte) #ifdef CONFIG_SMP hash_page_sync(); #endif + virt_to_page(pte)->mapping = NULL; free_page((unsigned long)pte); } -void pte_free(struct page *pte) +void pte_free(struct page *ptepage) { #ifdef CONFIG_SMP hash_page_sync(); #endif - __free_page(pte); + ptepage->mapping = NULL; + __free_page(ptepage); } #ifndef CONFIG_44x --- linux-2.6.6-rc1/arch/ppc/platforms/lopec_setup.c 2003-09-27 18:57:43.000000000 -0700 +++ 25/arch/ppc/platforms/lopec_setup.c 2004-04-18 22:25:35.894373704 -0700 @@ -33,7 +33,6 @@ #include #include -extern char saved_command_line[]; extern void lopec_find_bridges(void); /* --- linux-2.6.6-rc1/arch/ppc/platforms/pmac_setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/ppc/platforms/pmac_setup.c 2004-04-18 22:25:35.895373552 -0700 @@ -103,8 +103,6 @@ int has_l2cache = 0; static int current_root_goodness = -1; -extern char saved_command_line[]; - extern int pmac_newworld; #define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ --- linux-2.6.6-rc1/arch/ppc/platforms/pplus.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ppc/platforms/pplus.c 2004-04-18 22:25:35.896373400 -0700 @@ -48,8 +48,6 @@ TODC_ALLOC(); -extern char saved_command_line[]; - extern void pplus_setup_hose(void); extern void pplus_set_VIA_IDE_native(void); --- linux-2.6.6-rc1/arch/ppc/platforms/prep_setup.c 2004-04-03 20:39:10.000000000 -0800 +++ 25/arch/ppc/platforms/prep_setup.c 2004-04-18 22:25:35.897373248 -0700 @@ -76,7 +76,6 @@ extern void rs_nvram_write_val(int addr, extern void ibm_prep_init(void); extern void prep_find_bridges(void); -extern char saved_command_line[]; int _prep_type; --- linux-2.6.6-rc1/arch/s390/kernel/compat_exec.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/s390/kernel/compat_exec.c 2004-04-18 22:25:49.820256648 -0700 @@ -72,6 +72,7 @@ int setup_arg_pages32(struct linux_binpr mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + 
mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); --- linux-2.6.6-rc1/arch/s390/kernel/compat_linux.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/compat_linux.c 2004-04-18 22:25:59.131841072 -0700 @@ -373,144 +373,6 @@ asmlinkage long sys32_ftruncate64(unsign return sys_ftruncate(fd, (high << 32) | low); } -typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); -typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - -static long do_readv_writev32(int type, struct file *file, - const struct compat_iovec *vector, u32 count) -{ - unsigned long tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - io_fn_t fn; - iov_fn_t fnv; - - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ - if (!count) - return 0; - if (verify_area(VERIFY_READ, vector, sizeof(struct compat_iovec)*count)) - return -EFAULT; - if (count > UIO_MAXIOV) - return -EINVAL; - if (count > UIO_FASTIOV) { - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - return -ENOMEM; - } - - tot_len = 0; - i = count; - ivp = iov; - retval = -EINVAL; - while(i > 0) { - compat_ssize_t tmp = tot_len; - compat_ssize_t len; - u32 buf; - - if (__get_user(len, &vector->iov_len) || - __get_user(buf, &vector->iov_base)) { - retval = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting an ssize_t32 .. */ - goto out; - tot_len += len; - if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ - goto out; - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - if (tot_len == 0) { - retval = 0; - goto out; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == VERIFY_WRITE - ? 
FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) - goto out; - - /* VERIFY_WRITE actually means a read, as we write to user space */ - fnv = (type == VERIFY_WRITE ? file->f_op->readv : file->f_op->writev); - if (fnv) { - retval = fnv(file, iov, count, &file->f_pos); - goto out; - } - - fn = (type == VERIFY_WRITE ? file->f_op->read : - (io_fn_t) file->f_op->write); - - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; - ivp++; - count--; - nr = fn(file, base, len, &file->f_pos); - if (nr < 0) { - if (!retval) - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; - } -out: - if (iov != iovstack) - kfree(iov); - - return retval; -} - -asmlinkage long sys32_readv(int fd, struct compat_iovec *vector, unsigned long count) -{ - struct file *file; - long ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - - if (file->f_op && (file->f_mode & FMODE_READ) && - (file->f_op->readv || file->f_op->read)) - ret = do_readv_writev32(VERIFY_WRITE, file, vector, count); - fput(file); - -bad_file: - return ret; -} - -asmlinkage long sys32_writev(int fd, struct compat_iovec *vector, unsigned long count) -{ - struct file *file; - int ret = -EBADF; - - file = fget(fd); - if(!file) - goto bad_file; - if (file->f_op && (file->f_mode & FMODE_WRITE) && - (file->f_op->writev || file->f_op->write)) - ret = do_readv_writev32(VERIFY_READ, file, vector, count); - fput(file); - -bad_file: - return ret; -} - /* readdir & getdents */ #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) @@ -641,160 +503,6 @@ out: /* end of readdir & getdents */ -/* - * Ooo, nasty. We need here to frob 32-bit unsigned longs to - * 64-bit unsigned longs. 
- */ - -static inline int -get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset) -{ - if (ufdset) { - unsigned long odd; - - if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32))) - return -EFAULT; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - __get_user(l, ufdset); - __get_user(h, ufdset+1); - ufdset += 2; - *fdset++ = h << 32 | l; - n -= 2; - } - if (odd) - __get_user(*fdset, ufdset); - } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, this makes sure that - * actually happens. - */ - memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32)); - } - return 0; -} - -static inline void -set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) -{ - unsigned long odd; - - if (!ufdset) - return; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - l = *fdset++; - h = l >> 32; - __put_user(l, ufdset); - __put_user(h, ufdset+1); - ufdset += 2; - n -= 2; - } - if (odd) - __put_user(*fdset, ufdset); -} - -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - -asmlinkage long sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, - struct compat_timeval *tvp) -{ - fd_set_bits fds; - char *bits; - unsigned long nn; - long timeout; - int ret, size; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp) { - int sec, usec; - - if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) - || (ret = __get_user(sec, &tvp->tv_sec)) - || (ret = __get_user(usec, &tvp->tv_usec))) - goto out_nofds; - - ret = -EINVAL; - if(sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. 
- */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); - if ((ret = get_fd_set32(nn, fds.in, inp)) || - (ret = get_fd_set32(nn, fds.out, outp)) || - (ret = get_fd_set32(nn, fds.ex, exp))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - int sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - put_user(sec, &tvp->tv_sec); - put_user(usec, &tvp->tv_usec); - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set32(nn, inp, fds.res_in); - set_fd_set32(nn, outp, fds.res_out); - set_fd_set32(nn, exp, fds.res_ex); - -out: - kfree(bits); -out_nofds: - return ret; -} - int cp_compat_stat(struct kstat *stat, struct compat_stat *statbuf) { int err; @@ -1044,188 +752,6 @@ sys32_rt_sigqueueinfo(int pid, int sig, return ret; } -extern void check_pending(int signum); - -/* - * count32() counts the number of arguments/envelopes - */ -static int count32(u32 * argv) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - u32 p; int error; - - error = get_user(p,argv); - if (error) return error; - if (!p) break; - argv++; i++; - } - } - return i; -} - -/* - * 'copy_string32()' copies argument/envelope strings from user - * memory to free pages in kernel mem. These are in a format ready - * to be put directly into the top of new user memory. 
- */ -static int copy_strings32(int argc, u32 * argv, struct linux_binprm *bprm) -{ - while (argc-- > 0) { - u32 str; - int len; - unsigned long pos; - - if (get_user(str, argv + argc) || - !str || - !(len = strnlen_user((char *)A(str), bprm->p))) - return -EFAULT; - - if (bprm->p < len) - return -E2BIG; - - bprm->p -= len; - - pos = bprm->p; - while (len) { - char *kaddr; - struct page *page; - int offset, bytes_to_copy, new, err; - - offset = pos % PAGE_SIZE; - page = bprm->page[pos / PAGE_SIZE]; - new = 0; - if (!page) { - page = alloc_page(GFP_USER); - bprm->page[pos / PAGE_SIZE] = page; - if (!page) - return -ENOMEM; - new = 1; - } - kaddr = (char *)kmap(page); - - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - - err = copy_from_user(kaddr + offset, (char *)A(str), - bytes_to_copy); - kunmap(page); - - if (err) - return -EFAULT; - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; - } - } - return 0; -} - -/* - * sys32_execve() executes a new program. - */ -static inline int -do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) -{ - struct linux_binprm bprm; - struct file * file; - int retval; - int i; - - sched_balance_exec(); - - file = open_exec(filename); - - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - bprm.mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm.mm) - goto out_file; - - /* init_new_context is empty for s390x. 
*/ - - bprm.argc = count32(argv); - if ((retval = bprm.argc) < 0) - goto out_mm; - - bprm.envc = count32(envp); - if ((retval = bprm.envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(&bprm); - if (retval) - goto out; - - retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; - - bprm.exec = bprm.p; - retval = copy_strings32(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; - - retval = copy_strings32(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(&bprm, regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(&bprm); - return retval; - } - -out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i=0 ; iptrace &= ~PT_DTRACE; @@ -1288,226 +815,6 @@ sys32_delete_module(const char __user *n #endif /* CONFIG_MODULES */ -/* Stuff for NFS server syscalls... */ -struct nfsctl_svc32 { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct nfsctl_client32 { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct nfsctl_export32 { - s8 ex32_client[NFSCLNT_IDMAX+1]; - s8 ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - s32 ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; -}; - -struct nfsctl_fdparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_version; -}; - -struct nfsctl_fsparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_maxlen; -}; - -struct nfsctl_arg32 { - s32 ca32_version; /* safeguard */ - union { - struct nfsctl_svc32 u32_svc; - struct nfsctl_client32 u32_client; - struct nfsctl_export32 u32_export; - struct nfsctl_fdparm32 u32_getfd; - struct nfsctl_fsparm32 u32_getfs; - } u; -#define ca32_svc u.u32_svc -#define 
ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_getfd u.u32_getfd -#define ca32_getfs u.u32_getfs -#define ca32_authd u.u32_authd -}; - -union nfsctl_res32 { - __u8 cr32_getfh[NFS_FHSIZE]; - struct knfsd_fh cr32_getfs; -}; - -static int nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg32->ca32_svc.svc32_nthreads); - return err; -} - -static int nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_client.cl_ident[0], - &arg32->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg32->ca32_client.cl32_naddr); - err |= copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg32->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg32->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg32->ca32_client.cl32_fhkeylen); - err |= copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg32->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); - return err; -} - -static int nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_export.ex_client[0], - &arg32->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= copy_from_user(&karg->ca_export.ex_path[0], - &arg32->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg32->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg32->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg32->ca32_export.ex32_flags); - err |= 
__get_user(karg->ca_export.ex_anon_uid, - &arg32->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg32->ca32_export.ex32_anon_gid); - karg->ca_export.ex_anon_uid = high2lowuid(karg->ca_export.ex_anon_uid); - karg->ca_export.ex_anon_gid = high2lowgid(karg->ca_export.ex_anon_gid); - return err; -} - -static int nfs_getfd32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfd.gd_addr, - &arg32->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfd.gd_path, - &arg32->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfd.gd_version, - &arg32->ca32_getfd.gd32_version); - return err; -} - -static int nfs_getfs32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfs.gd_addr, - &arg32->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfs.gd_path, - &arg32->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfs.gd_maxlen, - &arg32->ca32_getfs.gd32_maxlen); - return err; -} - -/* This really doesn't need translations, we are only passing - * back a union which contains opaque nfs file handle data. - */ -static int nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) -{ - return copy_to_user(res32, kres, sizeof(*res32)) ? 
-EFAULT : 0; -} - -long asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) -{ - struct nfsctl_arg *karg = NULL; - union nfsctl_res *kres = NULL; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - if(!karg) - return -ENOMEM; - if(res32) { - kres = kmalloc(sizeof(*kres), GFP_USER); - if(!kres) { - kfree(karg); - return -ENOMEM; - } - } - switch(cmd) { - case NFSCTL_SVC: - err = nfs_svc32_trans(karg, arg32); - break; - case NFSCTL_ADDCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_DELCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_EXPORT: - case NFSCTL_UNEXPORT: - err = nfs_exp32_trans(karg, arg32); - break; - case NFSCTL_GETFD: - err = nfs_getfd32_trans(karg, arg32); - break; - case NFSCTL_GETFS: - err = nfs_getfs32_trans(karg, arg32); - break; - default: - err = -EINVAL; - break; - } - if(err) - goto done; - oldfs = get_fs(); - set_fs(KERNEL_DS); - err = sys_nfsservctl(cmd, karg, kres); - set_fs(oldfs); - - if (err) - goto done; - - if((cmd == NFSCTL_GETFD) || - (cmd == NFSCTL_GETFS)) - err = nfs_getfh32_res_trans(kres, res32); - -done: - if(karg) - kfree(karg); - if(kres) - kfree(kres); - return err; -} - /* Translations due to time_t size differences. Which affects all sorts of things, like timeval and itimerval. */ --- linux-2.6.6-rc1/arch/s390/kernel/compat_signal.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/s390/kernel/compat_signal.c 2004-04-18 22:25:45.604897480 -0700 @@ -74,8 +74,8 @@ int copy_siginfo_to_user32(siginfo_t32 * err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE); else { switch (from->si_code >> 16) { - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: + case __SI_RT >> 16: /* This is not generated by the kernel as of now. 
*/ + case __SI_MESGQ >> 16: err |= __put_user(from->si_int, &to->si_int); /* fallthrough */ case __SI_KILL >> 16: @@ -452,10 +452,10 @@ static inline int map_signal(int sig) return sig; } -static void setup_frame32(int sig, struct k_sigaction *ka, +static void setup_frame32(int sig, struct k_sigaction *ka_copy, sigset_t *set, struct pt_regs * regs) { - sigframe32 *frame = get_sigframe(ka, regs, sizeof(sigframe32)); + sigframe32 *frame = get_sigframe(ka_copy, regs, sizeof(sigframe32)); if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32))) goto give_sigsegv; @@ -469,8 +469,8 @@ static void setup_frame32(int sig, struc /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (__u64) ka_copy->sa.sa_restorer; } else { regs->gprs[14] = (__u64) frame->retcode; if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, @@ -484,7 +484,7 @@ static void setup_frame32(int sig, struc /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->psw.addr = (__u64) ka_copy->sa.sa_handler; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__u64) &frame->sc; @@ -497,15 +497,16 @@ static void setup_frame32(int sig, struc give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + ka_copy->sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } -static void setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) +static void setup_rt_frame32(int sig, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *set, + struct pt_regs * regs) { int err = 0; - rt_sigframe32 *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32)); + rt_sigframe32 *frame = get_sigframe(ka_copy, regs, sizeof(rt_sigframe32)); if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32))) goto give_sigsegv; @@ -526,8 +527,8 
@@ static void setup_rt_frame32(int sig, st /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { - regs->gprs[14] = (__u64) ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) { + regs->gprs[14] = (__u64) ka_copy->sa.sa_restorer; } else { regs->gprs[14] = (__u64) frame->retcode; err |= __put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, @@ -540,7 +541,7 @@ static void setup_rt_frame32(int sig, st /* Set up registers for signal handler */ regs->gprs[15] = (__u64) frame; - regs->psw.addr = (__u64) ka->sa.sa_handler; + regs->psw.addr = (__u64) ka_copy->sa.sa_handler; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (__u64) &frame->info; @@ -549,7 +550,7 @@ static void setup_rt_frame32(int sig, st give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + ka_copy->sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ -558,23 +559,22 @@ give_sigsegv: */ void -handle_signal32(unsigned long sig, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs) +handle_signal32(unsigned long sig, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { - struct k_sigaction *ka = &current->sighand->action[sig-1]; - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame32(sig, ka, info, oldset, regs); + if (ka_copy->sa.sa_flags & SA_SIGINFO) + setup_rt_frame32(sig, ka_copy, info, oldset, regs); else - setup_frame32(sig, ka, oldset, regs); + setup_frame32(sig, ka_copy, oldset, regs); - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + if (ka_copy->sa.sa_flags & SA_ONESHOT) + ka_copy->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked,&current->blocked, + &ka_copy->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); 
spin_unlock_irq(&current->sighand->siglock); --- linux-2.6.6-rc1/arch/s390/kernel/compat_wrapper.S 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/compat_wrapper.S 2004-04-18 22:25:59.133840768 -0700 @@ -641,14 +641,14 @@ sys32_getdents_wrapper: llgfr %r4,%r4 # unsigned int jg sys32_getdents # branch to system call - .globl sys32_select_wrapper -sys32_select_wrapper: + .globl compat_sys_select_wrapper +compat_sys_select_wrapper: lgfr %r2,%r2 # int - llgtr %r3,%r3 # fd_set * - llgtr %r4,%r4 # fd_set * - llgtr %r5,%r5 # fd_set * - llgtr %r6,%r6 # struct timeval_emu31 * - jg sys32_select # branch to system call + llgtr %r3,%r3 # compat_fd_set * + llgtr %r4,%r4 # compat_fd_set * + llgtr %r5,%r5 # compat_fd_set * + llgtr %r6,%r6 # struct compat_timeval * + jg compat_sys_select # branch to system call .globl sys32_flock_wrapper sys32_flock_wrapper: @@ -663,19 +663,19 @@ sys32_msync_wrapper: lgfr %r4,%r4 # int jg sys_msync # branch to system call - .globl sys32_readv_wrapper -sys32_readv_wrapper: + .globl compat_sys_readv_wrapper +compat_sys_readv_wrapper: lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct iovec_emu31 * + llgtr %r3,%r3 # const struct compat_iovec * llgfr %r4,%r4 # unsigned long - jg sys32_readv # branch to system call + jg compat_sys_readv # branch to system call - .globl sys32_writev_wrapper -sys32_writev_wrapper: + .globl compat_sys_writev_wrapper +compat_sys_writev_wrapper: lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct iovec_emu31 * + llgtr %r3,%r3 # const struct compat_iovec * llgfr %r4,%r4 # unsigned long - jg sys32_writev # branch to system call + jg compat_sys_writev # branch to system call .globl sys32_getsid_wrapper sys32_getsid_wrapper: @@ -786,12 +786,12 @@ sys32_poll_wrapper: lgfr %r4,%r4 # long jg sys_poll # branch to system call - .globl sys32_nfsservctl_wrapper -sys32_nfsservctl_wrapper: + .globl compat_sys_nfsservctl_wrapper +compat_sys_nfsservctl_wrapper: lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct nfsctl_arg_emu31 * - llgtr 
%r4,%r4 # union nfsctl_res_emu31 * - jg sys32_nfsservctl # branch to system call + llgtr %r3,%r3 # struct compat_nfsctl_arg* + llgtr %r4,%r4 # union compat_nfsctl_res* + jg compat_sys_nfsservctl # branch to system call .globl sys32_setresgid16_wrapper sys32_setresgid16_wrapper: @@ -1352,3 +1352,47 @@ compat_sys_fstatfs64_wrapper: llgfr %r3,%r3 # compat_size_t llgtr %r4,%r4 # struct compat_statfs64 * jg compat_fstatfs64 + + .globl compat_sys_mq_open_wrapper +compat_sys_mq_open_wrapper: + llgtr %r2,%r2 # const char * + lgfr %r3,%r3 # int + llgfr %r4,%r4 # mode_t + llgtr %r5,%r5 # struct compat_mq_attr * + jg compat_sys_mq_open + + .globl sys32_mq_unlink_wrapper +sys32_mq_unlink_wrapper: + llgtr %r2,%r2 # const char * + jg sys_mq_unlink + + .globl compat_sys_mq_timedsend_wrapper +compat_sys_mq_timedsend_wrapper: + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # const char * + llgfr %r4,%r4 # size_t + llgfr %r5,%r5 # unsigned int + llgtr %r6,%r6 # const struct compat_timespec * + jg compat_sys_mq_timedsend + + .globl compat_sys_mq_timedreceive_wrapper +compat_sys_mq_timedreceive_wrapper: + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # char * + llgfr %r4,%r4 # size_t + llgtr %r5,%r5 # unsigned int * + llgtr %r6,%r6 # const struct compat_timespec * + jg compat_sys_mq_timedreceive + + .globl compat_sys_mq_notify_wrapper +compat_sys_mq_notify_wrapper: + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # struct compat_sigevent * + jg compat_sys_mq_notify + + .globl compat_sys_mq_getsetattr_wrapper +compat_sys_mq_getsetattr_wrapper: + lgfr %r2,%r2 # mqd_t + llgtr %r3,%r3 # struct compat_mq_attr * + llgtr %r4,%r4 # struct compat_mq_attr * + jg compat_sys_mq_getsetattr --- linux-2.6.6-rc1/arch/s390/kernel/entry64.S 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/entry64.S 2004-04-18 22:25:24.659081728 -0700 @@ -227,7 +227,7 @@ sysc_do_restart: larl %r10,sys_call_table_emu # use 31 bit emulation system calls sysc_noemu: #endif - tm __TI_flags+7(%r9),_TIF_SYSCALL_TRACE + tm 
__TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) lgf %r8,0(%r7,%r10) # load address of system call routine jo sysc_tracesys basr %r14,%r8 # call sys_xxxx @@ -299,6 +299,8 @@ __critical_end: # special linkage: %r12 contains the return address for trace_svc # sysc_tracesys: + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,0 srl %r7,2 stg %r7,SP_R2(%r15) brasl %r14,syscall_trace @@ -314,8 +316,10 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+7(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) jno sysc_return + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,1 larl %r14,sysc_return # return point is sysc_return jg syscall_trace @@ -541,7 +545,7 @@ pgm_svcstd: larl %r10,sys_call_table_emu # use 31 bit emulation system calls pgm_svcper_noemu: #endif - tm __TI_flags+7(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) lgf %r8,0(%r7,%r10) # load address of system call routine jo pgm_tracesys basr %r14,%r8 # call sys_xxxx @@ -566,6 +570,8 @@ pgm_svcper_nosig: # call trace before and after sys_call # pgm_tracesys: + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,0 srlg %r7,%r7,2 stg %r7,SP_R2(%r15) brasl %r14,syscall_trace @@ -581,8 +587,10 @@ pgm_svc_go: basr %r14,%r8 # call sys_xxx stg %r2,SP_R2(%r15) # store return value pgm_svc_nogo: - tm __TI_flags+7(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+7(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) jno pgm_svcret + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,1 larl %r14,pgm_svcret # return point is sysc_return jg syscall_trace --- linux-2.6.6-rc1/arch/s390/kernel/entry.S 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/entry.S 2004-04-18 22:25:24.658081880 -0700 @@ -235,7 +235,7 @@ sysc_enter: lr %r7,%r1 # copy svc number to %r7 sla %r7,2 # *4 sysc_do_restart: - tm __TI_flags+3(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) l 
%r8,sys_call_table-system_call(%r7,%r13) # get system call addr. bo BASED(sysc_tracesys) basr %r14,%r8 # call sys_xxxx @@ -309,6 +309,8 @@ __critical_end: # sysc_tracesys: l %r1,BASED(.Ltrace) + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,0 srl %r7,2 st %r7,SP_R2(%r15) basr %r14,%r1 @@ -323,9 +325,11 @@ sysc_tracego: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value sysc_tracenogo: - tm __TI_flags+3(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) bno BASED(sysc_return) l %r1,BASED(.Ltrace) + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,1 la %r14,BASED(sysc_return) br %r1 @@ -502,7 +506,7 @@ pgm_svcper: lr %r7,%r1 # copy svc number to %r7 sla %r7,2 # *4 pgm_svcstd: - tm __TI_flags+3(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) l %r8,sys_call_table-system_call(%r7,%r13) # get system call addr. bo BASED(pgm_tracesys) basr %r14,%r8 # call sys_xxxx @@ -529,6 +533,8 @@ pgm_svcper_nosig: # pgm_tracesys: l %r1,BASED(.Ltrace) + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,0 srl %r7,2 st %r7,SP_R2(%r15) basr %r14,%r1 @@ -543,9 +549,11 @@ pgm_svc_go: basr %r14,%r8 # call sys_xxx st %r2,SP_R2(%r15) # store return value pgm_svc_nogo: - tm __TI_flags+3(%r9),_TIF_SYSCALL_TRACE + tm __TI_flags+3(%r9),(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT) bno BASED(pgm_svcret) l %r1,BASED(.Ltrace) + la %r2,SP_PTREGS(%r15) # load pt_regs + la %r3,1 la %r14,BASED(pgm_svcret) br %r1 --- linux-2.6.6-rc1/arch/s390/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/s390/kernel/process.c 2004-04-18 22:25:54.254582528 -0700 @@ -381,12 +381,6 @@ void dump_thread(struct pt_regs * regs, dump->regs.per_info = current->thread.per_info; } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long r14, r15, bc; @@ -409,12 +403,10 @@ unsigned long get_wchan(struct task_stru #else r14 = *(unsigned long *) (bc+112); #endif - if (r14 < first_sched || r14 >= last_sched) + if (!in_sched_functions(r14)) return r14; bc = (*(unsigned long *) bc) & PSW_ADDR_INSN; } while (count++ < 16); return 0; } -#undef last_sched -#undef first_sched --- linux-2.6.6-rc1/arch/s390/kernel/ptrace.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/ptrace.c 2004-04-18 22:25:24.660081576 -0700 @@ -690,8 +690,16 @@ out: } asmlinkage void -syscall_trace(void) +syscall_trace(struct pt_regs *regs, int entryexit) { + if (unlikely(current->audit_context)) { + if (!entryexit) + audit_syscall_entry(current, regs->gprs[2], + regs->orig_gpr2, regs->gprs[3], + regs->gprs[4], regs->gprs[5]); + else + audit_syscall_exit(current, regs->gprs[2]); + } if (!test_thread_flag(TIF_SYSCALL_TRACE)) return; if (!(current->ptrace & PT_PTRACED)) --- linux-2.6.6-rc1/arch/s390/kernel/setup.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/setup.c 2004-04-18 22:25:35.897373248 -0700 @@ -74,7 +74,6 @@ extern int _text,_etext, _edata, _end; #include static char command_line[COMMAND_LINE_SIZE] = { 0, }; - char saved_command_line[COMMAND_LINE_SIZE]; static struct resource code_resource = { "Kernel code", 0x100000, 0 }; static struct resource data_resource = { "Kernel data", 0, 0 }; --- linux-2.6.6-rc1/arch/s390/kernel/signal.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/signal.c 2004-04-18 22:25:45.605897328 -0700 @@ -303,10 +303,10 @@ static inline int map_signal(int sig) return sig; } -static void setup_frame(int sig, struct k_sigaction *ka, +static void setup_frame(int sig, struct k_sigaction *ka_copy, sigset_t *set, struct pt_regs * regs) { - sigframe *frame = 
get_sigframe(ka, regs, sizeof(sigframe)); + sigframe *frame = get_sigframe(ka_copy, regs, sizeof(sigframe)); if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe))) goto give_sigsegv; @@ -320,9 +320,9 @@ static void setup_frame(int sig, struct /* Set up to return from userspace. If provided, use a stub already in userspace. */ - if (ka->sa.sa_flags & SA_RESTORER) { + if (ka_copy->sa.sa_flags & SA_RESTORER) { regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; + ka_copy->sa.sa_restorer | PSW_ADDR_AMODE; } else { regs->gprs[14] = (unsigned long) frame->retcode | PSW_ADDR_AMODE; @@ -337,7 +337,7 @@ static void setup_frame(int sig, struct /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; - regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ka_copy->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->sc; @@ -350,15 +350,15 @@ static void setup_frame(int sig, struct give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + ka_copy->sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static void setup_rt_frame(int sig, struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { int err = 0; - rt_sigframe *frame = get_sigframe(ka, regs, sizeof(rt_sigframe)); + rt_sigframe *frame = get_sigframe(ka_copy, regs, sizeof(rt_sigframe)); if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe))) goto give_sigsegv; @@ -379,9 +379,9 @@ static void setup_rt_frame(int sig, stru /* Set up to return from userspace. If provided, use a stub already in userspace. 
*/ - if (ka->sa.sa_flags & SA_RESTORER) { + if (ka_copy->sa.sa_flags & SA_RESTORER) { regs->gprs[14] = (unsigned long) - ka->sa.sa_restorer | PSW_ADDR_AMODE; + ka_copy->sa.sa_restorer | PSW_ADDR_AMODE; } else { regs->gprs[14] = (unsigned long) frame->retcode | PSW_ADDR_AMODE; @@ -395,7 +395,7 @@ static void setup_rt_frame(int sig, stru /* Set up registers for signal handler */ regs->gprs[15] = (unsigned long) frame; - regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE; + regs->psw.addr = (unsigned long) ka_copy->sa.sa_handler | PSW_ADDR_AMODE; regs->gprs[2] = map_signal(sig); regs->gprs[3] = (unsigned long) &frame->info; @@ -404,7 +404,7 @@ static void setup_rt_frame(int sig, stru give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + ka_copy->sa.sa_handler = SIG_DFL; force_sig(SIGSEGV, current); } @@ -413,23 +413,22 @@ give_sigsegv: */ static void -handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs) +handle_signal(unsigned long sig, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *oldset, struct pt_regs * regs) { - struct k_sigaction *ka = &current->sighand->action[sig-1]; - /* Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + if (ka_copy->sa.sa_flags & SA_SIGINFO) + setup_rt_frame(sig, ka_copy, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + setup_frame(sig, ka_copy, oldset, regs); - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + if (ka_copy->sa.sa_flags & SA_ONESHOT) + ka_copy->sa.sa_handler = SIG_DFL; - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(&current->sighand->siglock); - sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); + sigorsets(&current->blocked,&current->blocked, + &ka_copy->sa.sa_mask); sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); @@ -450,6 +449,7 @@ int do_signal(struct pt_regs *regs, sigs unsigned 
long retval = 0, continue_addr = 0, restart_addr = 0; siginfo_t info; int signr; + struct k_sigaction ka_copy; /* * We want the common case to go fast, which @@ -483,7 +483,7 @@ int do_signal(struct pt_regs *regs, sigs /* Get signal to deliver. When running under ptrace, at this point the debugger may change all our registers ... */ - signr = get_signal_to_deliver(&info, regs, NULL); + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); /* Depending on the signal settings we may need to revert the decision to restart the system call. */ @@ -502,14 +502,15 @@ int do_signal(struct pt_regs *regs, sigs #ifdef CONFIG_S390_SUPPORT if (test_thread_flag(TIF_31BIT)) { extern void handle_signal32(unsigned long sig, + struct k_sigaction *ka_copy, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs); - handle_signal32(signr, &info, oldset, regs); + handle_signal32(signr, &ka_copy, &info, oldset, regs); return 1; } #endif - handle_signal(signr, &info, oldset, regs); + handle_signal(signr, &ka_copy, &info, oldset, regs); return 1; } --- linux-2.6.6-rc1/arch/s390/kernel/syscalls.S 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/s390/kernel/syscalls.S 2004-04-18 22:25:59.133840768 -0700 @@ -150,11 +150,11 @@ SYSCALL(sys_setfsuid16,sys_ni_syscall,sy SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,sys32_select_wrapper) +SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) -SYSCALL(sys_readv,sys_readv,sys32_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,sys32_writev_wrapper) +SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) 
SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) SYSCALL(sys_sysctl,sys_sysctl,sys32_sysctl_wrapper) @@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,s NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -SYSCALL(sys_nfsservctl,sys_nfsservctl,sys32_nfsservctl_wrapper) +SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper) SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) @@ -275,3 +275,13 @@ NI_SYSCALL /* reserved for vserver SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) +NI_SYSCALL /* 267 new sys_remap_file_pages */ +NI_SYSCALL /* 268 sys_mbind */ +NI_SYSCALL /* 269 sys_get_mempolicy */ +NI_SYSCALL /* 270 sys_set_mempolicy */ +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) +SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) --- linux-2.6.6-rc1/arch/sh/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sh/kernel/process.c 2004-04-18 22:25:54.254582528 -0700 @@ -461,12 +461,6 @@ out: return error; } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long schedule_frame; @@ -479,7 +473,7 @@ unsigned long get_wchan(struct task_stru * The same comment as on the Alpha applies here, too ... */ pc = thread_saved_pc(p); - if (pc >= first_sched && pc < last_sched) { + if (in_sched_functions(pc)) { schedule_frame = ((unsigned long *)(long)p->thread.sp)[1]; return (unsigned long)((unsigned long *)schedule_frame)[1]; } --- linux-2.6.6-rc1/arch/sh/kernel/setup.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sh/kernel/setup.c 2004-04-18 22:25:35.898373096 -0700 @@ -85,14 +85,12 @@ static struct sh_machine_vector* __init #define INITRD_SIZE (*(unsigned long *) (PARAM+0x014)) /* ... */ #define COMMAND_LINE ((char *) (PARAM+0x100)) -#define COMMAND_LINE_SIZE 256 #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 static char command_line[COMMAND_LINE_SIZE] = { 0, }; - char saved_command_line[COMMAND_LINE_SIZE]; struct resource standard_io_resources[] = { { "dma1", 0x00, 0x1f }, --- linux-2.6.6-rc1/arch/sh/mm/hugetlbpage.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sh/mm/hugetlbpage.c 2004-04-18 22:25:59.347808240 -0700 @@ -166,15 +166,9 @@ int follow_hugetlb_page(struct mm_struct } struct page *follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) --- linux-2.6.6-rc1/arch/sparc64/Kconfig 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/Kconfig 2004-04-18 22:25:47.916546056 -0700 @@ -687,12 +687,19 @@ config DEBUG_BOOTMEM depends on DEBUG_KERNEL bool "Debug BOOTMEM initialization" +config LOCKMETER + bool "Kernel lock metering" + depends on 
SMP && !PREEMPT + help + Say Y to enable kernel lock metering, which adds overhead to SMP locks, + but allows you to see various statistics using the lockstat command. + # We have a custom atomic_dec_and_lock() implementation but it's not # compatible with spinlock debugging so we need to fall back on # the generic version in that case. config HAVE_DEC_LOCK bool - depends on SMP && !DEBUG_SPINLOCK + depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER default y config MCOUNT @@ -700,6 +707,11 @@ config MCOUNT depends on STACK_DEBUG default y +config FRAME_POINTER + bool + depends on MCOUNT + default y + endmenu source "security/Kconfig" --- linux-2.6.6-rc1/arch/sparc64/kernel/ioctl32.c 2004-03-10 20:41:26.000000000 -0800 +++ 25/arch/sparc64/kernel/ioctl32.c 2004-04-18 22:25:24.664080968 -0700 @@ -1106,17 +1106,6 @@ COMPATIBLE_IOCTL(DRM_IOCTL_FINISH) COMPATIBLE_IOCTL(WIOCSTART) COMPATIBLE_IOCTL(WIOCSTOP) COMPATIBLE_IOCTL(WIOCGSTAT) -COMPATIBLE_IOCTL(HCIUARTSETPROTO) -COMPATIBLE_IOCTL(HCIUARTGETPROTO) -COMPATIBLE_IOCTL(RFCOMMCREATEDEV) -COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) -COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) -COMPATIBLE_IOCTL(RFCOMMGETDEVINFO) -COMPATIBLE_IOCTL(RFCOMMSTEALDLC) -COMPATIBLE_IOCTL(BNEPCONNADD) -COMPATIBLE_IOCTL(BNEPCONNDEL) -COMPATIBLE_IOCTL(BNEPGETCONNLIST) -COMPATIBLE_IOCTL(BNEPGETCONNINFO) /* And these ioctls need translation */ /* NCPFS */ HANDLE_IOCTL(NCP_IOC_NCPREQUEST_32, do_ncp_ncprequest) --- linux-2.6.6-rc1/arch/sparc64/kernel/Makefile 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/sparc64/kernel/Makefile 2004-04-18 22:25:24.663081120 -0700 @@ -3,7 +3,7 @@ # EXTRA_AFLAGS := -ansi -EXTRA_CFLAGS := -Werror +# EXTRA_CFLAGS := -Werror extra-y := head.o init_task.o vmlinux.lds.s --- linux-2.6.6-rc1/arch/sparc64/kernel/power.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/arch/sparc64/kernel/power.c 2004-04-18 22:25:24.664080968 -0700 @@ -20,6 +20,12 @@ #define __KERNEL_SYSCALLS__ #include +/* + * sysctl - toggle power-off restriction for serial console + * 
systems in machine_power_off() + */ +int scons_pwroff = 1; + #ifdef CONFIG_PCI static unsigned long power_reg = 0UL; @@ -44,7 +50,7 @@ static void (*poweroff_method)(void) = m void machine_power_off(void) { - if (!serial_console) { + if (!serial_console || scons_pwroff) { #ifdef CONFIG_PCI if (power_reg != 0UL) { /* Both register bits seem to have the --- linux-2.6.6-rc1/arch/sparc64/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sparc64/kernel/process.c 2004-04-18 22:25:54.255582376 -0700 @@ -847,8 +847,7 @@ unsigned long get_wchan(struct task_stru break; rw = (struct reg_window *) fp; pc = rw->ins[7]; - if (pc < ((unsigned long) scheduling_functions_start_here) || - pc >= ((unsigned long) scheduling_functions_end_here)) { + if (!in_sched_functions(pc)) { ret = pc; goto out; } --- linux-2.6.6-rc1/arch/sparc64/kernel/setup.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/kernel/setup.c 2004-04-18 22:25:35.899372944 -0700 @@ -451,8 +451,7 @@ extern unsigned short ram_flags; extern int root_mountflags; -char saved_command_line[256]; -char reboot_command[256]; +char reboot_command[COMMAND_LINE_SIZE]; static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 }; --- linux-2.6.6-rc1/arch/sparc64/kernel/signal32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sparc64/kernel/signal32.c 2004-04-18 22:25:24.665080816 -0700 @@ -129,8 +129,8 @@ int copy_siginfo_to_user32(siginfo_t32 _ err |= __put_user(from->si_trapno, &to->si_trapno); err |= __put_user((long)from->si_addr, &to->si_addr); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: + case __SI_RT >> 16: /* This is not generated by the kernel as of now. 
*/ + case __SI_MESGQ >> 16: err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); --- linux-2.6.6-rc1/arch/sparc64/kernel/sparc64_ksyms.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/kernel/sparc64_ksyms.c 2004-04-18 22:25:35.899372944 -0700 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -76,7 +77,6 @@ extern int __memcmp(const void *, const extern int __strncmp(const char *, const char *, __kernel_size_t); extern __kernel_size_t __strlen(const char *); extern __kernel_size_t strlen(const char *); -extern char saved_command_line[]; extern void linux_sparc_syscall(void); extern void rtrap(void); extern void show_regs(struct pt_regs *); --- linux-2.6.6-rc1/arch/sparc64/kernel/sys_sparc32.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/kernel/sys_sparc32.c 2004-04-18 22:25:59.136840312 -0700 @@ -832,182 +832,6 @@ asmlinkage int sys32_ftruncate64(unsigne return sys_ftruncate(fd, (high << 32) | low); } -typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); -typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); - -static long do_readv_writev32(int type, struct file *file, - const struct compat_iovec *vector, u32 count) -{ - compat_ssize_t tot_len; - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov=iovstack, *ivp; - struct inode *inode; - long retval, i; - io_fn_t fn; - iov_fn_t fnv; - - /* - * SuS says "The readv() function *may* fail if the iovcnt argument - * was less than or equal to 0, or greater than {IOV_MAX}. Linux has - * traditionally returned zero for zero segments, so... 
- */ - retval = 0; - if (count == 0) - goto out; - - /* First get the "struct iovec" from user memory and - * verify all the pointers - */ - retval = -EINVAL; - if (count > UIO_MAXIOV) - goto out; - if (!file->f_op) - goto out; - if (count > UIO_FASTIOV) { - retval = -ENOMEM; - iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - goto out; - } - retval = -EFAULT; - if (verify_area(VERIFY_READ, vector, sizeof(struct compat_iovec)*count)) - goto out; - - /* - * Single unix specification: - * We should -EINVAL if an element length is not >= 0 and fitting an - * ssize_t. The total length is fitting an ssize_t - * - * Be careful here because iov_len is a size_t not an ssize_t - */ - tot_len = 0; - i = count; - ivp = iov; - retval = -EINVAL; - while(i > 0) { - compat_ssize_t tmp = tot_len; - compat_ssize_t len; - u32 buf; - - if (__get_user(len, &vector->iov_len) || - __get_user(buf, &vector->iov_base)) { - retval = -EFAULT; - goto out; - } - if (len < 0) /* size_t not fitting an ssize_t32 .. */ - goto out; - tot_len += len; - if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ - goto out; - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t) len; - vector++; - ivp++; - i--; - } - if (tot_len == 0) { - retval = 0; - goto out; - } - - inode = file->f_dentry->d_inode; - /* VERIFY_WRITE actually means a read, as we write to user space */ - retval = locks_verify_area((type == READ - ? 
FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), - inode, file, file->f_pos, tot_len); - if (retval) - goto out; - - if (type == READ) { - fn = file->f_op->read; - fnv = file->f_op->readv; - } else { - fn = (io_fn_t)file->f_op->write; - fnv = file->f_op->writev; - } - if (fnv) { - retval = fnv(file, iov, count, &file->f_pos); - goto out; - } - - /* Do it by hand, with file-ops */ - ivp = iov; - while (count > 0) { - void * base; - int len, nr; - - base = ivp->iov_base; - len = ivp->iov_len; - ivp++; - count--; - - nr = fn(file, base, len, &file->f_pos); - - if (nr < 0) { - if (!retval) - retval = nr; - break; - } - retval += nr; - if (nr != len) - break; - } -out: - if (iov != iovstack) - kfree(iov); - if ((retval + (type == READ)) > 0) - dnotify_parent(file->f_dentry, - (type == READ) ? DN_ACCESS : DN_MODIFY); - - return retval; -} - -asmlinkage long sys32_readv(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - int ret; - - file = fget(fd); - if(!file) - return -EBADF; - - ret = -EBADF; - if (!(file->f_mode & FMODE_READ)) - goto out; - ret = -EINVAL; - if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) - goto out; - - ret = do_readv_writev32(READ, file, vector, count); - -out: - fput(file); - return ret; -} - -asmlinkage long sys32_writev(int fd, struct compat_iovec *vector, u32 count) -{ - struct file *file; - int ret; - - file = fget(fd); - if(!file) - return -EBADF; - - ret = -EBADF; - if (!(file->f_mode & FMODE_WRITE)) - goto out; - ret = -EINVAL; - if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) - goto out; - - ret = do_readv_writev32(WRITE, file, vector, count); - -out: - fput(file); - return ret; -} - /* readdir & getdents */ #define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de))) @@ -1140,158 +964,6 @@ out: /* end of readdir & getdents */ -/* - * Ooo, nasty. We need here to frob 32-bit unsigned longs to - * 64-bit unsigned longs. 
- */ - -static int get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset) -{ - if (ufdset) { - unsigned long odd; - - if (verify_area(VERIFY_WRITE, ufdset, n*sizeof(u32))) - return -EFAULT; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - __get_user(l, ufdset); - __get_user(h, ufdset+1); - ufdset += 2; - *fdset++ = h << 32 | l; - n -= 2; - } - if (odd) - __get_user(*fdset, ufdset); - } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, this makes sure that - * actually happens. - */ - memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32)); - } - return 0; -} - -static void set_fd_set32(unsigned long n, u32 *ufdset, unsigned long *fdset) -{ - unsigned long odd; - - if (!ufdset) - return; - - odd = n & 1UL; - n &= ~1UL; - while (n) { - unsigned long h, l; - l = *fdset++; - h = l >> 32; - __put_user(l, ufdset); - __put_user(h, ufdset+1); - ufdset += 2; - n -= 2; - } - if (odd) - __put_user(*fdset, ufdset); -} - -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) - -asmlinkage int sys32_select(int n, u32 *inp, u32 *outp, u32 *exp, u32 tvp_x) -{ - fd_set_bits fds; - struct compat_timeval *tvp = (struct compat_timeval *)AA(tvp_x); - char *bits; - unsigned long nn; - long timeout; - int ret, size; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp) { - time_t sec, usec; - - if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) - || (ret = __get_user(sec, &tvp->tv_sec)) - || (ret = __get_user(usec, &tvp->tv_usec))) - goto out_nofds; - - ret = -EINVAL; - if(sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = (usec + 1000000/HZ - 1) / (1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of 
- * long-words. - */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - nn = (n + 8*sizeof(u32) - 1) / (8*sizeof(u32)); - if ((ret = get_fd_set32(nn, fds.in, inp)) || - (ret = get_fd_set32(nn, fds.out, outp)) || - (ret = get_fd_set32(nn, fds.ex, exp))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - put_user(sec, &tvp->tv_sec); - put_user(usec, &tvp->tv_usec); - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set32(nn, inp, fds.res_in); - set_fd_set32(nn, outp, fds.res_out); - set_fd_set32(nn, exp, fds.res_ex); - -out: - kfree(bits); -out_nofds: - return ret; -} - int cp_compat_stat(struct kstat *stat, struct compat_stat *statbuf) { int err; @@ -1562,8 +1234,6 @@ sys32_rt_sigqueueinfo(int pid, int sig, return ret; } -extern void check_pending(int signum); - asmlinkage int sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact) { struct k_sigaction new_ka, old_ka; @@ -1659,193 +1329,6 @@ sys32_rt_sigaction(int sig, struct sigac return ret; } - -/* - * count32() counts the number of arguments/envelopes - */ -static int count32(u32 * argv, int max) -{ - int i = 0; - - if (argv != NULL) { - for (;;) { - u32 p; int error; - - error = get_user(p,argv); - if (error) - return error; - if (!p) - break; - argv++; - if (++i > max) - return -E2BIG; - } - 
} - return i; -} - -/* - * 'copy_string32()' copies argument/envelope strings from user - * memory to free pages in kernel mem. These are in a format ready - * to be put directly into the top of new user memory. - */ -static int copy_strings32(int argc, u32 * argv, struct linux_binprm *bprm) -{ - while (argc-- > 0) { - u32 str; - int len; - unsigned long pos; - - if (get_user(str, argv + argc) || - !str || - !(len = strnlen_user((char *)A(str), bprm->p))) - return -EFAULT; - - if (bprm->p < len) - return -E2BIG; - - bprm->p -= len; - - pos = bprm->p; - while (len) { - char *kaddr; - struct page *page; - int offset, bytes_to_copy, new, err; - - offset = pos % PAGE_SIZE; - page = bprm->page[pos / PAGE_SIZE]; - new = 0; - if (!page) { - page = alloc_page(GFP_USER); - bprm->page[pos / PAGE_SIZE] = page; - if (!page) - return -ENOMEM; - new = 1; - } - kaddr = kmap(page); - - if (new && offset) - memset(kaddr, 0, offset); - bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) { - bytes_to_copy = len; - if (new) - memset(kaddr+offset+len, 0, - PAGE_SIZE-offset-len); - } - - err = copy_from_user(kaddr + offset, (char *)A(str), - bytes_to_copy); - kunmap(page); - - if (err) - return -EFAULT; - - pos += bytes_to_copy; - str += bytes_to_copy; - len -= bytes_to_copy; - } - } - return 0; -} - -/* - * sys32_execve() executes a new program. 
- */ -static inline int -do_execve32(char * filename, u32 * argv, u32 * envp, struct pt_regs * regs) -{ - struct linux_binprm bprm; - struct file * file; - int retval; - int i; - - sched_balance_exec(); - - file = open_exec(filename); - - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - memset(bprm.page, 0, MAX_ARG_PAGES * sizeof(bprm.page[0])); - - bprm.file = file; - bprm.filename = filename; - bprm.interp = filename; - bprm.sh_bang = 0; - bprm.loader = 0; - bprm.exec = 0; - bprm.security = NULL; - bprm.mm = mm_alloc(); - retval = -ENOMEM; - if (!bprm.mm) - goto out_file; - - retval = init_new_context(current, bprm.mm); - if (retval < 0) - goto out_mm; - - bprm.argc = count32(argv, bprm.p / sizeof(u32)); - if ((retval = bprm.argc) < 0) - goto out_mm; - - bprm.envc = count32(envp, bprm.p / sizeof(u32)); - if ((retval = bprm.envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(&bprm); - if (retval) - goto out; - - retval = prepare_binprm(&bprm); - if (retval < 0) - goto out; - - retval = copy_strings_kernel(1, &bprm.filename, &bprm); - if (retval < 0) - goto out; - - bprm.exec = bprm.p; - retval = copy_strings32(bprm.envc, envp, &bprm); - if (retval < 0) - goto out; - - retval = copy_strings32(bprm.argc, argv, &bprm); - if (retval < 0) - goto out; - - retval = search_binary_handler(&bprm, regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(&bprm); - return retval; - } - -out: - /* Something went wrong, return the inode and free the argument pages*/ - for (i = 0 ; i < MAX_ARG_PAGES ; i++) { - struct page * page = bprm.page[i]; - if (page) - __free_page(page); - } - - if (bprm.security) - security_bprm_free(&bprm); - -out_mm: - if (bprm.mm) - mmdrop(bprm.mm); - -out_file: - if (bprm.file) { - allow_write_access(bprm.file); - fput(bprm.file); - } - return retval; -} - /* * sparc32_execve() executes a new program after the asm stub has set * things up for us. 
This should basically do what I want it to. @@ -1865,9 +1348,9 @@ asmlinkage int sparc32_execve(struct pt_ error = PTR_ERR(filename); if(IS_ERR(filename)) goto out; - error = do_execve32(filename, - (u32 *)AA((u32)regs->u_regs[base + UREG_I1]), - (u32 *)AA((u32)regs->u_regs[base + UREG_I2]), regs); + error = compat_do_execve(filename, + compat_ptr((u32)regs->u_regs[base + UREG_I1]), + compat_ptr((u32)regs->u_regs[base + UREG_I2]), regs); putname(filename); if(!error) { @@ -1909,232 +1392,6 @@ sys32_delete_module(const char *name_use #endif /* CONFIG_MODULES */ -#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) -/* Stuff for NFS server syscalls... */ -struct nfsctl_svc32 { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct nfsctl_client32 { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct nfsctl_export32 { - s8 ex32_client[NFSCLNT_IDMAX+1]; - s8 ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - s32 ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; -}; - -struct nfsctl_fdparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_version; -}; - -struct nfsctl_fsparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_maxlen; -}; - -struct nfsctl_arg32 { - s32 ca32_version; /* safeguard */ - union { - struct nfsctl_svc32 u32_svc; - struct nfsctl_client32 u32_client; - struct nfsctl_export32 u32_export; - struct nfsctl_fdparm32 u32_getfd; - struct nfsctl_fsparm32 u32_getfs; - } u; -#define ca32_svc u.u32_svc -#define ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_getfd u.u32_getfd -#define ca32_getfs u.u32_getfs -}; - -union nfsctl_res32 { - __u8 cr32_getfh[NFS_FHSIZE]; - struct knfsd_fh cr32_getfs; -}; - -static int nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int 
err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg32->ca32_svc.svc32_nthreads); - return err; -} - -static int nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_client.cl_ident[0], - &arg32->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg32->ca32_client.cl32_naddr); - err |= copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg32->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg32->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg32->ca32_client.cl32_fhkeylen); - err |= copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg32->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); - return (err ? -EFAULT : 0); -} - -static int nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_export.ex_client[0], - &arg32->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= copy_from_user(&karg->ca_export.ex_path[0], - &arg32->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg32->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg32->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg32->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg32->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg32->ca32_export.ex32_anon_gid); - karg->ca_export.ex_anon_uid = high2lowuid(karg->ca_export.ex_anon_uid); - karg->ca_export.ex_anon_gid = high2lowgid(karg->ca_export.ex_anon_gid); - return (err ? 
-EFAULT : 0); -} - -static int nfs_getfd32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfd.gd_addr, - &arg32->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfd.gd_path, - &arg32->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfd.gd_version, - &arg32->ca32_getfd.gd32_version); - return (err ? -EFAULT : 0); -} - -static int nfs_getfs32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = __get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfs.gd_addr, - &arg32->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfs.gd_path, - &arg32->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= __get_user(karg->ca_getfs.gd_maxlen, - &arg32->ca32_getfs.gd32_maxlen); - return (err ? -EFAULT : 0); -} - -/* This really doesn't need translations, we are only passing - * back a union which contains opaque nfs file handle data. - */ -static int nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) -{ - return (copy_to_user(res32, kres, sizeof(*res32)) ? 
-EFAULT : 0); -} - -int asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) -{ - struct nfsctl_arg *karg = NULL; - union nfsctl_res *kres = NULL; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - if(!karg) - return -ENOMEM; - if(res32) { - kres = kmalloc(sizeof(*kres), GFP_USER); - if(!kres) { - kfree(karg); - return -ENOMEM; - } - } - switch(cmd) { - case NFSCTL_SVC: - err = nfs_svc32_trans(karg, arg32); - break; - case NFSCTL_ADDCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_DELCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_EXPORT: - case NFSCTL_UNEXPORT: - err = nfs_exp32_trans(karg, arg32); - break; - case NFSCTL_GETFD: - err = nfs_getfd32_trans(karg, arg32); - break; - case NFSCTL_GETFS: - err = nfs_getfs32_trans(karg, arg32); - break; - default: - err = -EINVAL; - break; - } - if(err) - goto done; - oldfs = get_fs(); - set_fs(KERNEL_DS); - err = sys_nfsservctl(cmd, karg, kres); - set_fs(oldfs); - - if (err) - goto done; - - if((cmd == NFSCTL_GETFD) || - (cmd == NFSCTL_GETFS)) - err = nfs_getfh32_res_trans(kres, res32); - -done: - if(karg) - kfree(karg); - if(kres) - kfree(kres); - return err; -} -#else /* !NFSD */ -int asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) -{ - return sys_ni_syscall(); -} -#endif - /* Translations due to time_t size differences. Which affects all sorts of things, like timeval and itimerval. 
*/ --- linux-2.6.6-rc1/arch/sparc64/kernel/sys_sunos32.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/kernel/sys_sunos32.c 2004-04-18 22:25:58.858882568 -0700 @@ -528,18 +528,15 @@ asmlinkage int sunos_pathconf(u32 u_path return ret; } -/* SunOS mount system call emulation */ -extern asmlinkage int -sys32_select(int n, u32 inp, u32 outp, u32 exp, u32 tvp); - asmlinkage int sunos_select(int width, u32 inp, u32 outp, u32 exp, u32 tvp_x) { int ret; /* SunOS binaries expect that select won't change the tvp contents */ - ret = sys32_select (width, inp, outp, exp, tvp_x); + ret = compat_sys_select(width, compat_ptr(inp), compat_ptr(outp), + compat_ptr(exp), compat_ptr(tvp_x)); if (ret == -EINTR && tvp_x) { - struct compat_timeval *tvp = (struct compat_timeval *)A(tvp_x); + struct compat_timeval *tvp = compat_ptr(tvp_x); time_t sec, usec; __get_user(sec, &tvp->tv_sec); @@ -1203,9 +1200,6 @@ static inline int check_nonblock(int ret return ret; } -extern asmlinkage int sys32_readv(u32 fd, u32 vector, s32 count); -extern asmlinkage int sys32_writev(u32 fd, u32 vector, s32 count); - asmlinkage int sunos_read(unsigned int fd, u32 buf, u32 count) { int ret; @@ -1218,7 +1212,7 @@ asmlinkage int sunos_readv(u32 fd, u32 v { int ret; - ret = check_nonblock(sys32_readv(fd, vector, count), fd); + ret = check_nonblock(compat_sys_readv(fd, (void*)A(vector), count), fd); return ret; } @@ -1234,7 +1228,7 @@ asmlinkage int sunos_writev(u32 fd, u32 { int ret; - ret = check_nonblock(sys32_writev(fd, vector, count), fd); + ret = check_nonblock(compat_sys_writev(fd, (void*)A(vector), count), fd); return ret; } --- linux-2.6.6-rc1/arch/sparc64/kernel/systbls.S 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc64/kernel/systbls.S 2004-04-18 22:25:59.137840160 -0700 @@ -37,13 +37,13 @@ sys_call_table32: .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys32_getgroups16 /*80*/ .word sys32_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64 
.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid -/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid +/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, compat_sys_select, sys_setfsgid .word sys_fsync, sys_setpriority32, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall /*100*/ .word sys_getpriority, sys32_rt_sigreturn, sys32_rt_sigaction, sys32_rt_sigprocmask, sys32_rt_sigpending .word sys32_rt_sigtimedwait, sys32_rt_sigqueueinfo, sys32_rt_sigsuspend, sys_setresuid, sys_getresuid /*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall .word sys_getgroups, sys32_gettimeofday, compat_sys_getrusage, sys_nis_syscall, sys_getcwd -/*120*/ .word sys32_readv, sys32_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod +/*120*/ .word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod .word sys_nis_syscall, sys32_setreuid16, sys32_setregid16, sys_rename, sys_truncate /*130*/ .word sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall .word sys_nis_syscall, sys_mkdir, sys_rmdir, sys32_utimes, sys_stat64 @@ -65,11 +65,11 @@ sys_call_table32: .word sys32_ipc, sys32_sigreturn, sys_clone, sys_nis_syscall, sys32_adjtimex /*220*/ .word compat_sys_sigprocmask, sys_ni_syscall, sys32_delete_module, sys_ni_syscall, sys_getpgid .word sys32_bdflush, sys32_sysfs, sys_nis_syscall, sys32_setfsuid16, sys32_setfsgid16 -/*230*/ .word sys32_select, sys_time, sys_nis_syscall, sys_stime, compat_statfs64 +/*230*/ .word compat_sys_select, sys_time, sys_nis_syscall, sys_stime, compat_statfs64 .word compat_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys32_sched_rr_get_interval, compat_sys_nanosleep -/*250*/ .word sys32_mremap, sys32_sysctl, 
sys_getsid, sys_fdatasync, sys32_nfsservctl +/*250*/ .word sys32_mremap, sys32_sysctl, sys_getsid, sys_fdatasync, compat_sys_nfsservctl .word sys_ni_syscall, compat_clock_settime, compat_clock_gettime, compat_clock_getres, compat_clock_nanosleep /*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_timer_settime, compat_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, sys32_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy --- linux-2.6.6-rc1/arch/sparc64/lib/mcount.S 2003-06-14 12:18:22.000000000 -0700 +++ 25/arch/sparc64/lib/mcount.S 2004-04-18 22:25:24.665080816 -0700 @@ -30,8 +30,9 @@ ovstack: #endif .text .align 32 - .globl mcount + .globl mcount, _mcount mcount: +_mcount: #ifdef CONFIG_STACK_DEBUG /* * Check whether %sp is dangerously low. --- linux-2.6.6-rc1/arch/sparc64/lib/rwlock.S 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/sparc64/lib/rwlock.S 2004-04-18 22:25:47.916546056 -0700 @@ -85,5 +85,20 @@ __write_trylock_succeed: __write_trylock_fail: retl mov 0, %o0 + + .globl __read_trylock +__read_trylock: /* %o0 = lock_ptr */ + ldsw [%o0], %g5 + brlz,pn %g5, 100f + add %g5, 1, %g7 + cas [%o0], %g5, %g7 + cmp %g5, %g7 + bne,pn %icc, __read_trylock + membar #StoreLoad | #StoreStore + retl + mov 1, %o0 +100: retl + mov 0, %o0 + rwlock_impl_end: --- linux-2.6.6-rc1/arch/sparc64/Makefile 2003-08-22 19:23:40.000000000 -0700 +++ 25/arch/sparc64/Makefile 2004-04-18 22:25:24.662081272 -0700 @@ -52,7 +52,6 @@ ifeq ($(INLINE_LIMIT),y) endif ifeq ($(CONFIG_MCOUNT),y) - CFLAGS := $(subst -fomit-frame-pointer,,$(CFLAGS)) CFLAGS := $(CFLAGS) -pg endif --- linux-2.6.6-rc1/arch/sparc64/mm/hugetlbpage.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sparc64/mm/hugetlbpage.c 2004-04-18 22:25:59.347808240 -0700 @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -163,15 +164,9 @@ int follow_hugetlb_page(struct mm_struct } struct page *follow_huge_addr(struct mm_struct *mm, - struct vm_area_struct *vma, 
unsigned long address, int write) { - return NULL; -} - -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr) -{ - return NULL; + return ERR_PTR(-EINVAL); } int pmd_huge(pmd_t pmd) --- linux-2.6.6-rc1/arch/sparc64/mm/init.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sparc64/mm/init.c 2004-04-18 22:25:24.667080512 -0700 @@ -224,10 +224,11 @@ void update_mmu_cache(struct vm_area_str void flush_dcache_page(struct page *page) { + struct address_space *mapping = page_mapping(page); int dirty = test_bit(PG_dcache_dirty, &page->flags); int dirty_cpu = dcache_dirty_cpu(page); - if (page_mapping(page) && !mapping_mapped(page->mapping)) { + if (mapping && !mapping_mapped(mapping)) { if (dirty) { if (dirty_cpu == smp_processor_id()) return; --- linux-2.6.6-rc1/arch/sparc/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/sparc/kernel/process.c 2004-04-18 22:26:00.148686488 -0700 @@ -56,6 +56,12 @@ void (*pm_idle)(void); */ void (*pm_power_off)(void); +/* + * sysctl - toggle power-off restriction for serial console + * systems in machine_power_off() + */ +int scons_pwroff = 1; + extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *); struct task_struct *last_task_used_math = NULL; @@ -187,7 +193,7 @@ EXPORT_SYMBOL(machine_restart); void machine_power_off(void) { #ifdef CONFIG_SUN_AUXIO - if (auxio_power_register && !serial_console) + if (auxio_power_register && (!serial_console || scons_pwroff)) *auxio_power_register |= AUXIO_POWER_OFF; #endif machine_halt(); @@ -318,7 +324,7 @@ void show_stack(struct task_struct *tsk, fp = (unsigned long) _ksp; do { /* Bogus frame pointer? */ - if (fp < (task_base + sizeof(struct task_struct)) || + if (fp < (task_base + sizeof(struct thread_info)) || fp >= (task_base + (PAGE_SIZE << 1))) break; rw = (struct reg_window *) fp; @@ -710,13 +716,12 @@ unsigned long get_wchan(struct task_stru fp = task->thread_info->ksp + bias; do { /* Bogus frame pointer? 
*/ - if (fp < (task_base + sizeof(struct task_struct)) || + if (fp < (task_base + sizeof(struct thread_info)) || fp >= (task_base + (2 * PAGE_SIZE))) break; rw = (struct reg_window *) fp; pc = rw->ins[7]; - if (pc < ((unsigned long) scheduling_functions_start_here) || - pc >= ((unsigned long) scheduling_functions_end_here)) { + if (!in_sched_functions(pc)) { ret = pc; goto out; } --- linux-2.6.6-rc1/arch/sparc/kernel/setup.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc/kernel/setup.c 2004-04-18 22:25:35.900372792 -0700 @@ -244,8 +244,7 @@ extern unsigned short ram_flags; extern int root_mountflags; -char saved_command_line[256]; -char reboot_command[256]; +char reboot_command[COMMAND_LINE_SIZE]; enum sparc_cpu sparc_cpu_model; struct tt_entry *sparc_ttable; --- linux-2.6.6-rc1/arch/sparc/kernel/sparc_ksyms.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/sparc/kernel/sparc_ksyms.c 2004-04-18 22:25:35.900372792 -0700 @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -74,7 +75,6 @@ extern void *__memscan_zero(void *, size extern void *__memscan_generic(void *, int, size_t); extern int __memcmp(const void *, const void *, __kernel_size_t); extern int __strncmp(const char *, const char *, __kernel_size_t); -extern char saved_command_line[]; extern void bcopy (const char *, char *, int); extern int __ashrdi3(int, int); --- linux-2.6.6-rc1/arch/um/kernel/user_util.c 2003-06-14 12:17:59.000000000 -0700 +++ 25/arch/um/kernel/user_util.c 2004-04-18 22:25:35.901372640 -0700 @@ -34,7 +34,6 @@ #define COMMAND_LINE_SIZE _POSIX_ARG_MAX /* Changed in linux_main and setup_arch, which run before SMP is started */ -char saved_command_line[COMMAND_LINE_SIZE] = { 0 }; char command_line[COMMAND_LINE_SIZE] = { 0 }; void add_arg(char *cmd_line, char *arg) --- linux-2.6.6-rc1/arch/v850/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/v850/kernel/process.c 2004-04-18 22:25:54.257582072 -0700 @@ -203,8 +203,8 @@ int sys_execve (char *name, 
char **argv, /* * These bracket the sleeping functions.. */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) +#define first_sched ((unsigned long)__sched_text_start) +#define last_sched ((unsigned long)__sched_text_end) unsigned long get_wchan (struct task_struct *p) { --- linux-2.6.6-rc1/arch/v850/kernel/setup.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/arch/v850/kernel/setup.c 2004-04-18 22:25:35.901372640 -0700 @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -40,8 +41,7 @@ extern char _root_fs_image_start __attri extern char _root_fs_image_end __attribute__ ((__weak__)); -char command_line[512]; -char saved_command_line[512]; +char command_line[COMMAND_LINE_SIZE]; /* Memory not used by the kernel. */ static unsigned long total_ram_pages; --- linux-2.6.6-rc1/arch/x86_64/ia32/ia32_binfmt.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/ia32/ia32_binfmt.c 2004-04-18 22:25:49.820256648 -0700 @@ -365,6 +365,7 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); --- linux-2.6.6-rc1/arch/x86_64/ia32/ia32entry.S 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/ia32/ia32entry.S 2004-04-18 22:25:59.137840160 -0700 @@ -447,11 +447,11 @@ ia32_sys_call_table: .quad sys_setfsgid16 .quad sys_llseek /* 140 */ .quad sys32_getdents - .quad sys32_select + .quad compat_sys_select .quad sys_flock .quad sys_msync - .quad sys32_readv /* 145 */ - .quad sys32_writev + .quad compat_sys_readv /* 145 */ + .quad compat_sys_writev .quad sys_getsid .quad sys_fdatasync .quad sys32_sysctl /* sysctl */ @@ -474,7 +474,7 @@ ia32_sys_call_table: .quad sys32_vm86_warning /* vm86 */ .quad quiet_ni_syscall /* query_module */ .quad sys_poll - .quad sys32_nfsservctl + .quad compat_sys_nfsservctl .quad 
sys_setresgid16 /* 170 */ .quad sys_getresgid16 .quad sys_prctl @@ -578,6 +578,16 @@ ia32_sys_call_table: .quad sys_tgkill .quad compat_sys_utimes .quad sys32_fadvise64_64 + .quad sys_ni_syscall /* sys_vserver */ + .quad sys_ni_syscall /* sys_mbind */ + .quad sys_ni_syscall /* 275 sys_get_mempolicy */ + .quad sys_ni_syscall /* sys_set_mempolicy */ + .quad compat_sys_mq_open + .quad sys_mq_unlink + .quad compat_sys_mq_timedsend + .quad compat_sys_mq_timedreceive /* 280 */ + .quad compat_sys_mq_notify + .quad compat_sys_mq_getsetattr /* don't forget to change IA32_NR_syscalls */ ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 --- linux-2.6.6-rc1/arch/x86_64/ia32/ia32_ioctl.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/x86_64/ia32/ia32_ioctl.c 2004-04-18 22:25:24.668080360 -0700 @@ -188,17 +188,6 @@ COMPATIBLE_IOCTL(RTC_RD_TIME) COMPATIBLE_IOCTL(RTC_SET_TIME) COMPATIBLE_IOCTL(RTC_WKALM_SET) COMPATIBLE_IOCTL(RTC_WKALM_RD) -COMPATIBLE_IOCTL(HCIUARTSETPROTO) -COMPATIBLE_IOCTL(HCIUARTGETPROTO) -COMPATIBLE_IOCTL(RFCOMMCREATEDEV) -COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) -COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) -COMPATIBLE_IOCTL(RFCOMMGETDEVINFO) -COMPATIBLE_IOCTL(RFCOMMSTEALDLC) -COMPATIBLE_IOCTL(BNEPCONNADD) -COMPATIBLE_IOCTL(BNEPCONNDEL) -COMPATIBLE_IOCTL(BNEPGETCONNLIST) -COMPATIBLE_IOCTL(BNEPGETCONNINFO) COMPATIBLE_IOCTL(FIOQSIZE) /* And these ioctls need translation */ --- linux-2.6.6-rc1/arch/x86_64/ia32/ia32_signal.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/ia32/ia32_signal.c 2004-04-18 22:25:45.739876960 -0700 @@ -85,8 +85,8 @@ int ia32_copy_siginfo_to_user(siginfo_t3 err |= __put_user(from->si_overrun, &to->si_overrun); err |= __put_user((u32)(u64)from->si_ptr, &to->si_ptr); break; - case __SI_RT: /* This is not generated by the kernel as of now. */ - case __SI_MESGQ: + case __SI_RT >> 16: /* This is not generated by the kernel as of now. 
*/ + case __SI_MESGQ >> 16: err |= __put_user(from->si_uid, &to->si_uid); err |= __put_user(from->si_int, &to->si_int); break; @@ -395,7 +395,8 @@ ia32_setup_sigcontext(struct sigcontext_ * Determine which stack to use.. */ static void * -get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +get_sigframe(struct k_sigaction *ka_copy, struct pt_regs * regs, + size_t frame_size) { unsigned long rsp; @@ -403,28 +404,28 @@ get_sigframe(struct k_sigaction *ka, str rsp = regs->rsp; /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { + if (ka_copy->sa.sa_flags & SA_ONSTACK) { if (sas_ss_flags(rsp) == 0) rsp = current->sas_ss_sp + current->sas_ss_size; } /* This is the legacy signal stack switching. */ else if ((regs->ss & 0xffff) != __USER_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) { - rsp = (unsigned long) ka->sa.sa_restorer; + !(ka_copy->sa.sa_flags & SA_RESTORER) && + ka_copy->sa.sa_restorer) { + rsp = (unsigned long) ka_copy->sa.sa_restorer; } return (void *)((rsp - frame_size) & -8UL); } -void ia32_setup_frame(int sig, struct k_sigaction *ka, +void ia32_setup_frame(int sig, struct k_sigaction *ka_copy, compat_sigset_t *set, struct pt_regs * regs) { struct sigframe *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -455,8 +456,8 @@ void ia32_setup_frame(int sig, struct k_ /* Return stub is in 32bit vsyscall page */ { void *restorer = VSYSCALL32_SIGRETURN; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) + restorer = ka_copy->sa.sa_restorer; err |= __put_user(ptr_to_u32(restorer), &frame->pretcode); } /* These are actually not used anymore, but left because some @@ -481,7 +482,7 @@ void ia32_setup_frame(int sig, struct k_ /* Set up registers for signal handler */ 
regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->rip = (unsigned long) ka_copy->sa.sa_handler; asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); @@ -501,17 +502,17 @@ void ia32_setup_frame(int sig, struct k_ give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; signal_fault(regs,frame,"32bit signal deliver"); } -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +void ia32_setup_rt_frame(int sig, struct k_sigaction *ka_copy, siginfo_t *info, compat_sigset_t *set, struct pt_regs * regs) { struct rt_sigframe *frame; int err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame)); + frame = get_sigframe(ka_copy, regs, sizeof(*frame)); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; @@ -548,8 +549,8 @@ void ia32_setup_rt_frame(int sig, struct { void *restorer = VSYSCALL32_RTSIGRETURN; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ka_copy->sa.sa_flags & SA_RESTORER) + restorer = ka_copy->sa.sa_restorer; err |= __put_user(ptr_to_u32(restorer), &frame->pretcode); } @@ -577,7 +578,7 @@ void ia32_setup_rt_frame(int sig, struct /* Set up registers for signal handler */ regs->rsp = (unsigned long) frame; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->rip = (unsigned long) ka_copy->sa.sa_handler; asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); @@ -597,7 +598,7 @@ void ia32_setup_rt_frame(int sig, struct give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; signal_fault(regs, frame, "32bit rt signal setup"); } --- linux-2.6.6-rc1/arch/x86_64/ia32/sys_ia32.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/ia32/sys_ia32.c 2004-04-18 22:25:59.139839856 -0700 @@ -606,107 +606,6 @@ out: return error; 
} -/* - * We can actually return ERESTARTSYS instead of EINTR, but I'd - * like to be certain this leads to no problems. So I return - * EINTR just for safety. - * - * Update: ERESTARTSYS breaks at least the xview clock binary, so - * I'm trying ERESTARTNOHAND which restart only when you want to. - */ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) -#define ROUND_UP_TIME(x,y) (((x)+(y)-1)/(y)) - -asmlinkage long -sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct compat_timeval *tvp32) -{ - fd_set_bits fds; - char *bits; - long timeout; - int ret, size; - - timeout = MAX_SCHEDULE_TIMEOUT; - if (tvp32) { - time_t sec, usec; - - get_user(sec, &tvp32->tv_sec); - get_user(usec, &tvp32->tv_usec); - - ret = -EINVAL; - if (sec < 0 || usec < 0) - goto out_nofds; - - if ((unsigned long) sec < MAX_SELECT_SECONDS) { - timeout = ROUND_UP_TIME(usec, 1000000/HZ); - timeout += sec * (unsigned long) HZ; - } - } - - ret = -EINVAL; - if (n < 0) - goto out_nofds; - - if (n > current->files->max_fdset) - n = current->files->max_fdset; - - /* - * We need 6 bitmaps (in/out/ex for both incoming and outgoing), - * since we used fdset we need to allocate memory in units of - * long-words. 
- */ - ret = -ENOMEM; - size = FDS_BYTES(n); - bits = kmalloc(6 * size, GFP_KERNEL); - if (!bits) - goto out_nofds; - fds.in = (unsigned long *) bits; - fds.out = (unsigned long *) (bits + size); - fds.ex = (unsigned long *) (bits + 2*size); - fds.res_in = (unsigned long *) (bits + 3*size); - fds.res_out = (unsigned long *) (bits + 4*size); - fds.res_ex = (unsigned long *) (bits + 5*size); - - if ((ret = get_fd_set(n, inp, fds.in)) || - (ret = get_fd_set(n, outp, fds.out)) || - (ret = get_fd_set(n, exp, fds.ex))) - goto out; - zero_fd_set(n, fds.res_in); - zero_fd_set(n, fds.res_out); - zero_fd_set(n, fds.res_ex); - - ret = do_select(n, &fds, &timeout); - - if (tvp32 && !(current->personality & STICKY_TIMEOUTS)) { - time_t sec = 0, usec = 0; - if (timeout) { - sec = timeout / HZ; - usec = timeout % HZ; - usec *= (1000000/HZ); - } - put_user(sec, (int *)&tvp32->tv_sec); - put_user(usec, (int *)&tvp32->tv_usec); - } - - if (ret < 0) - goto out; - if (!ret) { - ret = -ERESTARTNOHAND; - if (signal_pending(current)) - goto out; - ret = 0; - } - - set_fd_set(n, inp, fds.res_in); - set_fd_set(n, outp, fds.res_out); - set_fd_set(n, exp, fds.res_ex); - -out: - kfree(bits); -out_nofds: - return ret; -} - struct sel_arg_struct { unsigned int n; unsigned int inp; @@ -722,106 +621,8 @@ sys32_old_select(struct sel_arg_struct * if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; - return sys32_select(a.n, (fd_set *)A(a.inp), (fd_set *)A(a.outp), (fd_set *)A(a.exp), - (struct compat_timeval *)A(a.tvp)); -} - -static struct iovec * -get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp) -{ - int i; - u32 buf, len; - struct iovec *ivp, *iov; - unsigned long totlen; - - /* Get the "struct iovec" from user memory */ - - *errp = 0; - if (!*count) - return 0; - *errp = -EINVAL; - if (*count > UIO_MAXIOV) - return(struct iovec *)0; - *errp = -EFAULT; - if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*(*count))) - 
return(struct iovec *)0; - if (*count > UIO_FASTIOV) { - *errp = -ENOMEM; - iov = kmalloc(*count*sizeof(struct iovec), GFP_KERNEL); - if (!iov) - return((struct iovec *)0); - } else - iov = iov_buf; - - ivp = iov; - totlen = 0; - for (i = 0; i < *count; i++) { - *errp = __get_user(len, &iov32->iov_len) | - __get_user(buf, &iov32->iov_base); - if (*errp) - goto error; - *errp = verify_area(type, (void *)A(buf), len); - if (*errp) { - if (i > 0) { - *count = i; - break; - } - goto error; - } - /* SuS checks: */ - *errp = -EINVAL; - if ((int)len < 0) - goto error; - if ((totlen += len) >= 0x7fffffff) - goto error; - ivp->iov_base = (void *)A(buf); - ivp->iov_len = (__kernel_size_t)len; - iov32++; - ivp++; - } - *errp = 0; - return(iov); - -error: - if (iov != iov_buf) - kfree(iov); - return NULL; -} - -asmlinkage long -sys32_readv(int fd, struct compat_iovec *vector, u32 count) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov; - int ret; - mm_segment_t old_fs = get_fs(); - - if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_WRITE, &ret)) == NULL) - return ret; - set_fs(KERNEL_DS); - ret = sys_readv(fd, iov, count); - set_fs(old_fs); - if (iov != iovstack) - kfree(iov); - return ret; -} - -asmlinkage long -sys32_writev(int fd, struct compat_iovec *vector, u32 count) -{ - struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov; - int ret; - mm_segment_t old_fs = get_fs(); - - if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_READ, &ret)) == NULL) - return ret; - set_fs(KERNEL_DS); - ret = sys_writev(fd, iov, count); - set_fs(old_fs); - if (iov != iovstack) - kfree(iov); - return ret; + return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp), + compat_ptr(a.exp), compat_ptr(a.tvp)); } /* @@ -1323,93 +1124,22 @@ long sys32_ustat(unsigned dev, struct us return ret; } -static int nargs(u32 src, char **dst) -{ - int cnt; - u32 val; - - cnt = 0; - do { - int ret = get_user(val, (__u32 *)(u64)src); - if (ret) - return ret; - if 
(dst) - dst[cnt] = (char *)(u64)val; - cnt++; - src += 4; - if (cnt >= (MAX_ARG_PAGES*PAGE_SIZE)/sizeof(void*)) - return -E2BIG; - } while(val); - if (dst) - dst[cnt-1] = 0; - return cnt; -} - -asmlinkage long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs) -{ - mm_segment_t oldseg; - char **buf = NULL; - int na = 0,ne = 0; - int ret; - unsigned sz = 0; - - if (argv) { - na = nargs(argv, NULL); - if (na < 0) - return -EFAULT; - } - if (envp) { - ne = nargs(envp, NULL); - if (ne < 0) - return -EFAULT; - } - - if (argv || envp) { - sz = (na+ne)*sizeof(void *); - if (sz > PAGE_SIZE) - buf = vmalloc(sz); - else - buf = kmalloc(sz, GFP_KERNEL); - if (!buf) - return -ENOMEM; - } - - if (argv) { - ret = nargs(argv, buf); - if (ret < 0) - goto free; - } - - if (envp) { - ret = nargs(envp, buf + na); - if (ret < 0) - goto free; - } - - name = getname(name); - ret = PTR_ERR(name); - if (IS_ERR(name)) - goto free; - - oldseg = get_fs(); - set_fs(KERNEL_DS); - ret = do_execve(name, argv ? buf : NULL, envp ? buf+na : NULL, ®s); - set_fs(oldseg); +asmlinkage long sys32_execve(char *name, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs regs) +{ + long error; + char * filename; - if (ret == 0) + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + error = compat_do_execve(filename, argv, envp, ®s); + if (error == 0) current->ptrace &= ~PT_DTRACE; - - putname(name); - -free: - if (argv || envp) { - if (sz > PAGE_SIZE) - vfree(buf); - else - kfree(buf); - } - return ret; -} + putname(filename); + return error; +} asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs regs) { @@ -1436,233 +1166,6 @@ long sys32_kill(int pid, int sig) } -#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) -/* Stuff for NFS server syscalls... 
*/ -struct nfsctl_svc32 { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct nfsctl_client32 { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct nfsctl_export32 { - s8 ex32_client[NFSCLNT_IDMAX+1]; - s8 ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - s32 ex32_flags; - compat_pid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; -}; - -struct nfsctl_fdparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_version; -}; - -struct nfsctl_fsparm32 { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - s32 gd32_maxlen; -}; - -struct nfsctl_arg32 { - s32 ca32_version; /* safeguard */ - union { - struct nfsctl_svc32 u32_svc; - struct nfsctl_client32 u32_client; - struct nfsctl_export32 u32_export; - struct nfsctl_fdparm32 u32_getfd; - struct nfsctl_fsparm32 u32_getfs; - } u; -#define ca32_svc u.u32_svc -#define ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_getfd u.u32_getfd -#define ca32_getfs u.u32_getfs -}; - -union nfsctl_res32 { - __u8 cr32_getfh[NFS_FHSIZE]; - struct knfsd_fh cr32_getfs; -}; - -static int nfs_svc32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = get_user(karg->ca_version, &arg32->ca32_version); - err |= __get_user(karg->ca_svc.svc_port, &arg32->ca32_svc.svc32_port); - err |= __get_user(karg->ca_svc.svc_nthreads, &arg32->ca32_svc.svc32_nthreads); - return err; -} - -static int nfs_clnt32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_client.cl_ident[0], - &arg32->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX); - err |= __get_user(karg->ca_client.cl_naddr, &arg32->ca32_client.cl32_naddr); - err |= copy_from_user(&karg->ca_client.cl_addrlist[0], - 
&arg32->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); - err |= __get_user(karg->ca_client.cl_fhkeytype, - &arg32->ca32_client.cl32_fhkeytype); - err |= __get_user(karg->ca_client.cl_fhkeylen, - &arg32->ca32_client.cl32_fhkeylen); - err |= copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg32->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX); - return err; -} - -static int nfs_exp32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_export.ex_client[0], - &arg32->ca32_export.ex32_client[0], - NFSCLNT_IDMAX); - err |= copy_from_user(&karg->ca_export.ex_path[0], - &arg32->ca32_export.ex32_path[0], - NFS_MAXPATHLEN); - err |= __get_user(karg->ca_export.ex_dev, - &arg32->ca32_export.ex32_dev); - err |= __get_user(karg->ca_export.ex_ino, - &arg32->ca32_export.ex32_ino); - err |= __get_user(karg->ca_export.ex_flags, - &arg32->ca32_export.ex32_flags); - err |= __get_user(karg->ca_export.ex_anon_uid, - &arg32->ca32_export.ex32_anon_uid); - err |= __get_user(karg->ca_export.ex_anon_gid, - &arg32->ca32_export.ex32_anon_gid); - SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); - SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); - return err; -} - - -static int nfs_getfd32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = get_user(karg->ca_version, &arg32->ca32_version); - err |= copy_from_user(&karg->ca_getfd.gd_addr, - &arg32->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfd.gd_path, - &arg32->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= get_user(karg->ca_getfd.gd_version, - &arg32->ca32_getfd.gd32_version); - return err; -} - -static int nfs_getfs32_trans(struct nfsctl_arg *karg, struct nfsctl_arg32 *arg32) -{ - int err; - - err = get_user(karg->ca_version, &arg32->ca32_version); - err |= 
copy_from_user(&karg->ca_getfs.gd_addr, - &arg32->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))); - err |= copy_from_user(&karg->ca_getfs.gd_path, - &arg32->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)); - err |= get_user(karg->ca_getfs.gd_maxlen, - &arg32->ca32_getfs.gd32_maxlen); - return err; -} - -/* This really doesn't need translations, we are only passing - * back a union which contains opaque nfs file handle data. - */ -static int nfs_getfh32_res_trans(union nfsctl_res *kres, union nfsctl_res32 *res32) -{ - return copy_to_user(res32, kres, sizeof(*res32)) ? -EFAULT : 0; -} - -long asmlinkage sys32_nfsservctl(int cmd, struct nfsctl_arg32 *arg32, union nfsctl_res32 *res32) -{ - struct nfsctl_arg *karg = NULL; - union nfsctl_res *kres = NULL; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - if(!karg) - return -ENOMEM; - if(res32) { - kres = kmalloc(sizeof(*kres), GFP_USER); - if(!kres) { - kfree(karg); - return -ENOMEM; - } - } - switch(cmd) { - case NFSCTL_SVC: - err = nfs_svc32_trans(karg, arg32); - break; - case NFSCTL_ADDCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_DELCLIENT: - err = nfs_clnt32_trans(karg, arg32); - break; - case NFSCTL_EXPORT: - case NFSCTL_UNEXPORT: - err = nfs_exp32_trans(karg, arg32); - break; - case NFSCTL_GETFD: - err = nfs_getfd32_trans(karg, arg32); - break; - case NFSCTL_GETFS: - err = nfs_getfs32_trans(karg, arg32); - break; - default: - err = -EINVAL; - break; - } - if(err) - goto done; - oldfs = get_fs(); - set_fs(KERNEL_DS); - err = sys_nfsservctl(cmd, karg, kres); - set_fs(oldfs); - - if (err) - goto done; - - if((cmd == NFSCTL_GETFD) || - (cmd == NFSCTL_GETFS)) - err = nfs_getfh32_res_trans(kres, res32); - -done: - if(karg) - kfree(karg); - if(kres) - kfree(kres); - return err; -} -#else /* !NFSD */ -long asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2) -{ - return sys_ni_syscall(); -} -#endif - long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p) { 
long ret; --- linux-2.6.6-rc1/arch/x86_64/Kconfig 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/Kconfig 2004-04-18 22:25:42.163420664 -0700 @@ -338,26 +338,6 @@ config PCI_MMCONFIG depends on PCI select ACPI_BOOT -# the drivers/pci/msi.c code needs to be fixed first before enabling -config PCI_USE_VECTOR - bool "Vector-based interrupt indexing" - depends on X86_LOCAL_APIC && NOTWORKING - default n - help - This replaces the current existing IRQ-based index interrupt scheme - with the vector-base index scheme. The advantages of vector base - over IRQ base are listed below: - 1) Support MSI implementation. - 2) Support future IOxAPIC hotplug - - Note that this enables MSI, Message Signaled Interrupt, on all - MSI capable device functions detected if users also install the - MSI patch. Message Signal Interrupt enables an MSI-capable - hardware device to send an inbound Memory Write on its PCI bus - instead of asserting IRQ signal on device IRQ pin. - - If you don't know what to do here, say N. - source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" @@ -466,12 +446,26 @@ config INIT_DEBUG config DEBUG_INFO bool "Compile the kernel with debug info" depends on DEBUG_KERNEL + default n help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. Say Y here only if you plan to use gdb to debug the kernel. Please note that this option requires new binutils. If you don't debug the kernel, you can say N. + +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on PROC_FS + default y + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. These + stats may be useful for both tuning and debugging the scheduler + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. 
config FRAME_POINTER bool "Compile the kernel with frame pointers" @@ -504,9 +498,8 @@ config IOMMU_LEAK help Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. - -#config X86_REMOTE_DEBUG -# bool "kgdb debugging stub" + +source "arch/x86_64/Kconfig.kgdb" endmenu --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/x86_64/Kconfig.kgdb 2004-04-18 22:25:31.588028368 -0700 @@ -0,0 +1,176 @@ +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL + select DEBUG_INFO + help + If you say Y here, the system will be compiled with the debug + option (-g) and a debugging stub will be included in the + kernel. This stub communicates with gdb on another (host) + computer via a serial port. The host computer should have + access to the kernel binary file (vmlinux) and a serial port + that is connected to the target machine. Gdb can be made to + configure the serial port or you can use stty and setserial to + do this. See the 'target' command in gdb. This option also + configures in the ability to request a breakpoint early in the + boot process. To request the breakpoint just include 'kgdb' + as a boot option when booting the target machine. The system + will then break as soon as it looks at the boot options. This + option also installs a breakpoint in panic and sends any + kernel faults to the debugger. For more information see the + Documentation/i386/kgdb.txt file. + +choice + depends on KGDB + prompt "Debug serial port BAUD" + default KGDB_115200BAUD + help + Gdb and the kernel stub need to agree on the baud rate to be + used. Some systems (x86 family at this writing) allow this to + be configured. 
+ +config KGDB_9600BAUD + bool "9600" + +config KGDB_19200BAUD + bool "19200" + +config KGDB_38400BAUD + bool "38400" + +config KGDB_57600BAUD + bool "57600" + +config KGDB_115200BAUD + bool "115200" +endchoice + +config KGDB_PORT + hex "hex I/O port address of the debug serial port" + depends on KGDB + default 3f8 + help + Some systems (x86 family at this writing) allow the port + address to be configured. The number entered is assumed to be + hex, don't put 0x in front of it. The standard address are: + COM1 3f8 , irq 4 and COM2 2f8 irq 3. Setserial /dev/ttySx + will tell you what you have. It is good to test the serial + connection with a live system before trying to debug. + +config KGDB_IRQ + int "IRQ of the debug serial port" + depends on KGDB + default 4 + help + This is the irq for the debug port. If everything is working + correctly and the kernel has interrupts on a control C to the + port should cause a break into the kernel debug stub. + +config DEBUG_INFO + bool + depends on KGDB + default y + +config KGDB_MORE + bool "Add any additional compile options" + depends on KGDB + default n + help + Saying yes here turns on the ability to enter additional + compile options. + + +config KGDB_OPTIONS + depends on KGDB_MORE + string "Additional compile arguments" + default "-O1" + help + This option allows you enter additional compile options for + the whole kernel compile. Each platform will have a default + that seems right for it. For example on PPC "-ggdb -O1", and + for i386 "-O1". Note that by configuring KGDB "-g" is already + turned on. In addition, on i386 platforms + "-fomit-frame-pointer" is deleted from the standard compile + options. + +config NO_KGDB_CPUS + int "Number of CPUs" + depends on KGDB && SMP + default NR_CPUS + help + + This option sets the number of cpus for kgdb ONLY. It is used + to prune some internal structures so they look "nice" when + displayed with gdb. 
This is to overcome possibly larger + numbers that may have been entered above. Enter the real + number to get nice clean kgdb_info displays. + +config KGDB_TS + bool "Enable kgdb time stamp macros?" + depends on KGDB + default n + help + Kgdb event macros allow you to instrument your code with calls + to the kgdb event recording function. The event log may be + examined with gdb at a break point. Turning on this + capability also allows you to choose how many events to + keep. Kgdb always keeps the latest events. + +choice + depends on KGDB_TS + prompt "Max number of time stamps to save?" + default KGDB_TS_128 + +config KGDB_TS_64 + bool "64" + +config KGDB_TS_128 + bool "128" + +config KGDB_TS_256 + bool "256" + +config KGDB_TS_512 + bool "512" + +config KGDB_TS_1024 + bool "1024" + +endchoice + +config STACK_OVERFLOW_TEST + bool "Turn on kernel stack overflow testing?" + depends on KGDB + default n + help + This option enables code in the front line interrupt handlers + to check for kernel stack overflow on interrupts and system + calls. This is part of the kgdb code on x86 systems. + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB + default n + help + This option enables the command line "console=kgdb" option. + When the system is booted with this option in the command line + all kernel printk output is sent to gdb (as well as to other + consoles). For this to work gdb must be connected. For this + reason, this command line option will generate a breakpoint if + gdb has not yet connected. After the gdb continue command is + given all pent up console output will be printed by gdb on the + host machine. Neither this option, nor KGDB require the + serial driver to be configured. + +config KGDB_SYSRQ + bool "Turn on SysRq 'G' command to do a break?" + depends on KGDB + default y + help + This option includes an option in the SysRq code that allows + you to enter SysRq G which generates a breakpoint to the KGDB + stub. 
This will work if the keyboard is alive and can + interrupt the system. Because of constraints on when the + serial port interrupt can be enabled, this code may allow you + to interrupt the system before the serial port control C is + available. Just say yes here. + --- linux-2.6.6-rc1/arch/x86_64/kernel/irq.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/arch/x86_64/kernel/irq.c 2004-04-18 22:25:31.588028368 -0700 @@ -405,6 +405,9 @@ out: spin_unlock(&desc->lock); irq_exit(); + + kgdb_process_breakpoint(); + return 1; } --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/x86_64/kernel/kgdb_stub.c 2004-04-18 22:25:54.259581768 -0700 @@ -0,0 +1,2591 @@ +/* + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + */ + +/* + * Copyright (c) 2000 VERITAS Software Corporation. + * + */ +/**************************************************************************** + * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ + * + * Module name: remcom.c $ + * Revision: 1.34 $ + * Date: 91/03/09 12:29:49 $ + * Contributor: Lake Stevens Instrument Division$ + * + * Description: low level support for gdb debugger. $ + * + * Considerations: only works on target hardware $ + * + * Written by: Glenn Engel $ + * Updated by: David Grothe + * Updated by: Robert Walsh + * Updated by: wangdi + * ModuleState: Experimental $ + * + * NOTES: See Below $ + * + * Modified for 386 by Jim Kingdon, Cygnus Support. + * Compatibility with 2.1.xx kernel by David Grothe + * + * Changes to allow auto initilization. 
All that is needed is that it + * be linked with the kernel and a break point (int 3) be executed. + * The header file defines BREAKPOINT to allow one to do + * this. It should also be possible, once the interrupt system is up, to + * call putDebugChar("+"). Once this is done, the remote debugger should + * get our attention by sending a ^C in a packet. George Anzinger + * + * Integrated into 2.2.5 kernel by Tigran Aivazian + * Added thread support, support for multiple processors, + * support for ia-32(x86) hardware debugging. + * Amit S. Kale ( akale@veritas.com ) + * + * Modified to support debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + * X86_64 changes from Andi Kleen's patch merged by Jim Houston + * (jim.houston@ccur.com). If it works thank Andi if its broken + * blame me. + * + * To enable debugger support, two things need to happen. One, a + * call to set_debug_traps() is necessary in order to allow any breakpoints + * or error conditions to be properly intercepted and reported to gdb. + * Two, a breakpoint needs to be generated to begin communication. This + * is most easily accomplished by a call to breakpoint(). Breakpoint() + * simulates a breakpoint by executing an int 3. + * + ************* + * + * The following gdb commands are supported: + * + * command function Return value + * + * g return the value of the CPU registers hex data or ENN + * G set the value of the CPU registers OK or ENN + * + * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN + * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN + * + * c Resume at current address SNN ( signal NN) + * cAA..AA Continue at address AA..AA SNN + * + * s Step one instruction SNN + * sAA..AA Step one instruction from AA..AA SNN + * + * k kill + * + * ? What was the last sigval ? SNN (signal NN) + * + * All commands and responses are sent with a packet which includes a + * checksum. A packet consists of + * + * $#. 
+ * + * where + * :: + * :: < two hex digits computed as modulo 256 sum of > + * + * When a packet is received, it is first acknowledged with either '+' or '-'. + * '+' indicates a successful transfer. '-' indicates a failed transfer. + * + * Example: + * + * Host: Reply: + * $m0,10#2a +$00010203040506070809101112131415#42 + * + ****************************************************************************/ +#define KGDB_VERSION "<20030915.1651.33>" +#include +#include +#include /* for strcpy */ +#include +#include +#include +#include /* for linux pt_regs struct */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define Dearly_printk(x...) +int kgdb_enabled = 0; + +/************************************************************************ + * + * external low-level support routines + */ +typedef void (*Function) (void); /* pointer to a function */ + +/* Thread reference */ +typedef unsigned char threadref[8]; + +extern int tty_putDebugChar(int); /* write a single character */ +extern int tty_getDebugChar(void); /* read and return a single char */ +extern void tty_flushDebugChar(void); /* flush pending characters */ +extern int eth_putDebugChar(int); /* write a single character */ +extern int eth_getDebugChar(void); /* read and return a single char */ +extern void eth_flushDebugChar(void); /* flush pending characters */ + +/************************************************************************/ +/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ +/* at least NUMREGBYTES*2 are needed for register packets */ +/* Longer buffer is needed to list all threads */ +#define BUFMAX 400 + +char *kgdb_version = KGDB_VERSION; + +/* debug > 0 prints ill-formed commands in valid packets & checksum errors */ +int debug_regs = 0; /* set to non-zero to print registers */ + +/* filled in by an external module */ +char *gdb_module_offsets; + +static const char 
hexchars[] = "0123456789abcdef"; + +/* Number of bytes of registers. */ +#define NUMREGBYTES (NUMREGS * sizeof(unsigned long)) +/* + * Note that this register image is in a different order than + * the register image that Linux produces at interrupt time. + * + * Linux's register image is defined by struct pt_regs in ptrace.h. + * Just why GDB uses a different order is a historical mystery. + * + * Could add XMM and segment registers here. + */ +enum regnames {_RAX, + _RBX, + _RCX, + _RDX, + _RSI, + _RDI, + _RBP, + _RSP, + _R8, + _R9, + _R10, + _R11, + _R12, + _R13, + _R14, + _R15, + _PC, + _PS, + NUMREGS }; + + +/*************************** ASSEMBLY CODE MACROS *************************/ +/* + * Put the error code here just in case the user cares. + * Likewise, the vector number here (since GDB only gets the signal + * number through the usual means, and that's not very specific). + * The called_from is the return address so he can tell how we entered kgdb. + * This will allow him to seperate out the various possible entries. 
/* *INDENT-OFF* */
/*
 * Global state of the kgdb stub, kept in one structure so that it can be
 * inspected from gdb as a unit.
 */
struct kgdb_info {
	int used_malloc;	/* bytes of malloc_array[] handed out so far
				 * (accessed through the used_m macro) */
	void *called_from;	/* return address, tells how kgdb was entered */
	long long entry_tsc;	/* presumably the time stamp counter value at
				 * entry -- TODO confirm against the writer */
	int errcode;		/* error code of the trap, for the user */
	int vector;		/* vector number of the trap; starts as -1 */
	int print_debug_info;	/* non-zero turns on the stub's debug printk
				 * output; starts as REMOTE_DEBUG */
#ifdef CONFIG_SMP
	int hold_on_sstep;	/* set to 1 by the hold_init initializer */
	/* One slot per cpu, indexed by cpu number up to MAX_NO_CPUS. */
	struct {
		volatile struct task_struct *task;
		int pid;
		int hold;
		struct pt_regs *regs;
	} cpus_waiting[MAX_NO_CPUS];
#endif
/* The initializer below uses the old GNU "field:" designator syntax. */
} kgdb_info = {hold_init print_debug_info:REMOTE_DEBUG, vector:-1};

/* *INDENT-ON* */
+ */ +#define LOOKASIDE_SIZE 200 /* should be more than enough */ +#define MALLOC_MAX 200 /* Max malloc size */ +struct { + unsigned long rsp; + unsigned long array[LOOKASIDE_SIZE]; +} fn_call_lookaside[MAX_NO_CPUS]; + +static int trap_cpu; +static unsigned long OLD_esp; + +#define END_OF_LOOKASIDE &fn_call_lookaside[trap_cpu].array[LOOKASIDE_SIZE] +#define IF_BIT 0x200 +#define TF_BIT 0x100 + +#define MALLOC_ROUND 8-1 + +static char malloc_array[MALLOC_MAX]; +IF_SMP(static void to_gdb(const char *mess)); +void * +malloc(int size) +{ + + if (size <= (MALLOC_MAX - used_m)) { + int old_used = used_m; + used_m += ((size + MALLOC_ROUND) & (~MALLOC_ROUND)); + return &malloc_array[old_used]; + } else { + return NULL; + } +} + +/* + * I/O dispatch functions... + * Based upon kgdboe, either call the ethernet + * handler or the serial one.. + */ +void +putDebugChar(int c) +{ + if (!kgdboe) { + tty_putDebugChar(c); + } else { + eth_putDebugChar(c); + } +} + +int +getDebugChar(void) +{ + if (!kgdboe) { + return tty_getDebugChar(); + } else { + return eth_getDebugChar(); + } +} + +void +flushDebugChar(void) +{ + if (!kgdboe) { + tty_flushDebugChar(); + } else { + eth_flushDebugChar(); + } +} + +/* + * Gdb calls functions by pushing agruments, including a return address + * on the stack and the adjusting EIP to point to the function. The + * whole assumption in GDB is that we are on a different stack than the + * one the "user" i.e. code that hit the break point, is on. This, of + * course is not true in the kernel. Thus various dodges are needed to + * do the call without directly messing with EIP (which we can not change + * as it is just a location and not a register. To adjust it would then + * require that we move every thing below EIP up or down as needed. This + * will not work as we may well have stack relative pointer on the stack + * (such as the pointer to regs, for example). 
/*
 * Assembly stubs used to move a prepared stack image onto the real stack
 * and then leave through an interrupt return.
 *
 * fn_rtn_stub:  loads %rsp from %rax, then falls through into
 *               fn_call_stub.
 * fn_call_stub: repeatedly steps %rbx down by 8 bytes, loads the quadword
 *               it now points at and pushes it, until %rsp equals %rcx.
 *               It then pops %rax, %rbx and %rcx from the stack that was
 *               just built and executes iret.
 *
 * NOTE(review): GNU as may assemble a bare "iret" in 64-bit code as the
 * 32-bit form; confirm whether "iretq" is what is intended here.
 */
extern asmlinkage void fn_call_stub(void);
extern asmlinkage void fn_rtn_stub(void);
/* *INDENT-OFF* */
__asm__("fn_rtn_stub:\n\t"
	"movq %rax,%rsp\n\t"		/* switch to the stack passed in %rax */
	"fn_call_stub:\n\t"
	"1:\n\t"
	"addq $-8,%rbx\n\t"		/* step the source pointer down one quadword */
	"movq (%rbx), %rax\n\t"
	"pushq %rax\n\t"		/* copy that quadword onto the stack */
	"cmpq %rsp,%rcx\n\t"
	"jne 1b\n\t"			/* loop until %rsp has reached %rcx */
	"popq %rax\n\t"			/* reload the three scratch registers */
	"popq %rbx\n\t"
	"popq %rcx\n\t"
	"iret \n\t");
/* *INDENT-ON* */
We make it global so it can be seen by gdb. + */ +volatile int in_kgdb_entry_log[MAX_NO_CPUS]; +volatile struct pt_regs *in_kgdb_here_log[MAX_NO_CPUS]; +/* +static spinlock_t continuelocks[MAX_NO_CPUS]; +*/ +spinlock_t kgdb_spinlock = SPIN_LOCK_UNLOCKED; +/* waiters on our spinlock plus us */ +static atomic_t spinlock_waiters = ATOMIC_INIT(1); +static int spinlock_count = 0; +static int spinlock_cpu = 0; +/* + * Note we use nested spin locks to account for the case where a break + * point is encountered when calling a function by user direction from + * kgdb. Also there is the memory exception recursion to account for. + * Well, yes, but this lets other cpus thru too. Lets add a + * cpu id to the lock. + */ +#define KGDB_SPIN_LOCK(x) if( spinlock_count == 0 || \ + spinlock_cpu != smp_processor_id()){\ + atomic_inc(&spinlock_waiters); \ + while (! spin_trylock(x)) {\ + in_kgdb(®s);\ + }\ + atomic_dec(&spinlock_waiters); \ + spinlock_count = 1; \ + spinlock_cpu = smp_processor_id(); \ + }else{ \ + spinlock_count++; \ + } +#define KGDB_SPIN_UNLOCK(x) if( --spinlock_count == 0) spin_unlock(x) +#else +unsigned kgdb_spinlock = 0; +#define KGDB_SPIN_LOCK(x) --*x +#define KGDB_SPIN_UNLOCK(x) ++*x +#endif + +int +hex(char ch) +{ + if ((ch >= 'a') && (ch <= 'f')) + return (ch - 'a' + 10); + if ((ch >= '0') && (ch <= '9')) + return (ch - '0'); + if ((ch >= 'A') && (ch <= 'F')) + return (ch - 'A' + 10); + return (-1); +} + +/* scan for the sequence $# */ +void +getpacket(char *buffer) +{ + unsigned char checksum; + unsigned char xmitcsum; + int i; + int count; + char ch; + + do { + /* wait around for the start character, ignore all other characters */ + while ((ch = (getDebugChar() & 0x7f)) != '$') ; + checksum = 0; + xmitcsum = -1; + + count = 0; + + /* now, read until a # or end of buffer is found */ + while (count < BUFMAX) { + ch = getDebugChar() & 0x7f; + if (ch == '#') + break; + checksum = checksum + ch; + buffer[count] = ch; + count = count + 1; + } + buffer[count] = 
0; + + if (ch == '#') { + xmitcsum = hex(getDebugChar() & 0x7f) << 4; + xmitcsum += hex(getDebugChar() & 0x7f); + if ((remote_debug) && (checksum != xmitcsum)) { + printk + ("bad checksum. My count = 0x%x, sent=0x%x. buf=%s\n", + checksum, xmitcsum, buffer); + } + + if (checksum != xmitcsum) + putDebugChar('-'); /* failed checksum */ + else { + putDebugChar('+'); /* successful transfer */ + /* if a sequence char is present, reply the sequence ID */ + if (buffer[2] == ':') { + putDebugChar(buffer[0]); + putDebugChar(buffer[1]); + /* remove sequence chars from buffer */ + count = strlen(buffer); + for (i = 3; i <= count; i++) + buffer[i - 3] = buffer[i]; + } + } + } + } while (checksum != xmitcsum); + + if (remote_debug) + printk("R:%s\n", buffer); + flushDebugChar(); +} + +/* send the packet in buffer. */ + +void +putpacket(char *buffer) +{ + unsigned char checksum; + int count; + char ch; + + /* $#. */ + + if (!kgdboe) { + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + + while ((ch = buffer[count])) { + putDebugChar(ch); + checksum += ch; + count += 1; + } + + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + + } while ((getDebugChar() & 0x7f) != '+'); + } else { + /* + * For udp, we can not transfer too much bytes once. 
+ * We only transfer MAX_SEND_COUNT size bytes each time + */ + +#define MAX_SEND_COUNT 30 + + int send_count = 0, i = 0; + char send_buf[MAX_SEND_COUNT]; + + do { + if (remote_debug) + printk("T:%s\n", buffer); + putDebugChar('$'); + checksum = 0; + count = 0; + send_count = 0; + while ((ch = buffer[count])) { + if (send_count >= MAX_SEND_COUNT) { + for(i = 0; i < MAX_SEND_COUNT; i++) { + putDebugChar(send_buf[i]); + } + flushDebugChar(); + send_count = 0; + } else { + send_buf[send_count] = ch; + checksum += ch; + count ++; + send_count++; + } + } + for(i = 0; i < send_count; i++) + putDebugChar(send_buf[i]); + putDebugChar('#'); + putDebugChar(hexchars[checksum >> 4]); + putDebugChar(hexchars[checksum % 16]); + flushDebugChar(); + } while ((getDebugChar() & 0x7f) != '+'); + } +} + +static char remcomInBuffer[BUFMAX]; +static char remcomOutBuffer[BUFMAX]; +static char lbuf[BUFMAX]; +static short error; + +void +debug_error(char *format, char *parm) +{ + if (remote_debug) + printk(format, parm); +} + +static void +print_regs(struct pt_regs *regs) +{ + printk("RAX=%016lx RBX=%016lx RCX=%016lx\n", + regs->rax, regs->rbx, regs->rcx); + printk("RDX=%016lx RSI=%016lx RDI=%016lx\n", + regs->rdx, regs->rsi, regs->rdi); + printk("RBP=%016lx PS=%016lx PC=%016lx\n", + regs->rbp, regs->eflags, regs->rip); + printk("R8=%016lx R9=%016lx R10=%016lx\n", + regs->r8, regs->r9, regs->r10); + printk("R11=%016lx R12=%016lx R13=%016lx\n", + regs->r11, regs->r12, regs->r13); + printk("R14=%016lx R15=%016lx RSP=%016lx\n", + regs->r14, regs->r15, regs->rsp); +} + +#define NEW_esp fn_call_lookaside[trap_cpu].rsp + +static void +regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + gdb_regs[_RAX] = regs->rax; + gdb_regs[_RBX] = regs->rbx; + gdb_regs[_RCX] = regs->rcx; + gdb_regs[_RDX] = regs->rdx; + gdb_regs[_RSI] = regs->rsi; + gdb_regs[_RDI] = regs->rdi; + gdb_regs[_RBP] = regs->rbp; + gdb_regs[ _PS] = regs->eflags; + gdb_regs[ _PC] = regs->rip; + gdb_regs[ _R8] = 
regs->r8; + gdb_regs[ _R9] = regs->r9; + gdb_regs[_R10] = regs->r10; + gdb_regs[_R11] = regs->r11; + gdb_regs[_R12] = regs->r12; + gdb_regs[_R13] = regs->r13; + gdb_regs[_R14] = regs->r14; + gdb_regs[_R15] = regs->r15; + gdb_regs[_RSP] = regs->rsp; + + /* Note, as we are a debugging the kernel, we will always + * trap in kernel code, this means no priviledge change, + * and so the pt_regs structure is not completely valid. In a non + * privilege change trap, only EFLAGS, CS and EIP are put on the stack, + * SS and ESP are not stacked, this means that the last 2 elements of + * pt_regs is not valid (they would normally refer to the user stack) + * also, using regs+1 is no good because you end up will a value that is + * 2 longs (8) too high. This used to cause stepping over functions + * to fail, so my fix is to use the address of regs->esp, which + * should point at the end of the stack frame. Note I have ignored + * completely exceptions that cause an error code to be stacked, such + * as double fault. Stuart Hughes, Zentropix. + * original code: gdb_regs[_ESP] = (int) (regs + 1) ; + + * this is now done on entry and moved to OLD_esp (as well as NEW_esp). 
/*
 * Fill gdb_regs[] with the register view gdb should see for task p.
 *
 *  - p == NULL or p == current: the trap frame in regs is copied as is.
 *  - (SMP) p is recorded in kgdb_info.cpus_waiting[]: that entry's saved
 *    regs are copied and _RSP is set to the address of its rsp field.
 *  - any other task: gdb_regs[] is zeroed and only _PC, _RBP and _RSP are
 *    filled in, derived from p->thread.rsp.
 */
void
get_gdb_regs(struct task_struct *p, struct pt_regs *regs, unsigned long *gdb_regs)
{
	unsigned long **rbp, *rsp, *rsp0, pc;
	int count = 0;
	IF_SMP(int i);
	if (!p || p == current) {
		regs_to_gdb_regs(gdb_regs, regs);
		return;
	}
#ifdef CONFIG_SMP
	for (i = 0; i < MAX_NO_CPUS; i++) {
		if (p == kgdb_info.cpus_waiting[i].task) {
			regs_to_gdb_regs(gdb_regs,
					 kgdb_info.cpus_waiting[i].regs);
			gdb_regs[_RSP] =
			    (unsigned long)&kgdb_info.cpus_waiting[i].regs->rsp;

			return;
		}
	}
#endif
	memset(gdb_regs, 0, NUMREGBYTES);
	/*
	 * The word at the saved stack pointer is taken as the saved frame
	 * pointer, and the reported stack pointer is two words above it.
	 * NOTE(review): this presumably mirrors what the context switch
	 * code leaves on the stack - confirm against switch_to().
	 */
	rsp = (unsigned long *)p->thread.rsp;
	rbp = (unsigned long **)rsp[0];
	rsp += 2;
	gdb_regs[_PC] = (unsigned long)thread_return;
	gdb_regs[_RBP] = (unsigned long)rbp;
	gdb_regs[_RSP] = (unsigned long)rsp;

/*
 * This code is to give a more informative notion of where a process
 * is waiting.  It is used only when the user asks for a thread info
 * list.  If the user then switches to the thread, the task will be
 * found in schedule, but a back trace should show the same info we come
 * up with.  This code was borrowed from process.c and then enhanced
 * to provide more registers than simply the program counter.
 */

	if (!thread_list) {
		return;
	}

	if (p->state == TASK_RUNNING)
		return;
	rsp0 = (unsigned long *)p->thread.rsp0;
	/* give up unless rsp lies between thread_info and rsp0 */
	if (rsp < (unsigned long *) p->thread_info || rsp > rsp0)
		return;
	/*
	 * Original note: "include/asm-i386/system.h:switch_to() pushes ebp
	 * last."  NOTE(review): that refers to the i386 header; confirm it
	 * still describes the x86-64 switch_to().
	 *
	 * Follow the frame-pointer chain, at most 17 frames, while the
	 * return address found in each frame is still inside the scheduler
	 * functions; the last such frame is what gets reported.
	 */
	do {
		/* stop if the next frame pointer leaves [rsp, rsp0] */
		if (*rbp < rsp || *rbp > rsp0)
			break;
		rbp = (unsigned long **)*rbp;
		rsp = (unsigned long *)rbp;
		pc = rsp[1];	/* word just above the frame pointer slot */

		if (!in_sched_functions(pc))
			break;
		gdb_regs[_PC] = (unsigned long)pc;
		gdb_regs[_RSP] = (unsigned long)rsp;
		gdb_regs[_RBP] = (unsigned long)rbp;
	} while (count++ < 16);
	return;
}
/*
 * Parse a run of hex digits starting at *ptr into *value.
 *
 * *value is cleared first, then each digit is shifted in from the right.
 * *ptr is advanced past every digit consumed and is left pointing at the
 * first character for which hex() returns a negative value, or at the
 * terminating NUL.  No limit is placed on the number of digits, so the
 * oldest digits are shifted out once the value is full.
 *
 * Returns the number of digits consumed (0 if there were none).
 */
int
hexToLong(char **ptr, unsigned long *value)
{
	int digits = 0;
	int nibble;

	*value = 0;

	for (; **ptr; (*ptr)++) {
		nibble = hex(**ptr);
		if (nibble < 0)
			break;
		*value = (*value << 4) | nibble;
		digits++;
	}

	return digits;
}
(fieldlength) { + nibble = stubhex(*buff++); + retval |= nibble; + fieldlength--; + if (fieldlength) + retval = retval << 4; + } + return retval; +} +#endif +static char * +pack_hex_byte(char *pkt, int byte) +{ + *pkt++ = hexchars[(byte >> 4) & 0xf]; + *pkt++ = hexchars[(byte & 0xf)]; + return pkt; +} + +#define BUF_THREAD_ID_SIZE 16 + +static char * +pack_threadid(char *pkt, threadref * id) +{ + char *limit; + unsigned char *altid; + + altid = (unsigned char *) id; + limit = pkt + BUF_THREAD_ID_SIZE; + while (pkt < limit) + pkt = pack_hex_byte(pkt, *altid++); + return pkt; +} + +#ifdef old_thread_list +static char * +unpack_byte(char *buf, int *value) +{ + *value = stub_unpack_int(buf, 2); + return buf + 2; +} + +static char * +unpack_threadid(char *inbuf, threadref * id) +{ + char *altref; + char *limit = inbuf + BUF_THREAD_ID_SIZE; + int x, y; + + altref = (char *) id; + + while (inbuf < limit) { + x = stubhex(*inbuf++); + y = stubhex(*inbuf++); + *altref++ = (x << 4) | y; + } + return inbuf; +} +#endif +void +int_to_threadref(threadref * id, int value) +{ + unsigned char *scan; + + scan = (unsigned char *) id; + { + int i = 4; + while (i--) + *scan++ = 0; + } + *scan++ = (value >> 24) & 0xff; + *scan++ = (value >> 16) & 0xff; + *scan++ = (value >> 8) & 0xff; + *scan++ = (value & 0xff); +} +int +int_to_hex_v(unsigned char * id, int value) +{ + unsigned char *start = id; + int shift; + int ch; + + for (shift = 28; shift >= 0; shift -= 4) { + if ((ch = (value >> shift) & 0xf) || (id != start)) { + *id = hexchars[ch]; + id++; + } + } + if (id == start) + *id++ = '0'; + return id - start; +} +#ifdef old_thread_list + +static int +threadref_to_int(threadref * ref) +{ + int i, value = 0; + unsigned char *scan; + + scan = (char *) ref; + scan += 4; + i = 4; + while (i-- > 0) + value = (value << 8) | ((*scan++) & 0xff); + return value; +} +#endif +static int +cmp_str(char *s1, char *s2, int count) +{ + while (count--) { + if (*s1++ != *s2++) + return 0; + } + return 1; 
/*
 * Software shadow of the four hardware breakpoint slots (DR0-DR3).
 * set_hw_break() and remove_hw_break() only edit this table;
 * correct_hw_break() is what copies it into the debug registers.
 */
/* *INDENT-OFF* */
struct hw_breakpoint {
	unsigned enabled;	/* non-zero while the slot is in use */
	unsigned type;		/* low two bits of the slot's DR7 control nibble */
	unsigned len;		/* high two bits of the slot's DR7 control nibble */
	unsigned long addr;	/* address loaded into DRn */
} breakinfo[4] = {
	{ .enabled = 0 },
	{ .enabled = 0 },
	{ .enabled = 0 },
	{ .enabled = 0 }
};
/* *INDENT-ON* */
unsigned long hw_breakpoint_status;

/*
 * Bring DR7 (and DR0-DR3 for newly armed slots) in line with breakinfo[].
 *
 * For every slot:
 *  - enabled in breakinfo[] but its enable bit is clear in DR7: set the
 *    enable bit, rewrite the slot's 4-bit control nibble from len/type
 *    and load the address register;
 *  - disabled in breakinfo[] but its enable bit is set in DR7: clear the
 *    enable bit and the control nibble.
 * DR7 is written back only if something changed.
 *
 * All four slots are visited; stopping after three would leave slot 3
 * unprogrammed.  The masks are built from unsigned long constants so the
 * shift for slot 3 (bits 28-31) does not overflow a signed int.
 */
void
correct_hw_break(void)
{
	unsigned breakno;
	int correctit = 0;
	unsigned long breakbit;
	unsigned long ctlmask;
	unsigned long dr7;

	__asm__ __volatile__("movq %%dr7, %0\n" : "=r" (dr7));

	for (breakno = 0;
	     breakno < sizeof(breakinfo) / sizeof(breakinfo[0]); breakno++) {
		/* enable bit tested for slot n is bit 2n+1 */
		breakbit = 2UL << (breakno << 1);
		/* control nibble for slot n starts at bit 16+4n */
		ctlmask = 0xf0000UL << (breakno << 2);

		if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
			correctit = 1;
			dr7 |= breakbit;
			dr7 &= ~ctlmask;
			dr7 |= ((((unsigned long) breakinfo[breakno].len << 2) |
				 breakinfo[breakno].type) << 16) <<
			    (breakno << 2);
			/* the register number must be a literal in the asm */
			switch (breakno) {
			case 0:
				__asm__ __volatile__("movq %0, %%dr0\n"
					: : "r" (breakinfo[breakno].addr));
				break;

			case 1:
				__asm__ __volatile__("movq %0, %%dr1\n"
					: : "r" (breakinfo[breakno].addr));
				break;

			case 2:
				__asm__ __volatile__("movq %0, %%dr2\n"
					: : "r" (breakinfo[breakno].addr));
				break;

			case 3:
				__asm__ __volatile__("movq %0, %%dr3\n"
					: : "r" (breakinfo[breakno].addr));
				break;
			}
		} else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
			correctit = 1;
			dr7 &= ~breakbit;
			dr7 &= ~ctlmask;
		}
	}
	if (correctit) {
		__asm__ __volatile__("movq %0, %%dr7\n" : : "r" (dr7));
	}
}

/*
 * Mark slot breakno as free in breakinfo[].
 *
 * Returns 0 on success, -1 if breakno is out of range or the slot was
 * not in use.  The debug registers are not touched here.
 */
int
remove_hw_break(unsigned breakno)
{
	if (breakno >= sizeof(breakinfo) / sizeof(breakinfo[0]))
		return -1;
	if (!breakinfo[breakno].enabled) {
		return -1;
	}
	breakinfo[breakno].enabled = 0;
	return 0;
}

/*
 * Record a breakpoint request in slot breakno of breakinfo[].
 *
 * addr is unsigned long so that a full 64-bit address reaches the table;
 * a plain unsigned parameter would drop the upper 32 bits.
 *
 * Returns 0 on success, -1 if breakno is out of range or the slot is
 * already in use.  The debug registers are not touched here.
 */
int
set_hw_break(unsigned breakno, unsigned type, unsigned len, unsigned long addr)
{
	if (breakno >= sizeof(breakinfo) / sizeof(breakinfo[0]))
		return -1;
	if (breakinfo[breakno].enabled) {
		return -1;
	}
	breakinfo[breakno].enabled = 1;
	breakinfo[breakno].type = type;
	breakinfo[breakno].len = len;
	breakinfo[breakno].addr = addr;
	return 0;
}
/*
 * Write a short description of the trap into buffer.
 *
 * Vector 3 is reported as a software breakpoint.  Vector 1 is decoded
 * from the DR6 status register: bit 0x4000 means single step, otherwise
 * the lowest set bit among bits 0-3 names the hardware breakpoint.
 * Every other vector yields "Details not available".
 *
 * errorcode is accepted but not used.
 */
void
printexceptioninfo(int exceptionNo, int errorcode, char *buffer)
{
	unsigned long status;
	int slot;

	if (exceptionNo == 3) {		/* breakpoint */
		sprintf(buffer, "Software breakpoint");
		return;
	}
	if (exceptionNo != 1) {		/* anything but a debug exception */
		sprintf(buffer, "Details not available");
		return;
	}

	__asm__ __volatile__("movq %%db6, %0\n" : "=r" (status));

	if (status & 0x4000) {
		sprintf(buffer, "Single step");
		return;
	}
	for (slot = 0; slot < 4; ++slot) {
		if (status & (1 << slot)) {
			sprintf(buffer, "Hardware breakpoint %d", slot);
			return;
		}
	}
	sprintf(buffer, "Unknown trap");
}
* The ThreadExtraInfo query allows us to pass an arbitrary string + * for display with the "info threads" command. + */ + +void +print_extra_info(task_t *p, char *buf) +{ + if (!p) { + sprintf(buf, "Invalid thread"); + return; + } + sprintf(buf, "0x%p %8d %4d %c %s", + (void *)p, p->parent->pid, + task_cpu(p), + (p->state == 0) ? (task_curr(p)?'R':'r') : + (p->state < 0) ? 'U' : + (p->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (p->state & TASK_STOPPED || p->ptrace & PT_PTRACED) ? 'T' : + (p->state & (TASK_ZOMBIE | TASK_DEAD)) ? 'Z' : + (p->state & TASK_INTERRUPTIBLE) ? 'S' : '?', + p->comm); +} + +/* + * This function does all command procesing for interfacing to gdb. + * + * NOTE: The INT nn instruction leaves the state of the interrupt + * enable flag UNCHANGED. That means that when this routine + * is entered via a breakpoint (INT 3) instruction from code + * that has interrupts enabled, then interrupts will STILL BE + * enabled when this routine is entered. The first thing that + * we do here is disable interrupts so as to prevent recursive + * entries and bothersome serial interrupts while we are + * trying to run the serial port in polled mode. + * + * For kernel version 2.1.xx the kgdb_cli() actually gets a spin lock so + * it is always necessary to do a restore_flags before returning + * so as to let go of that lock. 
+ */ +int +kgdb_handle_exception(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs) +{ + struct task_struct *usethread = NULL; + struct task_struct *thread_list_start = 0, *thread = NULL; + struct task_struct *p; + unsigned long addr, length; + unsigned long breakno, breaktype; + char *ptr; + unsigned long newPC; + threadref thref; + unsigned long threadid, tmpid; + int thread_min = PID_MAX + MAX_NO_CPUS; +#ifdef old_thread_list + int maxthreads; +#endif + int nothreads; + unsigned long flags; + unsigned long gdb_regs[NUMREGS]; + unsigned long dr6; + IF_SMP(int entry_state = 0); /* 0, ok, 1, no nmi, 2 sync failed */ +#define NO_NMI 1 +#define NO_SYNC 2 +#define regs (*linux_regs) + /* + * If the entry is not from the kernel then return to the Linux + * trap handler and let it process the interrupt normally. + */ + if ((linux_regs->eflags & VM_MASK) || (3 & linux_regs->cs)) { + printk("ignoring non-kernel exception\n"); + print_regs(®s); + return (0); + } + /* + * If we're using eth mode, set the 'mode' in the netdevice. + */ + + if (kgdboe) + netpoll_set_trap(1); + + local_irq_save(flags); + + /* Get kgdb spinlock */ + + KGDB_SPIN_LOCK(&kgdb_spinlock); + rdtscll(kgdb_info.entry_tsc); + /* + * We depend on this spinlock and the NMI watch dog to control the + * other cpus. They will arrive at "in_kgdb()" as a result of the + * NMI and will wait there for the following spin locks to be + * released. + */ +#ifdef CONFIG_SMP + +#if 0 + if (cpu_callout_map & ~MAX_CPU_MASK) { + printk("kgdb : too many cpus, possibly not mapped" + " in contiguous space, change MAX_NO_CPUS" + " in kgdb_stub and make new kernel.\n" + " cpu_callout_map is %lx\n", cpu_callout_map); + goto exit_just_unlock; + } +#endif + if (spinlock_count == 1) { + int time, end_time, dum; + int i; + int cpu_logged_in[MAX_NO_CPUS] = {[0 ... 
MAX_NO_CPUS - 1] = (0) + }; + if (remote_debug) { + printk("kgdb : cpu %d entry, syncing others\n", + smp_processor_id()); + } + for (i = 0; i < MAX_NO_CPUS; i++) { + /* + * Use trylock as we may already hold the lock if + * we are holding the cpu. Net result is all + * locked. + */ + spin_trylock(&waitlocks[i]); + } + for (i = 0; i < MAX_NO_CPUS; i++) + cpu_logged_in[i] = 0; + /* + * Wait for their arrival. We know the watch dog is active if + * in_kgdb() has ever been called, as it is always called on a + * watchdog tick. + */ + rdtsc(dum, time); + end_time = time + 2; /* Note: we use the High order bits! */ + i = 1; + if (num_online_cpus() > 1) { + int me_in_kgdb = in_kgdb_entry_log[smp_processor_id()]; + smp_send_nmi_allbutself(); + + while (i < num_online_cpus() && time != end_time) { + int j; + for (j = 0; j < MAX_NO_CPUS; j++) { + if (waiting_cpus[j].task && + waiting_cpus[j].task != NOCPU && + !cpu_logged_in[j]) { + i++; + cpu_logged_in[j] = 1; + if (remote_debug) { + printk + ("kgdb : cpu %d arrived at kgdb\n", + j); + } + break; + } else if (!waiting_cpus[j].task && + !cpu_online(j)) { + waiting_cpus[j].task = NOCPU; + cpu_logged_in[j] = 1; + waiting_cpus[j].hold = 1; + break; + } + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + + int wait = 100000; + while (wait--) ; + if (!waiting_cpus[j].task && + in_kgdb_here_log[j]) { + printk + ("kgdb : cpu %d stall" + " in in_kgdb\n", + j); + i++; + cpu_logged_in[j] = 1; + waiting_cpus[j].task = + (struct task_struct + *) 1; + } + } + } + + if (in_kgdb_entry_log[smp_processor_id()] > + (me_in_kgdb + 10)) { + break; + } + + rdtsc(dum, time); + } + if (i < num_online_cpus()) { + printk + ("kgdb : time out, proceeding without sync\n"); +#if 0 + printk("kgdb : Waiting_cpus: 0 = %d, 1 = %d\n", + waiting_cpus[0].task != 0, + waiting_cpus[1].task != 0); + printk("kgdb : Cpu_logged in: 0 = %d, 1 = %d\n", + cpu_logged_in[0], cpu_logged_in[1]); + printk + ("kgdb : in_kgdb_here_log in: 0 = %d, 1 = %d\n", + 
in_kgdb_here_log[0] != 0, + in_kgdb_here_log[1] != 0); +#endif + entry_state = NO_SYNC; + } else { +#if 0 + int ent = + in_kgdb_entry_log[smp_processor_id()] - + me_in_kgdb; + printk("kgdb : sync after %d entries\n", ent); +#endif + } + } else { + if (remote_debug) { + printk + ("kgdb : %d cpus, but watchdog not active\n" + "proceeding without locking down other cpus\n", + (int)num_online_cpus()); + entry_state = NO_NMI; + } + } + } +#endif + + if (remote_debug) { + unsigned long *lp = (unsigned long *) &linux_regs; + + printk("handle_exception(exceptionVector=%d, " + "signo=%d, err_code=%d, linux_regs=%p)\n", + exceptionVector, signo, err_code, linux_regs); + if (debug_regs) { + print_regs(®s); + printk("Stk: %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[0], lp[1], lp[2], lp[3], + lp[4], lp[5], lp[6], lp[7]); + printk(" %8lx %8lx %8lx %8lx" + " %8lx %8lx %8lx %8lx\n", + lp[8], lp[9], lp[10], lp[11], + lp[12], lp[13], lp[14], lp[15]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[16], lp[17], lp[18], lp[19], + lp[20], lp[21], lp[22], lp[23]); + printk(" %8lx %8lx %8lx %8lx " + "%8lx %8lx %8lx %8lx\n", + lp[24], lp[25], lp[26], lp[27], + lp[28], lp[29], lp[30], lp[31]); + } + } + + /* Disable hardware debugging while we are in kgdb */ + /* Get the debug register status register */ +/* *INDENT-OFF* */ + __asm__("movq %0,%%db7" + : /* no output */ + :"r"(0UL)); + + asm volatile ("movq %%db6, %0\n" + :"=r" (hw_breakpoint_status) + :); + +#if 0 +/* *INDENT-ON* */ + switch (exceptionVector) { + case 0: /* divide error */ + case 1: /* debug exception */ + case 2: /* NMI */ + case 3: /* breakpoint */ + case 4: /* overflow */ + case 5: /* bounds check */ + case 6: /* invalid opcode */ + case 7: /* device not available */ + case 8: /* double fault (errcode) */ + case 10: /* invalid TSS (errcode) */ + case 12: /* stack fault (errcode) */ + case 16: /* floating point error */ + case 17: /* alignment check (errcode) */ + default: /* any undocumented 
*/ + break; + case 11: /* segment not present (errcode) */ + case 13: /* general protection (errcode) */ + case 14: /* page fault (special errcode) */ + case 19: /* cache flush denied */ + if (mem_err_expected) { + /* + * This fault occured because of the + * get_char or set_char routines. These + * two routines use either eax of edx to + * indirectly reference the location in + * memory that they are working with. + * For a page fault, when we return the + * instruction will be retried, so we + * have to make sure that these + * registers point to valid memory. + */ + mem_err = 1; /* set mem error flag */ + mem_err_expected = 0; + mem_err_cnt++; /* helps in debugging */ + /* make valid address */ + regs.eax = (long) &garbage_loc; + /* make valid address */ + regs.edx = (long) &garbage_loc; + if (remote_debug) + printk("Return after memory error: " + "mem_err_cnt=%d\n", mem_err_cnt); + if (debug_regs) + print_regs(®s); + goto exit_kgdb; + } + break; + } +#endif + if (remote_debug) + printk("kgdb : entered kgdb on cpu %d\n", smp_processor_id()); + + gdb_i386vector = exceptionVector; + gdb_i386errcode = err_code; + kgdb_info.called_from = __builtin_return_address(0); +#ifdef CONFIG_SMP + /* + * OK, we can now communicate, lets tell gdb about the sync. + * but only if we had a problem. + */ + switch (entry_state) { + case NO_NMI: + to_gdb("NMI not active, other cpus not stopped\n"); + break; + case NO_SYNC: + to_gdb("Some cpus not stopped, see 'kgdb_info' for details\n"); + default:; + } + +#endif +/* + * Set up the gdb function call area. 
+ */ + trap_cpu = smp_processor_id(); + OLD_esp = NEW_esp = (unsigned long) (&linux_regs->rsp); + + IF_SMP(once_again:) + /* reply to host that an exception has occurred */ + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + + putpacket(remcomOutBuffer); + + while (1 == 1) { + error = 0; + remcomOutBuffer[0] = 0; + getpacket(remcomInBuffer); + switch (remcomInBuffer[0]) { + case '?': + remcomOutBuffer[0] = 'S'; + remcomOutBuffer[1] = hexchars[signo >> 4]; + remcomOutBuffer[2] = hexchars[signo % 16]; + remcomOutBuffer[3] = 0; + break; + case 'd': + remote_debug = !(remote_debug); /* toggle debug flag */ + printk("Remote debug %s\n", + remote_debug ? "on" : "off"); + break; + case 'g': /* return the value of the CPU registers */ + get_gdb_regs(usethread, ®s, gdb_regs); + mem2hex((char *) gdb_regs, + remcomOutBuffer, NUMREGBYTES); + break; + case 'G': /* set the value of the CPU registers - return OK */ + hex2mem(&remcomInBuffer[1], + (char *) gdb_regs, NUMREGBYTES); + if (!usethread || usethread == current) { + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "E00"); + } + break; + + case 'P':{ /* set the value of a single CPU register - + return OK */ + /* + * For some reason, gdb wants to talk about psudo + * registers (greater than 15). + */ + unsigned long regno; + + ptr = &remcomInBuffer[1]; + regs_to_gdb_regs(gdb_regs, ®s); + if ((!usethread || usethread == current) && + hexToLong(&ptr, ®no) && + *ptr++ == '=' && (regno >= 0)) { + if (regno >= NUMREGS) + break; + hex2mem(ptr, (char *) &gdb_regs[regno], + 8); + gdb_regs_to_regs(gdb_regs, ®s); + strcpy(remcomOutBuffer, "OK"); + break; + } + strcpy(remcomOutBuffer, "E01"); + break; + } + + /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ + case 'm': + /* TRY TO READ %x,%x. 
IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && (hexToLong(&ptr, &length))) { + ptr = 0; + /* + * hex doubles the byte count + */ + if (length > (BUFMAX / 2)) + length = BUFMAX / 2; + if (mem2hex((char *) addr, + remcomOutBuffer, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } + } + + if (ptr) { + strcpy(remcomOutBuffer, "E01"); + debug_error + ("malformed read memory command: %s\n", + remcomInBuffer); + } + break; + + /* MAA..AA,LLLL: + Write LLLL bytes at address AA.AA return OK */ + case 'M': + /* TRY TO READ '%x,%x:'. IF SUCCEED, SET PTR = 0 */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr) && + (*(ptr++) == ',') && + (hexToLong(&ptr, &length)) && (*(ptr++) == ':')) { + if (hex2mem(ptr, (char *) addr, length)) { + strcpy(remcomOutBuffer, "E03"); + debug_error("memory fault\n", NULL); + } else { + strcpy(remcomOutBuffer, "OK"); + } + + ptr = 0; + } + if (ptr) { + strcpy(remcomOutBuffer, "E02"); + debug_error + ("malformed write memory command: %s\n", + remcomInBuffer); + } + break; + case 'S': + remcomInBuffer[0] = 's'; + case 'C': + /* Csig;AA..AA where ;AA..AA is optional + * continue with signal + * Since signals are meaning less to us, delete that + * part and then fall into the 'c' code. 
+ */ + ptr = &remcomInBuffer[1]; + length = 2; + while (*ptr && *ptr != ';') { + length++; + ptr++; + } + if (*ptr) { + do { + ptr++; + *(ptr - length++) = *ptr; + } while (*ptr); + } else { + remcomInBuffer[1] = 0; + } + + /* cAA..AA Continue at address AA..AA(optional) */ + /* sAA..AA Step one instruction from AA..AA(optional) */ + /* D detach, reply OK and then continue */ + case 'c': + case 's': + case 'D': + + /* try to read optional parameter, + pc unchanged if no parm */ + ptr = &remcomInBuffer[1]; + if (hexToLong(&ptr, &addr)) { + if (remote_debug) + printk("Changing EIP to 0x%lx\n", addr); + + regs.rip = addr; + } + + newPC = regs.rip; + + /* clear the trace bit */ + regs.eflags &= 0xfffffeff; + + /* set the trace bit if we're stepping */ + if (remcomInBuffer[0] == 's') + regs.eflags |= 0x100; + + /* detach is a friendly version of continue. Note that + debugging is still enabled (e.g hit control C) + */ + if (remcomInBuffer[0] == 'D') { + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + } + + if (remote_debug) { + printk("Resuming execution\n"); + print_regs(®s); + } + asm volatile ("movq %%db6, %0\n":"=r" (dr6) + :); + if (!(dr6 & 0x4000)) { + for (breakno = 0; breakno < 4; ++breakno) { + if (dr6 & (1 << breakno) && + (breakinfo[breakno].type == 0)) { + /* Set restore flag */ + regs.eflags |= 0x10000; + break; + } + } + } + + if (kgdboe) + netpoll_set_trap(0); + + correct_hw_break(); + asm volatile ("movq %0, %%db6\n"::"r" (0UL)); + goto exit_kgdb; + + /* kill the program */ + case 'k': /* do nothing */ + break; + + /* query */ + case 'q': + nothreads = 0; + switch (remcomInBuffer[1]) { + case 'f': + threadid = 1; + thread_list = 2; + thread_list_start = (usethread ? 
: current); + case 's': + if (!cmp_str(&remcomInBuffer[2], + "ThreadInfo", 10)) + break; + + remcomOutBuffer[nothreads++] = 'm'; + for (; threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + nothreads += int_to_hex_v( + &remcomOutBuffer[ + nothreads], + threadid); + if (thread_min > threadid) + thread_min = threadid; + remcomOutBuffer[ + nothreads] = ','; + nothreads++; + if (nothreads > BUFMAX - 10) + break; + } + } + if (remcomOutBuffer[nothreads - 1] == 'm') { + remcomOutBuffer[nothreads - 1] = 'l'; + } else { + nothreads--; + } + remcomOutBuffer[nothreads] = 0; + break; + +#ifdef old_thread_list /* Old thread info request */ + case 'L': + /* List threads */ + thread_list = 2; + thread_list_start = (usethread ? : current); + unpack_byte(remcomInBuffer + 3, &maxthreads); + unpack_threadid(remcomInBuffer + 5, &thref); + do { + int buf_thread_limit = + (BUFMAX - 22) / BUF_THREAD_ID_SIZE; + if (maxthreads > buf_thread_limit) { + maxthreads = buf_thread_limit; + } + } while (0); + remcomOutBuffer[0] = 'q'; + remcomOutBuffer[1] = 'M'; + remcomOutBuffer[4] = '0'; + pack_threadid(remcomOutBuffer + 5, &thref); + + /* If start flag set start at 0. 
*/ + if (remcomInBuffer[2] == '1') + threadid = 0; + else + threadid = threadref_to_int(&thref); + for (nothreads = 0; + nothreads < maxthreads && + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + thread = getthread(threadid); + if (thread) { + int_to_threadref(&thref, + threadid); + pack_threadid(remcomOutBuffer + + 21 + + nothreads * 16, + &thref); + nothreads++; + if (thread_min > threadid) + thread_min = threadid; + } + } + + if (threadid == PID_MAX + MAX_NO_CPUS) { + remcomOutBuffer[4] = '1'; + } + pack_hex_byte(remcomOutBuffer + 2, nothreads); + remcomOutBuffer[21 + nothreads * 16] = '\0'; + break; +#endif + case 'C': + /* Current thread id */ + remcomOutBuffer[0] = 'Q'; + remcomOutBuffer[1] = 'C'; + threadid = current->pid; + if (!threadid) { + /* + * idle thread + */ + for (threadid = PID_MAX; + threadid < PID_MAX + MAX_NO_CPUS; + threadid++) { + if (current == + idle_task(threadid - + PID_MAX)) + break; + } + } + int_to_threadref(&thref, threadid); + pack_threadid(remcomOutBuffer + 2, &thref); + remcomOutBuffer[18] = '\0'; + break; + + case 'E': + /* Print exception info */ + printexceptioninfo(exceptionVector, + err_code, remcomOutBuffer); + break; + case 'T': + ptr = &remcomInBuffer[0]; + if (strncmp(ptr, "qThreadExtraInfo,", + strlen("qThreadExtraInfo,")) == 0) { + ptr += strlen("qThreadExtraInfo,"); + hexToLong(&ptr, &tmpid); + p = getthread(tmpid); + print_extra_info(p, lbuf); + mem2hex(lbuf, remcomOutBuffer, + strlen(lbuf)); + } + break; +#if 0 + case 'T':{ + char * nptr; + /* Thread extra info */ + if (!cmp_str(&remcomInBuffer[2], + "hreadExtraInfo,", 15)) { + break; + } + ptr = &remcomInBuffer[17]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + nptr = &thread->comm[0]; + length = 0; + ptr = &remcomOutBuffer[0]; + do { + length++; + ptr = pack_hex_byte(ptr, *nptr++); + } while (*nptr && length < 16); + /* + * would like that 16 to be the size of + * task_struct.comm but don't know the + * syntax.. 
+ */ + *ptr = 0; + } +#endif + } + break; + + /* task related */ + case 'H': + switch (remcomInBuffer[1]) { + case 'g': + ptr = &remcomInBuffer[2]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (!thread) { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + break; + } + /* + * Just in case I forget what this is all about, + * the "thread info" command to gdb causes it + * to ask for a thread list. It then switches + * to each thread and asks for the registers. + * For this (and only this) usage, we want to + * fudge the registers of tasks not on the run + * list (i.e. waiting) to show the routine that + * called schedule. Also, gdb, is a minimalist + * in that if the current thread is the last + * it will not re-read the info when done. + * This means that in this case we must show + * the real registers. So here is how we do it: + * Each entry we keep track of the min + * thread in the list (the last that gdb will) + * get info for. We also keep track of the + * starting thread. + * "thread_list" is cleared when switching back + * to the min thread if it is was current, or + * if it was not current, thread_list is set + * to 1. When the switch to current comes, + * if thread_list is 1, clear it, else do + * nothing. 
+ */ + usethread = thread; + if ((thread_list == 1) && + (thread == thread_list_start)) { + thread_list = 0; + } + if (thread_list && (threadid == thread_min)) { + if (thread == thread_list_start) { + thread_list = 0; + } else { + thread_list = 1; + } + } + /* follow through */ + case 'c': + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + break; + } + break; + + /* Query thread status */ + case 'T': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &threadid); + thread = getthread(threadid); + if (thread) { + remcomOutBuffer[0] = 'O'; + remcomOutBuffer[1] = 'K'; + remcomOutBuffer[2] = '\0'; + if (thread_min > threadid) + thread_min = threadid; + } else { + remcomOutBuffer[0] = 'E'; + remcomOutBuffer[1] = '\0'; + } + break; + + case 'Y': /* set up a hardware breakpoint */ + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + ptr++; + hexToLong(&ptr, &breaktype); + ptr++; + hexToLong(&ptr, &length); + ptr++; + hexToLong(&ptr, &addr); + if (set_hw_break(breakno & 0x3, + breaktype & 0x3, + length & 0x3, addr) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + /* Remove hardware breakpoint */ + case 'y': + ptr = &remcomInBuffer[1]; + hexToLong(&ptr, &breakno); + if (remove_hw_break(breakno & 0x3) == 0) { + strcpy(remcomOutBuffer, "OK"); + } else { + strcpy(remcomOutBuffer, "ERROR"); + } + break; + + case 'r': /* reboot */ + strcpy(remcomOutBuffer, "OK"); + putpacket(remcomOutBuffer); + /*to_gdb("Rebooting\n"); */ + /* triplefault no return from here */ + { + static long no_idt[2]; + __asm__ __volatile__("lidt %0"::"m"(no_idt[0])); + BREAKPOINT; + } + + } /* switch */ + + /* reply to the request */ + putpacket(remcomOutBuffer); + } /* while(1==1) */ + /* + * reached by goto only. + */ + exit_kgdb: + /* + * Here is where we set up to trap a gdb function call. NEW_esp + * will be changed if we are trying to do this. 
We handle both + * adding and subtracting, thus allowing gdb to put grung on + * the stack which it removes later. + */ + if (NEW_esp != OLD_esp) { + unsigned long *ptr = END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) + ptr -= (OLD_esp - NEW_esp) / sizeof (unsigned long); + *--ptr = linux_regs->eflags; + *--ptr = linux_regs->cs; + *--ptr = linux_regs->rip; + *--ptr = linux_regs->rcx; + *--ptr = linux_regs->rbx; + *--ptr = linux_regs->rax; + linux_regs->rcx = NEW_esp - (sizeof (unsigned long) * 6); + linux_regs->rbx = (unsigned long) END_OF_LOOKASIDE; + if (NEW_esp < OLD_esp) { + linux_regs->rip = (unsigned long) fn_call_stub; + } else { + linux_regs->rip = (unsigned long) fn_rtn_stub; + linux_regs->rax = NEW_esp; + } + linux_regs->eflags &= ~(IF_BIT | TF_BIT); + } +#ifdef CONFIG_SMP + /* + * Release gdb wait locks + * Sanity check time. Must have at least one cpu to run. Also single + * step must not be done if the current cpu is on hold. + */ + if (spinlock_count == 1) { + int ss_hold = (regs.eflags & 0x100) && kgdb_info.hold_on_sstep; + int cpu_avail = 0; + int i; + + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!cpu_online(i)) + break; + if (!hold_cpu(i)) { + cpu_avail = 1; + } + } + /* + * Early in the bring up there will be NO cpus on line... + */ + if (!cpu_avail && !cpus_empty(cpu_online_map)) { + to_gdb("No cpus unblocked, see 'kgdb_info.hold_cpu'\n"); + goto once_again; + } + if (hold_cpu(smp_processor_id()) && (regs.eflags & 0x100)) { + to_gdb + ("Current cpu must be unblocked to single step\n"); + goto once_again; + } + if (!(ss_hold)) { + int i; + for (i = 0; i < MAX_NO_CPUS; i++) { + if (!hold_cpu(i)) { + spin_unlock(&waitlocks[i]); + } + } + } else { + spin_unlock(&waitlocks[smp_processor_id()]); + } + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + /* + * If this cpu is on hold, this is where we + * do it. Note, the NMI will pull us out of here, + * but will return as the above lock is not held. 
+ * We will stay here till another cpu releases the lock for us. + */ + spin_unlock_wait(waitlocks + smp_processor_id()); + local_irq_restore(flags); + return (1); + } +#if 0 +exit_just_unlock: +#endif +#endif + /* Release kgdb spinlock */ + KGDB_SPIN_UNLOCK(&kgdb_spinlock); + local_irq_restore(flags); + return (1); +} + +#undef regs +static int kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + struct die_args *d = ptr; + + if (!kgdb_enabled || (cmd == DIE_DEBUG && user_mode(d->regs))) + return NOTIFY_DONE; + if (cmd == DIE_NMI_IPI) { + if (in_kgdb(d->regs)) + return NOTIFY_BAD; + } else if (kgdb_handle_exception(d->trapnr, d->signr, d->err, d->regs)) + return NOTIFY_BAD; /* skip */ + + return NOTIFY_DONE; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = 0, +}; + +void set_debug_traps(void) +{ + static int initialized = 0; + + if (!initialized) { + initialized = 1; + notifier_chain_register(&die_chain, &kgdb_notifier); + } +} + +/* + * Provide the command line "gdb" initial break + */ +int __init kgdb_initial_break(char * str) +{ + if (*str == '\0'){ + breakpoint(); + return 1; + } + return 0; +} +__setup("gdb",kgdb_initial_break); + +/* This function will generate a breakpoint exception. It is used at the + beginning of a program to sync up with a debugger and can be used + otherwise as a quick means to stop program execution and "break" into + the debugger. */ +/* But really, just use the BREAKPOINT macro. We will handle the int stuff + */ + +void breakpoint(void) +{ + + set_debug_traps(); + kgdb_enabled = 1; +#if 0 + /* + * These calls were not enough to allow breakpoint to be + * called before trap_init(). I moved the argument parsing + * after trap_init() and it seems to work. 
+ */ + set_intr_usr_gate(3,&int3); /* disable ints on trap */ + set_intr_gate(1,&debug); + set_intr_gate(14,&page_fault); +#endif + + BREAKPOINT; +} + +#ifdef later +/* + * possibly we should not go thru the traps.c code at all? Someday. + */ +void +do_kgdb_int3(struct pt_regs *regs, long error_code) +{ + kgdb_handle_exception(3, 5, error_code, regs); + return; +} +#endif +#undef regs +#ifdef CONFIG_TRAP_BAD_SYSCALL_EXITS +asmlinkage void +bad_sys_call_exit(int stuff) +{ + struct pt_regs *regs = (struct pt_regs *) &stuff; + printk("Sys call %d return with %x preempt_count\n", + (int) regs->orig_eax, preempt_count()); +} +#endif +#ifdef CONFIG_STACK_OVERFLOW_TEST +#include +asmlinkage void +stack_overflow(void) +{ +#ifdef BREAKPOINT + BREAKPOINT; +#else + printk("Kernel stack overflow, looping forever\n"); +#endif + while (1) { + } +} +#endif + +#if defined(CONFIG_SMP) || defined(CONFIG_KGDB_CONSOLE) +char gdbconbuf[BUFMAX]; + +static void +kgdb_gdb_message(const char *s, unsigned count) +{ + int i; + int wcount; + char *bufptr; + /* + * This takes care of NMI while spining out chars to gdb + */ + IF_SMP(in_kgdb_console = 1); + gdbconbuf[0] = 'O'; + bufptr = gdbconbuf + 1; + while (count > 0) { + if ((count << 1) > (BUFMAX - 2)) { + wcount = (BUFMAX - 2) >> 1; + } else { + wcount = count; + } + count -= wcount; + for (i = 0; i < wcount; i++) { + bufptr = pack_hex_byte(bufptr, s[i]); + } + *bufptr = '\0'; + s += wcount; + + putpacket(gdbconbuf); + + } + IF_SMP(in_kgdb_console = 0); +} +#endif +#ifdef CONFIG_SMP +static void +to_gdb(const char *s) +{ + int count = 0; + while (s[count] && (count++ < BUFMAX)) ; + kgdb_gdb_message(s, count); +} +#endif +#ifdef CONFIG_KGDB_CONSOLE +#include +#include +#include +#include + +void +kgdb_console_write(struct console *co, const char *s, unsigned count) +{ + + if (gdb_i386vector == -1) { + /* + * We have not yet talked to gdb. What to do... + * lets break, on continue we can do the write. + * But first tell him whats up. 
Uh, well no can do, + * as this IS the console. Oh well... + * We do need to wait or the messages will be lost. + * Other option would be to tell the above code to + * ignore this breakpoint and do an auto return, + * but that might confuse gdb. Also this happens + * early enough in boot up that we don't have the traps + * set up yet, so... + */ + breakpoint(); + } + kgdb_gdb_message(s, count); +} + +/* + * ------------------------------------------------------------ + * Serial KGDB driver + * ------------------------------------------------------------ + */ + +static struct console kgdbcons = { + name:"kgdb", + write:kgdb_console_write, +#ifdef CONFIG_KGDB_USER_CONSOLE + device:kgdb_console_device, +#endif + flags:CON_PRINTBUFFER | CON_ENABLED, + index:-1, +}; + +/* + * The trick here is that this file gets linked before printk.o + * That means we get to peer at the console info in the command + * line before it does. If we are up, we register, otherwise, + * do nothing. By returning 0, we allow printk to look also. + */ +static int kgdb_console_enabled; + +int __init +kgdb_console_init(char *str) +{ + if ((strncmp(str, "kgdb", 4) == 0) || (strncmp(str, "gdb", 3) == 0)) { + register_console(&kgdbcons); + kgdb_console_enabled = 1; + } + return 0; /* let others look at the string */ +} + +__setup("console=", kgdb_console_init); + +#ifdef CONFIG_KGDB_USER_CONSOLE +static kdev_t kgdb_console_device(struct console *c); +/* This stuff sort of works, but it knocks out telnet devices + * we are leaving it here in case we (or you) find time to figure it out + * better.. + */ + +/* + * We need a real char device as well for when the console is opened for user + * space activities. 
+ */
+
+/*
+ * open() handler for the kgdb console char device.  Nothing has to
+ * be set up per open, so it always reports success (0).
+ */
+static int
+kgdb_consdev_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/*
+ * write() handler for the kgdb console char device.
+ *
+ * The user buffer is copied through the static bounce buffer kbuf in
+ * pieces of at most sizeof(kbuf) bytes and each piece is handed to
+ * kgdb_console_write().  kbuf is shared, so callers are serialised
+ * with a semaphore.
+ *
+ * Returns the number of bytes passed on, -ERESTARTSYS if
+ * down_interruptible() was interrupted, or -EFAULT if a copy from
+ * user space failed (bytes already passed on are not reported then).
+ */
+static ssize_t
+kgdb_consdev_write(struct file *file, const char *buf,
+		   size_t count, loff_t * ppos)
+{
+	ssize_t ret = 0;
+	static char kbuf[128];
+	static DECLARE_MUTEX(sem);
+
+	/* We are not reentrant... */
+	if (down_interruptible(&sem))
+		return -ERESTARTSYS;
+
+	while (count > 0) {
+		/*
+		 * Keep the chunk length in a size_t.  Squeezing count
+		 * into an int could give 0 (or a negative value) for
+		 * large counts, and a 0 length chunk would keep this
+		 * loop spinning without ever making progress.
+		 */
+		size_t size = count;
+
+		if (size > sizeof (kbuf))
+			size = sizeof (kbuf);
+		/* need to copy the data from user space */
+		if (copy_from_user(kbuf, buf, size)) {
+			ret = -EFAULT;
+			break;
+		}
+		kgdb_console_write(&kgdbcons, kbuf, size);
+		count -= size;
+		ret += size;
+		buf += size;
+	}
+
+	up(&sem);
+
+	return ret;
+}
+
+/* File operations of the kgdb console char device: open and write only. */
+struct file_operations kgdb_consdev_fops = {
+	.open = kgdb_consdev_open,
+	.write = kgdb_consdev_write,
+};
+
+/* Reports the device number of the kgdb console: (TTYAUX_MAJOR, 1). */
+static kdev_t
+kgdb_console_device(struct console *c)
+{
+	return MKDEV(TTYAUX_MAJOR, 1);
+}
+
+/*
+ * This routine gets called from the serial stub in the i386/lib
+ * This is so it is done late in bring up (just before the console open).
+ */ +void +kgdb_console_finit(void) +{ + if (kgdb_console_enabled) { + char *cptr = cdevname(MKDEV(TTYAUX_MAJOR, 1)); + char *cp = cptr; + while (*cptr && *cptr != '(') + cptr++; + *cptr = 0; + unregister_chrdev(TTYAUX_MAJOR, cp); + register_chrdev(TTYAUX_MAJOR, "kgdb", &kgdb_consdev_fops); + } +} +#endif +#endif +#ifdef CONFIG_KGDB_TS +#include /* time stamp code */ +#include /* in_interrupt */ +#ifdef CONFIG_KGDB_TS_64 +#define DATA_POINTS 64 +#endif +#ifdef CONFIG_KGDB_TS_128 +#define DATA_POINTS 128 +#endif +#ifdef CONFIG_KGDB_TS_256 +#define DATA_POINTS 256 +#endif +#ifdef CONFIG_KGDB_TS_512 +#define DATA_POINTS 512 +#endif +#ifdef CONFIG_KGDB_TS_1024 +#define DATA_POINTS 1024 +#endif +#ifndef DATA_POINTS +#define DATA_POINTS 128 /* must be a power of two */ +#endif +#define INDEX_MASK (DATA_POINTS - 1) +#if (INDEX_MASK & DATA_POINTS) +#error "CONFIG_KGDB_TS_COUNT must be a power of 2" +#endif +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + int data0; + int data1; +}; +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + struct task_struct *task; + long long at_time; + int from_ln; + char *in_src; + void *from; + int *with_shpf; + struct task_struct *t1; + struct task_struct *t2; +}; +struct kgdb_and_then_struct kgdb_data[DATA_POINTS]; + +struct kgdb_and_then_struct *kgdb_and_then = &kgdb_data[0]; +int kgdb_and_then_count; + +void +kgdb_tstamp(int line, char *source, int data0, int data1) +{ + static spinlock_t ts_spin = SPIN_LOCK_UNLOCKED; + unsigned long flags; + + local_irq_save(flags); + spin_lock(&ts_spin); + rdtscll(kgdb_and_then->at_time); +#ifdef CONFIG_SMP + kgdb_and_then->on_cpu = smp_processor_id(); +#endif + kgdb_and_then->task = current; + kgdb_and_then->from_ln = line; + kgdb_and_then->in_src = source; + kgdb_and_then->from = __builtin_return_address(0); + kgdb_and_then->with_shpf = (int 
*)(long)(((flags & IF_BIT) >> 9) | + (preempt_count() << 8)); + kgdb_and_then->data0 = data0; + kgdb_and_then->data1 = data1; + kgdb_and_then = &kgdb_data[++kgdb_and_then_count & INDEX_MASK]; + spin_unlock(&ts_spin); + local_irq_restore(flags); +#ifdef CONFIG_PREEMPT + +#endif + return; +} +#endif +typedef int gdb_debug_hook(int exceptionVector, + int signo, int err_code, struct pt_regs *linux_regs); +gdb_debug_hook *linux_debug_hook = &kgdb_handle_exception; /* histerical reasons... */ + +static int kgdb_need_breakpoint[NR_CPUS]; + +void kgdb_schedule_breakpoint(void) +{ + kgdb_need_breakpoint[smp_processor_id()] = 1; +} + +void kgdb_process_breakpoint(void) +{ + /* + * Handle a breakpoint queued from inside network driver code + * to avoid reentrancy issues + */ + if (kgdb_need_breakpoint[smp_processor_id()]) { + kgdb_need_breakpoint[smp_processor_id()] = 0; + kgdb_enabled = 1; + BREAKPOINT; + } +} + --- linux-2.6.6-rc1/arch/x86_64/kernel/Makefile 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/x86_64/kernel/Makefile 2004-04-18 22:25:31.600026544 -0700 @@ -8,7 +8,7 @@ obj-y := process.o semaphore.o signal.o ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o warmreboot.o -obj-y += mce.o acpi/ +obj-y += mce.o obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ obj-$(CONFIG_ACPI_BOOT) += acpi/ @@ -27,6 +27,7 @@ obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_KGDB) += kgdb_stub.o obj-y += topology.o --- linux-2.6.6-rc1/arch/x86_64/kernel/process.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/kernel/process.c 2004-04-18 22:25:54.260581616 -0700 @@ -573,12 +573,6 @@ asmlinkage long sys_vfork(struct pt_regs NULL, NULL); } -/* - * These bracket the sleeping functions.. 
- */ -#define first_sched ((unsigned long) scheduling_functions_start_here) -#define last_sched ((unsigned long) scheduling_functions_end_here) - unsigned long get_wchan(struct task_struct *p) { unsigned long stack; @@ -595,14 +589,12 @@ unsigned long get_wchan(struct task_stru if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) return 0; rip = *(u64 *)(fp+8); - if (rip < first_sched || rip >= last_sched) + if (!in_sched_functions(rip)) return rip; fp = *(u64 *)fp; } while (count++ < 16); return 0; } -#undef last_sched -#undef first_sched long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) { --- linux-2.6.6-rc1/arch/x86_64/kernel/setup.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/arch/x86_64/kernel/setup.c 2004-04-18 22:25:35.902372488 -0700 @@ -100,7 +100,6 @@ extern int root_mountflags; extern char _text, _etext, _edata, _end; char command_line[COMMAND_LINE_SIZE]; -char saved_command_line[COMMAND_LINE_SIZE]; struct resource standard_io_resources[] = { { "dma1", 0x00, 0x1f, IORESOURCE_BUSY | IORESOURCE_IO }, --- linux-2.6.6-rc1/arch/x86_64/kernel/signal.c 2004-01-09 00:04:31.000000000 -0800 +++ 25/arch/x86_64/kernel/signal.c 2004-04-18 22:25:45.740876808 -0700 @@ -236,7 +236,8 @@ get_stack(struct k_sigaction *ka, struct return (void *)round_down(rsp - size, 16); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static void setup_rt_frame(int sig, struct k_sigaction *ka_copy, + siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe *frame; @@ -245,7 +246,7 @@ static void setup_rt_frame(int sig, stru struct task_struct *me = current; if (me->used_math) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka_copy, regs, sizeof(struct _fpstate)); frame = (void *)round_down((u64)fp - sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) { @@ -255,14 +256,14 @@ static void setup_rt_frame(int sig, stru if 
(save_i387(fp) < 0) err |= -1; } else { - frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; + frame = get_stack(ka_copy, regs, sizeof(struct rt_sigframe)) - 8; } if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) { goto give_sigsegv; } - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka_copy->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) { goto give_sigsegv; @@ -288,10 +289,10 @@ static void setup_rt_frame(int sig, stru /* Set up to return from userspace. If provided, use a stub already in userspace. */ /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); + if (ka_copy->sa.sa_flags & SA_RESTORER) { + err |= __put_user(ka_copy->sa.sa_restorer, &frame->pretcode); } else { - printk("%s forgot to set SA_RESTORER for signal %d.\n", me->comm, sig); + /* could use a vstub here */ goto give_sigsegv; } @@ -317,7 +318,7 @@ static void setup_rt_frame(int sig, stru next argument after the signal number on the stack. 
*/ regs->rsi = (unsigned long)&frame->info; regs->rdx = (unsigned long)&frame->uc; - regs->rip = (unsigned long) ka->sa.sa_handler; + regs->rip = (unsigned long) ka_copy->sa.sa_handler; regs->rsp = (unsigned long)frame; @@ -333,7 +334,7 @@ static void setup_rt_frame(int sig, stru give_sigsegv: if (sig == SIGSEGV) - ka->sa.sa_handler = SIG_DFL; + current->sighand->action[SIGSEGV-1].sa.sa_handler = SIG_DFL; signal_fault(regs,frame,"signal deliver"); } @@ -342,11 +343,10 @@ give_sigsegv: */ static void -handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs) +handle_signal(unsigned long sig, siginfo_t *info, + struct k_sigaction *ka_copy, sigset_t *oldset, + struct pt_regs * regs) { - struct k_sigaction *ka = ¤t->sighand->action[sig-1]; - #if DEBUG_SIG printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, regs->rip, regs->rsp, regs); @@ -362,7 +362,7 @@ handle_signal(unsigned long sig, siginfo break; case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { + if (!(ka_copy->sa.sa_flags & SA_RESTART)) { regs->rax = -EINTR; break; } @@ -375,20 +375,18 @@ handle_signal(unsigned long sig, siginfo #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { - if (ka->sa.sa_flags & SA_SIGINFO) - ia32_setup_rt_frame(sig, ka, info, oldset, regs); + if (ka_copy->sa.sa_flags & SA_SIGINFO) + ia32_setup_rt_frame(sig, ka_copy, info, oldset, regs); else - ia32_setup_frame(sig, ka, oldset, regs); + ia32_setup_frame(sig, ka_copy, oldset, regs); } else #endif - setup_rt_frame(sig, ka, info, oldset, regs); - - if (ka->sa.sa_flags & SA_ONESHOT) - ka->sa.sa_handler = SIG_DFL; + setup_rt_frame(sig, ka_copy, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (!(ka_copy->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigorsets(¤t->blocked,¤t->blocked, + &ka_copy->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); 
spin_unlock_irq(¤t->sighand->siglock); @@ -402,6 +400,7 @@ handle_signal(unsigned long sig, siginfo */ int do_signal(struct pt_regs *regs, sigset_t *oldset) { + struct k_sigaction ka_copy; siginfo_t info; int signr; @@ -423,7 +422,7 @@ int do_signal(struct pt_regs *regs, sigs if (!oldset) oldset = ¤t->blocked; - signr = get_signal_to_deliver(&info, regs, NULL); + signr = get_signal_to_deliver(&info, &ka_copy, regs, NULL); if (signr > 0) { /* Reenable any watchpoints before delivering the * signal to user space. The processor register will @@ -434,7 +433,7 @@ int do_signal(struct pt_regs *regs, sigs asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg7)); /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, oldset, regs); + handle_signal(signr, &info, &ka_copy, oldset, regs); return 1; } --- linux-2.6.6-rc1/arch/x86_64/kernel/smp.c 2003-11-23 19:03:00.000000000 -0800 +++ 25/arch/x86_64/kernel/smp.c 2004-04-18 22:25:31.600026544 -0700 @@ -362,6 +362,18 @@ void smp_send_reschedule(int cpu) send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); } +#ifdef CONFIG_KGDB +/* + * By using the NMI code instead of a vector we just sneak thru the + * word generator coming out with just what we want. AND it does + * not matter if clustered_apic_mode is set or not. + */ +void smp_send_nmi_allbutself(void) +{ + send_IPI_allbutself(APIC_DM_NMI); +} +#endif + /* * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. 
--- linux-2.6.6-rc1/arch/x86_64/kernel/traps.c 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/x86_64/kernel/traps.c 2004-04-18 22:25:31.601026392 -0700 @@ -45,6 +45,9 @@ #include #include +#ifdef CONFIG_KGDB +#include +#endif extern struct gate_struct idt_table[256]; --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/arch/x86_64/lib/kgdb_serial.c 2004-04-18 22:25:31.604025936 -0700 @@ -0,0 +1,490 @@ +/* + * Serial interface GDB stub + * + * Written (hacked together) by David Grothe (dave@gcom.com) + * Modified to allow invokation early in boot see also + * kgdb.h for instructions by George Anzinger(george@mvista.com) + * Modified to handle debugging over ethernet by Robert Walsh + * and wangdi , based on + * code by San Mehat. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_KGDB_USER_CONSOLE +extern void kgdb_console_finit(void); +#endif +#define PRNT_off +#define TEST_EXISTANCE +#ifdef PRNT +#define dbprintk(s) printk s +#else +#define dbprintk(s) +#endif +#define TEST_INTERRUPT_off +#ifdef TEST_INTERRUPT +#define intprintk(s) printk s +#else +#define intprintk(s) +#endif + +#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? 
SA_SHIRQ : SA_INTERRUPT) + +#define GDB_BUF_SIZE 512 /* power of 2, please */ + +static char gdb_buf[GDB_BUF_SIZE]; +static int gdb_buf_in_inx; +static atomic_t gdb_buf_in_cnt; +static int gdb_buf_out_inx; + +struct async_struct *gdb_async_info; +static int gdb_async_irq; + +#define outb_px(a,b) outb_p(b,a) + +static void program_uart(struct async_struct *info); +static void write_char(struct async_struct *info, int chr); +/* + * Get a byte from the hardware data buffer and return it + */ +static int +read_data_bfr(struct async_struct *info) +{ + char it = inb_p(info->port + UART_LSR); + + if (it & UART_LSR_DR) + return (inb_p(info->port + UART_RX)); + /* + * If we have a framing error assume somebody messed with + * our uart. Reprogram it and send '-' both ways... + */ + if (it & 0xc) { + program_uart(info); + write_char(info, '-'); + return ('-'); + } + return (-1); + +} /* read_data_bfr */ + +/* + * Get a char if available, return -1 if nothing available. + * Empty the receive buffer first, then look at the interface hardware. + + * Locking here is a bit of a problem. We MUST not lock out communication + * if we are trying to talk to gdb about a kgdb entry. ON the other hand + * we can loose chars in the console pass thru if we don't lock. It is also + * possible that we could hold the lock or be waiting for it when kgdb + * NEEDS to talk. Since kgdb locks down the world, it does not need locks. + * We do, of course have possible issues with interrupting a uart operation, + * but we will just depend on the uart status to help keep that straight. 
+
+ */
+static spinlock_t uart_interrupt_lock = SPIN_LOCK_UNLOCKED;
+#ifdef CONFIG_SMP
+extern spinlock_t kgdb_spinlock;
+#endif
+
+/*
+ * Fetch one character for the stub.  A character queued in gdb_buf is
+ * handed out first; when the queue is empty the uart is polled through
+ * read_data_bfr().  Returns whatever read_data_bfr() reports in that
+ * case, i.e. -1 when nothing is available.
+ */
+static int
+read_char(struct async_struct *info)
+{
+	int chr;
+	unsigned long flags;
+#ifdef CONFIG_SMP
+	int locked = 0;
+#endif
+
+	local_irq_save(flags);
+#ifdef CONFIG_SMP
+	/*
+	 * Look at kgdb_spinlock once and remember what we did.  Testing
+	 * it a second time on the way out can give a different answer
+	 * if another cpu took or dropped it in between, which would
+	 * leave uart_interrupt_lock held for good or unlock it while
+	 * it is not ours.
+	 */
+	if (!spin_is_locked(&kgdb_spinlock)) {
+		spin_lock(&uart_interrupt_lock);
+		locked = 1;
+	}
+#endif
+	if (atomic_read(&gdb_buf_in_cnt) != 0) {	/* intr routine has q'd chars */
+		/*
+		 * gdb_buf is plain char; go through unsigned char so a
+		 * byte such as 0xff is not mistaken for the -1
+		 * "nothing available" value.
+		 */
+		chr = (unsigned char) gdb_buf[gdb_buf_out_inx++];
+		gdb_buf_out_inx &= (GDB_BUF_SIZE - 1);
+		atomic_dec(&gdb_buf_in_cnt);
+	} else {
+		chr = read_data_bfr(info);
+	}
+#ifdef CONFIG_SMP
+	if (locked)
+		spin_unlock(&uart_interrupt_lock);
+#endif
+	local_irq_restore(flags);
+	return (chr);
+}
+
+/*
+ * Wait until the interface can accept a char, then write it.
+ * Busy-waits on the THRE bit of the line status register; there is
+ * no timeout.
+ */
+static void
+write_char(struct async_struct *info, int chr)
+{
+	while (!(inb_p(info->port + UART_LSR) & UART_LSR_THRE)) ;
+
+	outb_p(chr, info->port + UART_TX);
+
+}				/* write_char */
+
+/*
+ * Mostly we don't need a spinlock, but since the console goes
+ * thru here with interrupts on, well, we need to catch those
+ * chars.
+ */
+/*
+ * This is the receiver interrupt routine for the GDB stub.
+ * It will receive a limited number of characters of input
+ * from the gdb host machine and save them up in a buffer.
+ *
+ * When the gdb stub routine tty_getDebugChar() is called it
+ * draws characters out of the buffer until it is empty and
+ * then reads directly from the serial port.
+ *
+ * We do not attempt to write chars from the interrupt routine
+ * since the stubs do all of that via tty_putDebugChar() which
+ * writes one byte after waiting for the interface to become
+ * ready.
+ *
+ * The debug stubs like to run with interrupts disabled since,
+ * after all, they run as a consequence of a breakpoint in
+ * the kernel.
+ *
+ * Perhaps someone who knows more about the tty driver than I
+ * care to learn can make this work for any low level serial
+ * driver.
+ */
+static irqreturn_t
+gdb_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct async_struct *info;
+	unsigned long flags;
+
+	info = gdb_async_info;
+	if (!info || !info->tty || irq != gdb_async_irq)
+		return IRQ_NONE;
+
+	local_irq_save(flags);
+	spin_lock(&uart_interrupt_lock);
+	do {
+		int chr = read_data_bfr(info);
+		intprintk(("Debug char on int: %x hex\n", chr));
+		if (chr < 0)
+			continue;
+
+		if (chr == 3) {	/* Ctrl-C means remote interrupt */
+			BREAKPOINT;
+			continue;
+		}
+
+		if (atomic_read(&gdb_buf_in_cnt) >= GDB_BUF_SIZE) {
+			/*
+			 * buffer overflow tosses early char.  Drop it
+			 * right here instead of going through
+			 * read_char(): that one may try to take
+			 * uart_interrupt_lock, which we already hold.
+			 */
+			gdb_buf_out_inx = (gdb_buf_out_inx + 1) &
+			    (GDB_BUF_SIZE - 1);
+			atomic_dec(&gdb_buf_in_cnt);
+		}
+		gdb_buf[gdb_buf_in_inx++] = chr;
+		gdb_buf_in_inx &= (GDB_BUF_SIZE - 1);
+		/* count the char, or read_char() never looks in gdb_buf */
+		atomic_inc(&gdb_buf_in_cnt);
+	} while (inb_p(info->port + UART_IIR) & UART_IIR_RDI);
+	spin_unlock(&uart_interrupt_lock);
+	local_irq_restore(flags);
+	return IRQ_HANDLED;
+}				/* gdb_interrupt */
+
+/*
+ * Just a NULL routine for testing.
+ */
+void
+gdb_null(void)
+{
+}				/* gdb_null */
+
+/* These structures are filled in with values defined in asm/kgdb_local.h
+ */
+static struct serial_state state = SB_STATE;
+static struct async_struct local_info = SB_INFO;
+static int ok_to_enable_ints = 0;
+static void kgdb_enable_ints_now(void);
+
+extern char *kgdb_version;
+/*
+ * Hook an IRQ for KGDB.
+ *
+ * This routine is called from tty_putDebugChar, below.
+ *
+ * Records info as the uart used by the stub (gdb_async_info,
+ * gdb_async_irq), prints a banner when verb is non-zero, runs the
+ * existence tests when TEST_EXISTANCE is defined and finally programs
+ * the uart.  Returns 0 on success and a non-zero value when one of
+ * the existence tests fails (the uart is not programmed then).
+ */
+static int ints_disabled = 1;
+int
+gdb_hook_interrupt(struct async_struct *info, int verb)
+{
+	/* NOTE(review): this local hides the file scope "state" above */
+	struct serial_state *state = info->state;
+	unsigned long flags;
+	int port;
+#ifdef TEST_EXISTANCE
+	int scratch, scratch2;
+#endif
+
+	/* The above fails if memory management is not set up yet.
+	 * Rather than fail the set up, just keep track of the fact
+	 * and pick up the interrupt thing later.
+	 */
+	gdb_async_info = info;
+	port = gdb_async_info->port;
+	gdb_async_irq = state->irq;
+	if (verb) {
+		printk("kgdb %s : port =%x, IRQ=%d, divisor =%d\n",
+		       kgdb_version,
+		       port,
+		       gdb_async_irq, gdb_async_info->state->custom_divisor);
+	}
+	local_irq_save(flags);
+#ifdef TEST_EXISTANCE
+	/* Existence test */
+	/* Should not need all this, but just in case.... */
+
+	scratch = inb_p(port + UART_IER);
+	outb_px(port + UART_IER, 0);
+	/*
+	 * outb_px() takes (port, value): write 0xff to port 0x80.
+	 * Presumably, as in the serial driver's probe, this keeps a
+	 * floating bus from echoing the IER write back to us.
+	 */
+	outb_px(0x080, 0xff);
+	scratch2 = inb_p(port + UART_IER);
+	outb_px(port + UART_IER, scratch);
+	if (scratch2) {
+		printk
+		    ("gdb_hook_interrupt: Could not clear IER, not a UART!\n");
+		local_irq_restore(flags);
+		return 1;	/* We failed; there's nothing here */
+	}
+	scratch2 = inb_p(port + UART_LCR);
+	outb_px(port + UART_LCR, 0xBF);	/* set up for StarTech test */
+	outb_px(port + UART_EFR, 0);	/* EFR is the same as FCR */
+	outb_px(port + UART_LCR, 0);
+	outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO);
+	scratch = inb_p(port + UART_IIR) >> 6;
+	if (scratch == 1) {
+		printk("gdb_hook_interrupt: Undefined UART type!"
+		       " Not a UART! \n");
+		local_irq_restore(flags);
+		return 1;
+	} else {
+		dbprintk(("gdb_hook_interrupt: UART type "
+			  "is %d where 0=16450, 2=16550 3=16550A\n", scratch));
+	}
+	scratch = inb_p(port + UART_MCR);
+	outb_px(port + UART_MCR, UART_MCR_LOOP | scratch);
+	outb_px(port + UART_MCR, UART_MCR_LOOP | 0x0A);
+	scratch2 = inb_p(port + UART_MSR) & 0xF0;
+	outb_px(port + UART_MCR, scratch);
+	if (scratch2 != 0x90) {
+		printk("gdb_hook_interrupt: "
+		       "Loop back test failed! Not a UART!\n");
+		local_irq_restore(flags);
+		return scratch2 + 1000;	/* force 0 to fail */
+	}
+#endif				/* TEST_EXISTANCE */
+	program_uart(info);
+	local_irq_restore(flags);
+
+	return (0);
+
+}				/* gdb_hook_interrupt */
+
+/*
+ * (Re)program the uart at info->port: interrupts off, pending status
+ * read back, divisor latch loaded from info->state->custom_divisor,
+ * 8 bit words, MCR from info->MCR, fifos enabled and cleared.  The
+ * interrupt enable register is only written when the stub already
+ * owns the irq (ints_disabled == 0).
+ */
+static void
+program_uart(struct async_struct *info)
+{
+	int port = info->port;
+
+	(void) inb_p(port + UART_RX);
+	outb_px(port + UART_IER, 0);
+
+	(void) inb_p(port + UART_RX);	/* serial driver comments say */
+	(void) inb_p(port + UART_IIR);	/* this clears the interrupt regs */
+	(void) inb_p(port + UART_MSR);
+	outb_px(port + UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB);
+	outb_px(port + UART_DLL, info->state->custom_divisor & 0xff);	/* LS */
+	outb_px(port + UART_DLM, info->state->custom_divisor >> 8);	/* MS */
+	outb_px(port + UART_MCR, info->MCR);
+
+	outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1 | UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR);	/* set fcr */
+	outb_px(port + UART_LCR, UART_LCR_WLEN8);	/* reset DLAB */
+	outb_px(port + UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1);	/* set fcr */
+	if (!ints_disabled) {
+		intprintk(("KGDB: Sending %d to port %x offset %d\n",
+			   gdb_async_info->IER,
+			   (int) gdb_async_info->port, UART_IER));
+		outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER);
+	}
+	return;
+}
+
+/*
+ * tty_getDebugChar
+ *
+ * This is a GDB stub routine. It waits for a character from the
+ * serial interface and then returns it. If there is no serial
+ * interface connection then it returns a bogus value which will
+ * almost certainly cause the system to hang.
In the
+ */
+int kgdb_in_isr = 0;
+int kgdb_in_lsr = 0;
+extern spinlock_t kgdb_spinlock;
+
+/*
+ * Caller takes needed protections.
+ *
+ * Hooks the uart on first use, then polls read_char() until it
+ * delivers something other than -1 or the rdtsc() based time-out
+ * runs out.  If nothing arrived the uart is reprogrammed and -1 is
+ * returned.
+ */
+
+int
+tty_getDebugChar(void)
+{
+	volatile int chr, dum, time, end_time;
+
+	if (gdb_async_info == NULL) {
+		gdb_hook_interrupt(&local_info, 0);
+	}
+	/* trace only after the hook above: it sets gdb_async_info */
+	dbprintk(("tty_getDebugChar(port %x): ", gdb_async_info->port));
+
+	/*
+	 * This trick says if we wait a very long time and get
+	 * no char, return the -1 and let the upper level deal
+	 * with it.
+	 */
+	rdtsc(dum, time);
+	end_time = time + 2;
+	while (((chr = read_char(gdb_async_info)) == -1) &&
+	       (end_time - time) > 0) {
+		rdtsc(dum, time);
+	}
+	/*
+	 * This covers our butts if some other code messes with
+	 * our uart, hay, it happens :o)
+	 */
+	if (chr == -1)
+		program_uart(gdb_async_info);
+
+	dbprintk(("%c\n", chr > ' ' && chr < 0x7F ? chr : ' '));
+	return (chr);
+
+}				/* tty_getDebugChar */
+
+static int count = 3;
+static spinlock_t one_at_atime = SPIN_LOCK_UNLOCKED;
+
+/*
+ * Init time set up: installs the debug traps and, unless kgdboe is
+ * set, hooks the serial port (if nobody did yet), allows interrupt
+ * enabling from now on and tries to enable it right away.
+ * Always returns 0.
+ */
+static int __init
+kgdb_enable_ints(void)
+{
+	set_debug_traps();
+	if (kgdboe) {
+		return 0;
+	}
+	if (gdb_async_info == NULL) {
+		gdb_hook_interrupt(&local_info, 1);
+	}
+	ok_to_enable_ints = 1;
+	kgdb_enable_ints_now();
+#ifdef CONFIG_KGDB_USER_CONSOLE
+	kgdb_console_finit();
+#endif
+	return 0;
+}
+
+#ifdef CONFIG_SERIAL_8250
+void shutdown_for_kgdb(struct async_struct *gdb_async_info);
+#endif
+
+#define kgdb_mem_init_done()    (1)
+
+/*
+ * Try to get the uart irq for the stub.  Only one caller at a time
+ * gets in (trylock on one_at_atime, others just return).  The result
+ * of request_irq() is kept in ints_disabled, so 0 means the stub owns
+ * the irq; in that case the uart's interrupt enable register is
+ * loaded from gdb_async_info->IER.
+ */
+static void
+kgdb_enable_ints_now(void)
+{
+	if (!spin_trylock(&one_at_atime))
+		return;
+	if (!ints_disabled)
+		goto exit;
+	if (kgdb_mem_init_done() &&
+	    ints_disabled) {	/* don't try till mem init */
+#ifdef CONFIG_SERIAL_8250
+		/*
+		 * The ifdef here allows the system to be configured
+		 * without the serial driver.
+		 * Don't make it a module, however, it will steal the port
+		 */
+		shutdown_for_kgdb(gdb_async_info);
+#endif
+		ints_disabled = request_irq(gdb_async_info->state->irq,
+					    gdb_interrupt,
+					    IRQ_T(gdb_async_info),
+					    "KGDB-stub", NULL);
+		intprintk(("KGDB: request_irq returned %d\n", ints_disabled));
+	}
+	if (!ints_disabled) {
+		intprintk(("KGDB: Sending %d to port %x offset %d\n",
+			   gdb_async_info->IER,
+			   (int) gdb_async_info->port, UART_IER));
+		outb_px(gdb_async_info->port + UART_IER, gdb_async_info->IER);
+	}
+      exit:
+	spin_unlock(&one_at_atime);
+}
+
+/*
+ * tty_putDebugChar
+ *
+ * This is a GDB stub routine. It waits until the interface is ready
+ * to transmit a char and then sends it. If there is no serial
+ * interface connection then it simply returns to its caller, having
+ * pretended to send the char. Caller takes needed protections.
+ */
+void
+tty_putDebugChar(int chr)
+{
+	if (gdb_async_info == NULL) {
+		gdb_hook_interrupt(&local_info, 0);
+	}
+	/* trace only after the hook above: it sets gdb_async_info */
+	dbprintk(("tty_putDebugChar(port %x): chr=%02x '%c', ints_on=%d\n",
+		  gdb_async_info->port,
+		  chr,
+		  chr > ' ' && chr < 0x7F ? chr : ' ', ints_disabled ? 0 : 1));
+
+	write_char(gdb_async_info, chr);	/* this routine will wait */
+	count = (chr == '#') ? 0 : count + 1;
+	if (count == 2) {	/* try to enable after */
+		/*
+		 * Logical and, not bitwise: ints_disabled may hold a
+		 * negative errno from request_irq(), and "& 1" on that
+		 * would make the retry depend on the errno's low bit.
+		 */
+		if (ints_disabled && ok_to_enable_ints)
+			kgdb_enable_ints_now();	/* try to enable after */
+
+		/* We do this a lot because, well we really want to get these
+		 * interrupts. The serial driver will clear these bits when it
+		 * initializes the chip. Every thing else it does is ok,
+		 * but this.
+		 */
+		if (!ints_disabled) {
+			outb_px(gdb_async_info->port + UART_IER,
+				gdb_async_info->IER);
+		}
+	}
+
+}				/* tty_putDebugChar */
+
+/*
+ * This does nothing for the serial port, since it doesn't buffer.
+ */ + +void tty_flushDebugChar(void) +{ +} + +module_init(kgdb_enable_ints); --- linux-2.6.6-rc1/arch/x86_64/lib/Makefile 2004-04-03 20:39:11.000000000 -0800 +++ 25/arch/x86_64/lib/Makefile 2004-04-18 22:25:31.604025936 -0700 @@ -10,3 +10,4 @@ lib-y := csum-partial.o csum-copy.o csum lib-y += memcpy.o memmove.o memset.o copy_user.o lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o +lib-$(CONFIG_KGDB) += kgdb_serial.o --- linux-2.6.6-rc1/crypto/Kconfig 2004-04-03 20:39:11.000000000 -0800 +++ 25/crypto/Kconfig 2004-04-18 22:25:24.670080056 -0700 @@ -146,9 +146,10 @@ config CRYPTO_ARC4 help ARC4 cipher algorithm. - This is a stream cipher using keys ranging from 8 bits to 2048 - bits in length. ARC4 is commonly used in protocols such as WEP - and SSL. + ARC4 is a stream cipher using keys ranging from 8 bits to 2048 + bits in length. This algorithm is required for driver-based + WEP, but it should not be used for other purposes because of the + weakness of the algorithm. config CRYPTO_DEFLATE tristate "Deflate compression algorithm" --- linux-2.6.6-rc1/Documentation/binfmt_misc.txt 2003-10-08 15:07:08.000000000 -0700 +++ 25/Documentation/binfmt_misc.txt 2004-04-18 22:25:42.868313504 -0700 @@ -15,7 +15,7 @@ First you must mount binfmt_misc: mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc To actually register a new binary type, you have to set up a string looking like -:name:type:offset:magic:mask:interpreter: (where you can choose the ':' upon +:name:type:offset:magic:mask:interpreter:flags (where you can choose the ':' upon your needs) and echo it to /proc/sys/fs/binfmt_misc/register. Here is what the fields mean: - 'name' is an identifier string. A new /proc file will be created with this @@ -34,6 +34,28 @@ Here is what the fields mean: The mask is anded with the byte sequence of the file. 
- 'interpreter' is the program that should be invoked with the binary as first argument (specify the full path) + - 'flags' is an optional field that controls several aspects of the invocation + of the interpreter. It is a string of capital letters, each controls a certain + aspect. The following flags are supported - + 'P' - preserve-argv[0]. Legacy behavior of binfmt_misc is to overwrite the + original argv[0] with the full path to the binary. When this flag is + included, binfmt_misc will add an argument to the argument vector for + this purpose, thus preserving the original argv[0]. + 'O' - open-binary. Legacy behavior of binfmt_misc is to pass the full path + of the binary to the interpreter as an argument. When this flag is + included, binfmt_misc will open the file for reading and pass its + descriptor as an argument, instead of the full path, thus allowing + the interpreter to execute non-readable binaries. This feature should + be used with care - the interpreter has to be trusted not to emit + the contents of the non-readable binary. + 'C' - credentials. Currently, the behavior of binfmt_misc is to calculate + the credentials and security token of the new process according to + the interpreter. When this flag is included, these attributes are + calculated according to the binary. It also implies the 'O' flag. + This feature should be used with care as the interpreter + will run with root permissions when a setuid binary owned by root + is run with binfmt_misc. + There are some restrictions: - the whole register string may not exceed 255 characters @@ -83,9 +105,9 @@ If you want to pass special arguments to write a wrapper script for it. See Documentation/java.txt for an example. -Your interpreter should NOT look in the PATH for the filename; the -kernel passes it the full filename to use. Using the PATH can cause -unexpected behaviour and be a security hazard. 
+Your interpreter should NOT look in the PATH for the filename; the kernel +passes it the full filename (or the file descriptor) to use. Using $PATH can +cause unexpected behaviour and can be a security hazard. There is a web page about binfmt_misc at --- linux-2.6.6-rc1/Documentation/DMA-mapping.txt 2004-04-03 20:39:10.000000000 -0800 +++ 25/Documentation/DMA-mapping.txt 2004-04-18 22:25:24.628086440 -0700 @@ -132,7 +132,7 @@ exactly why. The standard 32-bit addressing PCI device would do something like this: - if (pci_set_dma_mask(pdev, 0xffffffff)) { + if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); goto ignore_this_device; @@ -151,9 +151,9 @@ all 64-bits when accessing streaming DMA int using_dac; - if (!pci_set_dma_mask(pdev, 0xffffffffffffffff)) { + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { using_dac = 1; - } else if (!pci_set_dma_mask(pdev, 0xffffffff)) { + } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { using_dac = 0; } else { printk(KERN_WARNING @@ -166,14 +166,14 @@ the case would look like this: int using_dac, consistent_using_dac; - if (!pci_set_dma_mask(pdev, 0xffffffffffffffff)) { + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { using_dac = 1; consistent_using_dac = 1; - pci_set_consistent_dma_mask(pdev, 0xffffffffffffffff) - } else if (!pci_set_dma_mask(pdev, 0xffffffff)) { + pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + } else if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { using_dac = 0; consistent_using_dac = 0; - pci_set_consistent_dma_mask(pdev, 0xffffffff) + pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); } else { printk(KERN_WARNING "mydev: No suitable DMA available.\n"); @@ -215,7 +215,7 @@ most specific mask. 
Here is pseudo-code showing how this might be done: - #define PLAYBACK_ADDRESS_BITS 0xffffffff + #define PLAYBACK_ADDRESS_BITS DMA_32BIT_MASK #define RECORD_ADDRESS_BITS 0x00ffffff struct my_sound_card *card; --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/andthen 2004-04-18 22:25:30.324220496 -0700 @@ -0,0 +1,100 @@ + +define set_andthen + set var $thp=0 + set var $thp=(struct kgdb_and_then_struct *)&kgdb_data[0] + set var $at_size = (sizeof kgdb_data)/(sizeof *$thp) + set var $at_oc=kgdb_and_then_count + set var $at_cc=$at_oc +end + +define andthen_next + set var $at_cc=$arg0 +end + +define andthen + andthen_set_edge + if ($at_cc >= $at_oc) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc + output *($thp+($at_cc++ % $at_size )) + printf "\n" + end +end +define andthen_set_edge + set var $at_oc=kgdb_and_then_count + set var $at_low = $at_oc - $at_size + if ($at_low < 0 ) + set var $at_low = 0 + end + if (( $at_cc > $at_oc) || ($at_cc < $at_low)) + printf "Count outside of window, setting count to " + if ($at_cc >= $at_oc) + set var $at_cc = $at_oc + else + set var $at_cc = $at_low + end + printf "%d\n",$at_cc + end +end + +define beforethat + andthen_set_edge + if ($at_cc <= $at_low) + printf "Outside window. Window size is %d\n",($at_oc-$at_low) + else + printf "%d: ",$at_cc-1 + output *($thp+(--$at_cc % $at_size )) + printf "\n" + end +end + +document andthen_next + andthen_next + . sets the number of the event to display next. If this event + . is not in the event pool, either andthen or beforethat will + . correct it to the nearest event pool edge. The event pool + . ends at the last event recorded and begins + . prior to that. If beforethat is used next, it will display + . event -1. +. + andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + + +document andthen + andthen +. displays the next event in the list. sets up to display +. 
the oldest saved event first. +. (optional) count of the event to display. +. note the number of events saved is specified at configure time. +. if events are saved between calls to andthen the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document set_andthen + set_andthen +. sets up to use the and commands. +. if you have defined your own struct, use the above and +. then enter the following: +. p $thp=(struct kgdb_and_then_structX *)&kgdb_data[0] +. where is the name of your structure. +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end + +document beforethat + beforethat +. displays the next prior event in the list. sets up to +. display the last occuring event first. +. +. note the number of events saved is specified at configure time. +. if events are saved between calls to beforethat the index will change +. but the displayed event will be the next one (unless the event buffer +. is overrun). +. +. andthen commands are: set_andthen, andthen_next, andthen and beforethat +end --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/debug-nmi.txt 2004-04-18 22:25:30.325220344 -0700 @@ -0,0 +1,37 @@ +Subject: Debugging with NMI +Date: Mon, 12 Jul 1999 11:28:31 -0500 +From: David Grothe +Organization: Gcom, Inc +To: David Grothe + +Kernel hackers: + +Maybe this is old hat, but it is new to me -- + +On an ISA bus machine, if you short out the A1 and B1 pins of an ISA +slot you will generate an NMI to the CPU. This interrupts even a +machine that is hung in a loop with interrupts disabled. Used in +conjunction with kgdb < +ftp://ftp.gcom.com/pub/linux/src/kgdb-2.3.35/kgdb-2.3.35.tgz > you can +gain debugger control of a machine that is hung in the kernel! Even +without kgdb the kernel will print a stack trace so you can find out +where it was hung. 
+ +The A1/B1 pins are directly opposite one another and the farthest pins +towards the bracket end of the ISA bus socket. You can stick a paper +clip or multi-meter probe between them to short them out. + +I had a spare ISA bus to PC104 bus adapter around. The PC104 end of the +board consists of two rows of wire wrap pins. So I wired a push button +between the A1/B1 pins and now have an ISA board that I can stick into +any ISA bus slot for debugger entry. + +Microsoft has a circuit diagram of a PCI card at +http://www.microsoft.com/hwdev/DEBUGGING/DMPSW.HTM. If you want to +build one you will have to mail them and ask for the PAL equations. +Nobody makes one comercially. + +[THIS TIP COMES WITH NO WARRANTY WHATSOEVER. It works for me, but if +your machine catches fire, it is your problem, not mine.] + +-- Dave (the kgdb guy) --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdb-globals.txt 2004-04-18 22:25:30.325220344 -0700 @@ -0,0 +1,71 @@ +Sender: akale@veritas.com +Date: Fri, 23 Jun 2000 19:26:35 +0530 +From: "Amit S. Kale" +Organization: Veritas Software (India) +To: Dave Grothe , linux-kernel@vger.rutgers.edu +CC: David Milburn , + "Edouard G. Parmelan" , + ezannoni@cygnus.com, Keith Owens +Subject: Re: Module debugging using kgdb + +Dave Grothe wrote: +> +> Amit: +> +> There is a 2.4.0 version of kgdb on our ftp site: +> ftp://ftp.gcom.com/pub/linux/src/kgdb. I mirrored your version of gdb +> and loadmodule.sh there. +> +> Have a look at the README file and see if I go it right. If not, send +> me some corrections and I will update it. +> +> Does your version of gdb solve the global variable problem? + +Yes. +Thanks to Elena Zanoni, gdb (developement version) can now calculate +correctly addresses of dynamically loaded object files. I have not been +following gdb developement for sometime and am not sure when symbol +address calculation fix is going to appear in a gdb stable version. 
+ +Elena, any idea when the fix will make it to a prebuilt gdb from a +redhat release? + +For the time being I have built a gdb developement version. It can be +used for module debugging with loadmodule.sh script. + +The problem with calculating of module addresses with previous versions +of gdb was as follows: +gdb did not use base address of a section while calculating address of +a symbol in the section in an object file loaded via 'add-symbol-file'. +It used address of .text segment instead. Due to this addresses of +symbols in .data, .bss etc. (e.g. global variables) were calculated incorrectly. + +Above mentioned fix allow gdb to use base address of a segment while +calculating address of a symbol in it. It adds a parameter '-s' to +'add-symbol-file' command for specifying base address of a segment. + +loadmodule.sh script works as follows. + +1. Copy a module file to target machine. +2. Load the module on the target machine using insmod with -m parameter. +insmod produces a module load map which contains base addresses of all +sections in the module and addresses of symbols in the module file. +3. Find all sections and their base addresses in the module from +the module map. +4. Generate a script that loads the module file. The script uses +'add-symbol-file' and specifies address of text segment followed by +addresses of all segments in the module. + +Here is an example gdb script produced by loadmodule.sh script. + +add-symbol-file foo 0xd082c060 -s .text.lock 0xd08cbfb5 +-s .fixup 0xd08cfbdf -s .rodata 0xd08cfde0 -s __ex_table 0xd08e3b38 +-s .data 0xd08e3d00 -s .bss 0xd08ec8c0 -s __ksymtab 0xd08ee838 + +With this command gdb can calculate addresses of symbols in ANY segment +in a module file. + +Regards. 
+-- +Amit Kale +Veritas Software ( http://www.veritas.com ) --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit 2004-04-18 22:25:30.326220192 -0700 @@ -0,0 +1,14 @@ +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 +target remote /dev/ttyS0 +define si +stepi +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip +end +define ni +nexti +printf "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n", $eax, $ebx, $ecx, $edx +printf "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n", $esi, $edi, $ebp, $esp +x/i $eip --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit.hw 2004-04-18 22:25:30.326220192 -0700 @@ -0,0 +1,117 @@ + +#Using ia-32 hardware breakpoints. +# +#4 hardware breakpoints are available in ia-32 processors. These breakpoints +#do not need code modification. They are set using debug registers. +# +#Each hardware breakpoint can be of one of the +#three types: execution, write, access. +#1. An Execution breakpoint is triggered when code at the breakpoint address is +#executed. +#2. A write breakpoint ( aka watchpoints ) is triggered when memory location +#at the breakpoint address is written. +#3. An access breakpoint is triggered when memory location at the breakpoint +#address is either read or written. +# +#As hardware breakpoints are available in limited number, use software +#breakpoints ( br command in gdb ) instead of execution hardware breakpoints. +# +#Length of an access or a write breakpoint defines length of the datatype to +#be watched. Length is 1 for char, 2 short , 3 int. +# +#For placing execution, write and access breakpoints, use commands +#hwebrk, hwwbrk, hwabrk +#To remove a breakpoint use hwrmbrk command. +# +#These commands take following types of arguments. For arguments associated +#with each command, use help command. +#1. breakpointno: 0 to 3 +#2. length: 1 to 3 +#3. 
address: Memory location in hex ( without 0x ) e.g c015e9bc +# +#Use the command exinfo to find which hardware breakpoint occurred. + +#hwebrk breakpointno address +define hwebrk + maintenance packet Y$arg0,0,0,$arg1 +end +document hwebrk + hwebrk <breakpointno> <address>
+ Places a hardware execution breakpoint + <breakpointno> = 0 - 3 + <address>
 = Hex digits without leading "0x". +end + +#hwwbrk breakpointno length address +define hwwbrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwwbrk + hwwbrk <breakpointno> <length> <address>
+ Places a hardware write breakpoint + <breakpointno> = 0 - 3 + <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte) + <address>
 = Hex digits without leading "0x". +end + +#hwabrk breakpointno length address +define hwabrk + maintenance packet Y$arg0,1,$arg1,$arg2 +end +document hwabrk + hwabrk <breakpointno> <length> <address>
+ Places a hardware access breakpoint + <breakpointno> = 0 - 3 + <length> = 1 (1 byte), 2 (2 byte), 3 (4 byte) + <address> 
= Hex digits without leading "0x". +end + +#hwrmbrk breakpointno +define hwrmbrk + maintenance packet y$arg0 +end +document hwrmbrk + hwrmbrk + = 0 - 3 + Removes a hardware breakpoint +end + +define reboot + maintenance packet r +end +#exinfo +define exinfo + maintenance packet qE +end +document exinfo + exinfo + Gives information about a breakpoint. +end +define get_th + p $th=(struct thread_info *)((int)$esp & ~8191) +end +document get_th + get_tu + Gets and prints the current thread_info pointer, Defines th to be it. +end +define get_cu + p $cu=((struct thread_info *)((int)$esp & ~8191))->task +end +document get_cu + get_cu + Gets and print the "current" value. Defines $cu to be it. +end +define int_off + set var $flags=$eflags + set $eflags=$eflags&~0x200 + end +define int_on + set var $eflags|=$flags&0x200 + end +document int_off + saves the current interrupt state and clears the processor interrupt + flag. Use int_on to restore the saved flag. +end +document int_on + Restores the interrupt flag saved by int_off. +end --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/gdbinit-modules 2004-04-18 22:25:30.328219888 -0700 @@ -0,0 +1,146 @@ +# +# Usefull GDB user-command to debug Linux Kernel Modules with gdbstub. +# +# This don't work for Linux-2.0 or older. +# +# Author Edouard G. Parmelan +# +# +# Fri Apr 30 20:33:29 CEST 1999 +# First public release. +# +# Major cleanup after experiment Linux-2.0 kernel without success. +# Symbols of a module are not in the correct order, I can't explain +# why :( +# +# Fri Mar 19 15:41:40 CET 1999 +# Initial version. +# +# Thu Jan 6 16:29:03 CST 2000 +# A little fixing by Dave Grothe +# +# Mon Jun 19 09:33:13 CDT 2000 +# Alignment changes from Edouard Parmelan +# +# The basic idea is to find where insmod load the module and inform +# GDB to load the symbol table of the module with the GDB command +# ``add-symbol-file
''. +# +# The Linux kernel holds the list of all loaded modules in module_list, +# this list end with &kernel_module (exactly with module->next == NULL, +# but the last module is not a real module). +# +# Insmod allocates the struct module before the object file. Since +# Linux-2.1, this structure contain his size. The real address of +# the object file is then (char*)module + module->size_of_struct. +# +# You can use three user functions ``mod-list'', ``mod-print-symbols'' +# and ``add-module-symbols''. +# +# mod-list list all loaded modules with the format: +# +# +# As soon as you have found the address of your module, you can +# print its exported symbols (mod-print-symbols) or inform GDB to add +# symbols from your module file (mod-add-symbols). +# +# The argument that you give to mod-print-symbols or mod-add-symbols +# is the from the mod-list command. +# +# When using the mod-add-symbols command you must also give the full +# pathname of the modules object code file. +# +# The command mod-add-lis is an example of how to make this easier. +# You can edit this macro to contain the path name of your own +# favorite module and then use it as a shorthand to load it. You +# still need the module-address, however. +# +# The internal function ``mod-validate'' set the GDB variable $mod +# as a ``struct module*'' if the kernel known the module otherwise +# $mod is set to NULL. This ensure to not add symbols for a wrong +# address. +# +# Have a nice hacking day ! +# +# +define mod-list + set $mod = (struct module*)module_list + # the last module is the kernel, ignore it + while $mod != &kernel_module + printf "%p\t%s\n", (long)$mod, ($mod)->name + set $mod = $mod->next + end +end +document mod-list +List all modules in the form: +Use the as the argument for the other +mod-commands: mod-print-symbols, mod-add-symbols. 
+end + +define mod-validate + set $mod = (struct module*)module_list + while ($mod != $arg0) && ($mod != &kernel_module) + set $mod = $mod->next + end + if $mod == &kernel_module + set $mod = 0 + printf "%p is not a module\n", $arg0 + end +end +document mod-validate +mod-validate +Internal user-command used to validate the module parameter. +If is a real loaded module, set $mod to it otherwise set $mod to 0. +end + + +define mod-print-symbols + mod-validate $arg0 + if $mod != 0 + set $i = 0 + while $i < $mod->nsyms + set $sym = $mod->syms[$i] + printf "%p\t%s\n", $sym->value, $sym->name + set $i = $i + 1 + end + end +end +document mod-print-symbols +mod-print-symbols +Print all exported symbols of the module. see mod-list +end + + +define mod-add-symbols-align + mod-validate $arg0 + if $mod != 0 + set $mod_base = ($mod->size_of_struct + (long)$mod) + if ($arg2 != 0) && (($mod_base & ($arg2 - 1)) != 0) + set $mod_base = ($mod_base | ($arg2 - 1)) + 1 + end + add-symbol-file $arg1 $mod_base + end +end +document mod-add-symbols-align +mod-add-symbols-align +Load the symbols table of the module from the object file where +first section aligment is . +To retreive alignment, use `objdump -h '. +end + +define mod-add-symbols + mod-add-symbols-align $arg0 $arg1 sizeof(long) +end +document mod-add-symbols +mod-add-symbols +Load the symbols table of the module from the object file. +Default alignment is 4. See mod-add-symbols-align. 
+end + +define mod-add-lis + mod-add-symbols-align $arg0 /usr/src/LiS/streams.o 16 +end +document mod-add-lis +mod-add-lis +Does mod-add-symbols /usr/src/LiS/streams.o +end --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdbeth.txt 2004-04-18 22:25:30.862138720 -0700 @@ -0,0 +1,92 @@ +KGDB over ethernet +================== + +Authors +------- + +Robert Walsh (2.6 port) +wangdi (2.6 port) +Matt Mackall (netpoll api) +San Mehat (original 2.4 code) + + +Introduction +------------ + +KGDB supports debugging over ethernet (kgdboe) via polling of a given +network interface. Most cards should be supported automatically. +Debugging facilities are available as soon as the network driver and +kgdboe have initialized. Unfortunately, this is too late in the boot +process for debugging some issues, but works quite well for many +others. This should not interfere with normal network usage and +doesn't require a dedicated NIC. + +Terminology +----------- + +This document uses the following terms: + + TARGET: the machine being debugged. + HOST: the machine running gdb. 
+ + +Usage +----- + +You need to use the following command-line option on the TARGET kernel: + + kgdboe=[tgt-port]@<tgt-ip>/[dev],[host-port]@<host-ip>/[host-macaddr] + + where + tgt-port source for UDP packets (defaults to 6443) + tgt-ip source IP to use (interface address) + dev network interface (eth0) + host-port HOST UDP port (6442) (not really used) + host-ip IP address for HOST machine + host-macaddr ethernet MAC address for HOST (ff:ff:ff:ff:ff:ff) + + examples: + + kgdboe=7000@192.168.0.1/eth1,7001@192.168.0.2/00:05:3C:04:47:5D + this machine is 192.168.0.1 on eth1 + remote machine is 192.168.0.2 with MAC address 00:05:3C:04:47:5D + listen for gdb packets on port 7000 + send unsolicited gdb packets to port 7001 + + kgdboe=@192.168.0.1/,@192.168.0.2/ + this machine is 192.168.0.1 on default interface eth0 + remote machine is 192.168.0.2, use default broadcast MAC address + listen for gdb packets on default port 6443 + send unsolicited gdb packets to port 6442 + +Only packets originating from the configured HOST IP address will be +accepted by the debugger. + +On the HOST side, run gdb as normal and use a remote UDP host as the +target: + + % gdb ./vmlinux + GNU gdb Red Hat Linux (5.3post-0.20021129.18rh) + Copyright 2003 Free Software Foundation, Inc. + GDB is free software, covered by the GNU General Public License, and you are + welcome to change it and/or distribute copies of it under certain conditions. + Type "show copying" to see the conditions. + There is absolutely no warranty for GDB. Type "show warranty" for details. + This GDB was configured as "i386-redhat-linux-gnu"... + (gdb) target remote udp:HOSTNAME:6443 + +You can now continue as if you were debugging over a serial line. + +Limitations +----------- + +The current release of this code is exclusive of using kgdb on a +serial interface, so you must boot without the kgdboe option to use +serial debugging. Trying to debug the network driver while using it +will prove interesting. 
+ +Bug reports +----------- + +Send bug reports to Robert Walsh and Matt +Mackall . --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/kgdb.txt 2004-04-18 22:25:30.333219128 -0700 @@ -0,0 +1,775 @@ +Last edit: <20030806.1637.12> +This file has information specific to the i386 kgdb option. Other +platforms with the kgdb option may behave in a similar fashion. + +New features: +============ +20030806.1557.37 +This version was made against the 2.6.0-test2 kernel. We have made the +following changes: + +- The getthread() code in the stub calls find_task_by_pid(). It fails + if we are early in the bring up such that the pid arrays have yet to + be allocated. We have added a line to kernel/pid.c to make + "kgdb_pid_init_done" true once the arrays are allocated. This way the + getthread() code knows not to call. This is only used by the thread + debugging stuff and threads will not yet exist at this point in the + boot. + +- For some reason, gdb was not asking for a new thread list when the + "info thread" command was given. We changed to the newer version of + the thread info command and gdb now seems to ask when needed. Result, + we now get all threads in the thread list. + +- We now respond to the ThreadExtraInfo request from gdb with the thread + name from task_struct .comm. This then appears in the thread list. + Thoughts on additional options for this are welcome. Things such as + "has BKL" and "Preempted" come to mind. I think we could have a flag + word that could enable different bits of info here. + +- We now honor, sort of, the C and S commands. These are continue and + single set after delivering a signal. We ignore the signal and do the + requested action. This only happens when we told gdb that a signal + was the reason for entry, which is only done on memory faults. The + result is that you can now continue into the Oops. + +- We changed the -g to -gdwarf-2. 
This seems to be the same as -ggdb, + but it is more exact on what language to use. + +- We added two dwarf2 include files and a bit of code at the end of + entry.S. This does not yet work, so it is disabled. Still we want to + keep track of the code and "maybe" someone out there can fix it. + +- Randy Dunlap sent some fix ups for this file which are now merged. + +- Hugh Dickins sent a fix to a bit of code in traps.c that prevents a + compiler warning if CONFIG_KGDB is off (now who would do that :). + +- Andrew Morton sent a fix for the serial driver which is now merged. + +- Andrew also sent a change to the stub around the cpu managment code + which is also merged. + +- Andrew also sent a patch to make "f" as well as "g" work as SysRq + commands to enter kgdb, merged. + +- If CONFIG_KGDB and CONFIG_DEBUG_SPINLOCKS are both set we added a + "who" field to the spinlock data struct. This is filled with + "current" when ever the spinlock suceeds. Useful if you want to know + who has the lock. + +_ And last, but not least, we fixed the "get_cu" macro to properly get + the current value of "current". + +New features: +============ +20030505.1827.27 +We are starting to align with the sourceforge version, at least in +commands. To this end, the boot command string to start kgdb at +boot time has been changed from "kgdb" to "gdb". + +Andrew Morton sent a couple of patches which are now included as follows: +1.) We now return a flag to the interrupt handler. +2.) We no longer use smp_num_cpus (a conflict with the lock meter). +3.) And from William Lee Irwin III code to make + sure high-mem is set up before we attempt to register our interrupt + handler. +We now include asm/kgdb.h from config.h so you will most likely never +have to include it. It also 'NULLS' the kgdb macros you might have in +your code when CONFIG_KGDB is not defined. This allows you to just +turn off CONFIG_KGDB to turn off all the kgdb_ts() calls and such. 
+This include is conditioned on the machine being an x86 so as to not +mess with other archs. + +20020801.1129.03 +This is currently the version for the 2.4.18 (and beyond?) kernel. + +We have several new "features" beginning with this version: + +1.) Kgdb now syncs the "other" CPUs with a cross-CPU NMI. No more + waiting and it will pull that guy out of an IRQ off spin lock :) + +2.) We doctored up the code that tells where a task is waiting and + included it so that the "info thread" command will show a bit more + than "schedule()". Try it... + +3.) Added the ability to call a function from gdb. All the standard gdb + issues apply, i.e. if you hit a breakpoint in the function, you are + not allowed to call another (gdb limitation, not kgdb). To help + this capability we added a memory allocation function. Gdb does not + return this memory (it is used for strings that you pass to that function + you are calling from gdb) so we fixed up a way to allow you to + manually return the memory (see below). + +4.) Kgdb time stamps (kgdb_ts()) are enhanced to expand what was the + interrupt flag to now also include the preemption count and the + "in_interrupt" info. The flag is now called "with_pif" to indicate + the order, preempt_count, in_interrupt, flag. The preempt_count is + shifted left by 4 bits so you can read the count in hex by dropping + the low order digit. In_interrupt is in bit 1, and the flag is in + bit 0. + +5.) The command: "p kgdb_info" is now expanded and prints something + like: +(gdb) p kgdb_info +$2 = {used_malloc = 0, called_from = 0xc0107506, entry_tsc = 67468627259, + errcode = 0, vector = 3, print_debug_info = 0, hold_on_sstep = 1, + cpus_waiting = {{task = 0xc027a000, pid = 32768, hold = 0, + regs = 0xc027bf84}, {task = 0x0, pid = 0, hold = 0, regs = 0x0}}} + + Things to note here: a.) used_malloc is the amount of memory that + has been malloc'ed to do calls from gdb. 
You can reclaim this + memory like this: "p kgdb_info.used_malloc=0" Cool, huh? b.) + cpus_waiting is now "sized" by the number of CPUs you enter at + configure time in the kgdb configure section. This is NOT used + anywhere else in the system, but it is "nice" here. c.) The task's + "pid" is now in the structure. This is the pid you will need to use + to decode to the thread id to get gdb to look at that thread. + Remember that the "info thread" command prints a list of threads + wherein it numbers each thread with its reference number followed + by the thread's pid. Note that the per-CPU idle threads actually + have pids of 0 (yes, there is more than one pid 0 in an SMP system). + To avoid confusion, kgdb numbers these threads with numbers beyond + the MAX_PID. That is why you see 32768 and above. + +6.) A subtle change, we now provide the complete register set for tasks + that are active on the other CPUs. This allows better trace back on + those tasks. + + And, let's mention what we could not fix. Back-trace from all but the + thread that we trapped will, most likely, have a bogus entry in it. + The problem is that gdb does not recognize the entry code for + functions that use "current" near (at all?) the entry. The compiler + is putting the "current" decode as the first two instructions of the + function where gdb expects to find %ebp changing code. Back trace + also has trouble with interrupt frames. I am talking with Daniel + Jacobowitz about some way to fix this, but don't hold your breath. + +20011220.0050.35 +Major enhancement with this version is the ability to hold one or more +CPUs in an SMP system while allowing the others to continue. Also, by +default only the current CPU is enabled on single-step commands (please +note that gdb issues single-step commands at times other than when you +use the si command). + +Another change is to collect some useful information in +a global structure called "kgdb_info". 
You should be able to just: + +p kgdb_info + +although I have seen cases where the first time this is done gdb just +prints the first member but prints the whole structure if you then enter +CR (carriage return or enter). This also works: + +p *&kgdb_info + +Here is a sample: +(gdb) p kgdb_info +$4 = {called_from = 0xc010732c, entry_tsc = 32804123790856, errcode = 0, + vector = 3, print_debug_info = 0} + +"Called_from" is the return address from the current entry into kgdb. +Sometimes it is useful to know why you are in kgdb, for example, was +it an NMI or a real breakpoint? The simple way to interrogate this +return address is: + +l *0xc010732c + +which will print the surrounding few lines of source code. + +"Entry_tsc" is the CPU TSC on entry to kgdb (useful to compare to the +kgdb_ts entries). + +"errcode" and "vector" are other entry parameters which may be helpful on +some traps. + +"print_debug_info" is the internal debugging kgdb print enable flag. Yes, +you can modify it. + +In SMP systems kgdb_info also includes the "cpus_waiting" structure and +"hold_on_step": + +(gdb) p kgdb_info +$7 = {called_from = 0xc0112739, entry_tsc = 1034936624074, errcode = 0, + vector = 2, print_debug_info = 0, hold_on_sstep = 1, cpus_waiting = {{ + task = 0x0, hold = 0, regs = 0x0}, {task = 0xc71b8000, hold = 0, + regs = 0xc71b9f70}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}, {task = 0x0, hold = 0, regs = 0x0}, {task = 0x0, + hold = 0, regs = 0x0}}} + +"Cpus_waiting" has an entry for each CPU other than the current one that +has been stopped. Each entry contains the task_struct address for that +CPU, the address of the regs for that task and a hold flag. All these +have the proper typing so that, for example: + +p *kgdb_info.cpus_waiting[1].regs + +will print the registers for CPU 1. + +"Hold_on_sstep" is a new feature with this version and comes up set or +true. 
What this means is that whenever kgdb is asked to single-step all +other CPUs are held (i.e. not allowed to execute). The flag applies to +all but the current CPU and, again, can be changed: + +p kgdb_info.hold_on_sstep=0 + +restores the old behavior of letting all CPUs run during single-stepping. + +Likewise, each CPU has a "hold" flag, which if set, locks that CPU out +of execution. Note that this has some risk in cases where the CPUs need +to communicate with each other. If kgdb finds no CPU available on exit, +it will push a message thru gdb and stay in kgdb. Note that it is legal +to hold the current CPU as long as at least one CPU can execute. + +20010621.1117.09 +This version implements an event queue. Events are signaled by calling +a function in the kgdb stub and may be examined from gdb. See EVENTS +below for details. This version also tightens up the interrupt and SMP +handling to not allow interrupts on the way to kgdb from a breakpoint +trap. It is fine to allow these interrupts for user code, but not +system debugging. + +Version +======= + +This version of the kgdb package was developed and tested on +kernel version 2.4.16. It will not install on any earlier kernels. +It is possible that it will continue to work on later versions +of 2.4 and then versions of 2.5 (I hope). + + +Debugging Setup +=============== + +Designate one machine as the "development" machine. This is the +machine on which you run your compiles and which has your source +code for the kernel. Designate a second machine as the "target" +machine. This is the machine that will run your experimental +kernel. + +The two machines will be connected together via a serial line out +one or the other of the COM ports of the PC. You will need the +appropriate modem eliminator (null modem) cable(s) for this. + +Decide on which tty port you want the machines to communicate, then +connect them up back-to-back using the null modem cable. COM1 is +/dev/ttyS0 and COM2 is /dev/ttyS1. 
You should test this connection +with the two machines prior to trying to debug a kernel. Once you +have it working, on the TARGET machine, enter: + +setserial /dev/ttyS0 (or what ever tty you are using) + +and record the port address and the IRQ number. + +On the DEVELOPMENT machine you need to apply the patch for the kgdb +hooks. You have probably already done that if you are reading this +file. + +On your DEVELOPMENT machine, go to your kernel source directory and do +"make Xconfig" where X is one of "x", "menu", or "". If you are +configuring in the standard serial driver, it must not be a module. +Either yes or no is ok, but making the serial driver a module means it +will initialize after kgdb has set up the UART interrupt code and may +cause a failure of the control-C option discussed below. The configure +question for the serial driver is under the "Character devices" heading +and is: + +"Standard/generic (8250/16550 and compatible UARTs) serial support" + +Go down to the kernel debugging menu item and open it up. Enable the +kernel kgdb stub code by selecting that item. You can also choose to +turn on the "-ggdb -O1" compile options. The -ggdb causes the compiler +to put more debug info (like local symbols) in the object file. On the +i386 -g and -ggdb are the same so this option just reduces to "O1". The +-O1 reduces the optimization level. This may be helpful in some cases, +be aware, however, that this may also mask the problem you are looking +for. + +The baud rate. Default is 115200. What ever you choose be sure that +the host machine is set to the same speed. I recommend the default. + +The port. This is the I/O address of the serial UART that you should +have gotten using setserial as described above. The standard COM1 port +(3f8) using IRQ 4 is default. COM2 is 2f8 which by convention uses IRQ +3. + +The port IRQ (see above). + +Stack overflow test. 
This option makes a minor change in the trap, +system call and interrupt code to detect stack overflow and transfer +control to kgdb if it happens. (Some platforms have this in the +baseline code, but the i386 does not.) + +You can also configure the system to recognize the boot option +"console=kgdb" which if given will cause all console output during +booting to be put thru gdb as well as other consoles. This option +requires that gdb and kgdb be connected prior to sending console output +so, if they are not, a breakpoint is executed to force the connection. +This will happen before any kernel output (it is going thru gdb, right), +and will stall the boot until the connection is made. + +You can also configure in a patch to SysRq to enable the kGdb SysRq. +This request generates a breakpoint. Since the serial port IRQ line is +set up after any serial drivers, it is possible that this command will +work when the control-C will not. + +Save and exit the Xconfig program. Then do "make clean" , "make dep" +and "make bzImage" (or whatever target you want to make). This gets the +kernel compiled with the "-g" option set -- necessary for debugging. + +You have just built the kernel on your DEVELOPMENT machine that you +intend to run on your TARGET machine. + +To install this new kernel, use the following installation procedure. +Remember, you are on the DEVELOPMENT machine patching the kernel source +for the kernel that you intend to run on the TARGET machine. + +Copy this kernel to your target machine using your usual procedures. I +usually arrange to copy development: +/usr/src/linux/arch/i386/boot/bzImage to /vmlinuz on the TARGET machine +via a LAN based NFS access. That is, I run the cp command on the target +and copy from the development machine via the LAN. Run Lilo (see "man +lilo" for details on how to set this up) on the new kernel on the target +machine so that it will boot! Then boot the kernel on the target +machine. 
+ +On the DEVELOPMENT machine, create a file called .gdbinit in the +directory /usr/src/linux. An example .gdbinit file looks like this: + +shell echo -e "\003" >/dev/ttyS0 +set remotebaud 38400 (or what ever speed you have chosen) +target remote /dev/ttyS0 + + +Change the "echo" and "target" definition so that it specifies the tty +port that you intend to use. Change the "remotebaud" definition to +match the data rate that you are going to use for the com line. + +You are now ready to try it out. + +Boot your target machine with "kgdb" in the boot command i.e. something +like: + +lilo> test kgdb + +or if you also want console output thru gdb: + +lilo> test kgdb console=kgdb + +You should see the lilo message saying it has loaded the kernel and then +all output stops. The kgdb stub is trying to connect with gdb. Start +gdb something like this: + + +On your DEVELOPMENT machine, cd /usr/src/linux and enter "gdb vmlinux". +When gdb gets the symbols loaded it will read your .gdbinit file and, if +everything is working correctly, you should see gdb print out a few +lines indicating that a breakpoint has been taken. It will actually +show a line of code in the target kernel inside the kgdb activation +code. + +The gdb interaction should look something like this: + + linux-dev:/usr/src/linux# gdb vmlinux + GDB is free software and you are welcome to distribute copies of it + under certain conditions; type "show copying" to see the conditions. + There is absolutely no warranty for GDB; type "show warranty" for details. + GDB 4.15.1 (i486-slackware-linux), + Copyright 1995 Free Software Foundation, Inc... + breakpoint () at i386-stub.c:750 + 750 } + (gdb) + +You can now use whatever gdb commands you like to set breakpoints. +Enter "continue" to start your target machine executing again. At this +point the target system will run at full speed until it encounters +your breakpoint or gets a segment violation in the kernel, or whatever. 
+ +If you have the kgdb console enabled when you continue, gdb will print +out all the console messages. + +The above example caused a breakpoint relatively early in the boot +process. For the i386 kgdb it is possible to code a break instruction +as the first C-language point in init/main.c, i.e. as the first instruction +in start_kernel(). This could be done as follows: + +#include <asm/kgdb.h> + breakpoint(); + +This breakpoint() is really a function that sets up the breakpoint and +single-step hardware trap cells and then executes a breakpoint. Any +early hard coded breakpoint will need to use this function. Once the +trap cells are set up they need not be set again, but doing it again +does not hurt anything, so you don't need to be concerned about which +breakpoint is hit first. Once the trap cells are set up (and the kernel +sets them up in due course even if breakpoint() is never called) the +macro: + +BREAKPOINT; + +will generate an inline breakpoint. This may be more useful as it stops +the processor at the instruction instead of in a function a step removed +from the location of interest. In either case <asm/kgdb.h> must be +included to define both breakpoint() and BREAKPOINT. + +Triggering kgdbstub at other times +================================== + +Often you don't need to enter the debugger until much later in the boot +or even after the machine has been running for some time. Once the +kernel is booted and interrupts are on, you can force the system to +enter the debugger by sending a control-C to the debug port. This is +what the first line of the recommended .gdbinit file does. This allows +you to start gdb any time after the system is up as well as when the +system is already at a breakpoint. (In the case where the system is +already at a breakpoint the control-C is not needed, however, it will +be ignored by the target so no harm is done. Also note that the echo +command assumes that the port speed is already set.
This will be true +once gdb has connected, but it is best to set the port speed before you +run gdb.) + +Another simple way to do this is to put the following file in your ~/bin +directory: + +#!/bin/bash +echo -e "\003" > /dev/ttyS0 + +Here, the ttyS0 should be replaced with what ever port you are using. +The "\003" is control-C. Once you are connected with gdb, you can enter +control-C at the command prompt. + +An alternative way to get control to the debugger is to enable the kGdb +SysRq command. Then you would enter Alt-SysRq-g (all three keys at the +same time, but push them down in the order given). To refresh your +memory of the available SysRq commands try Alt-SysRq-=. Actually any +undefined command could replace the "=", but I like to KNOW that what I +am pushing will never be defined. + +Debugging hints +=============== + +You can break into the target machine at any time from the development +machine by typing ^C (see above paragraph). If the target machine has +interrupts enabled this will stop it in the kernel and enter the +debugger. + +There is unfortunately no way of breaking into the kernel if it is +in a loop with interrupts disabled, so if this happens to you then +you need to place exploratory breakpoints or printk's into the kernel +to find out where it is looping. The exploratory breakpoints can be +entered either thru gdb or hard coded into the source. This is very +handy if you do something like: + +if (<condition>) BREAKPOINT; + + +There is a copy of an e-mail in the Documentation/i386/kgdb/ directory +(debug-nmi.txt) which describes how to create an NMI on an ISA bus +machine using a paper clip. I have a sophisticated version of this made +by wiring a push button switch into a PC104/ISA bus adapter card. The +adapter card nicely furnishes wire wrap pins for all the ISA bus +signals. + +When you are done debugging the kernel on the target machine it is a +good idea to leave it in a running state. This makes reboots faster, +bypassing the fsck.
So do a gdb "continue" as the last gdb command if +this is possible. To terminate gdb itself on the development machine +and leave the target machine running, first clear all breakpoints and +continue, then type ^Z to suspend gdb and then kill it with "kill %1" or +something similar. + +If gdbstub Does Not Work +======================== + +If it doesn't work, you will have to troubleshoot it. Do the easy +things first like double checking your cabling and data rates. You +might try some non-kernel based programs to see if the back-to-back +connection works properly. Just something simple like cat /etc/hosts +>/dev/ttyS0 on one machine and cat /dev/ttyS0 on the other will tell you +if you can send data from one machine to the other. Make sure it works +in both directions. There is no point in tearing out your hair in the +kernel if the line doesn't work. + +All of the real action takes place in the file +/usr/src/linux/arch/i386/kernel/kgdb_stub.c. That is the code on the target +machine that interacts with gdb on the development machine. In gdb you can +turn on a debug switch with the following command: + + set remotedebug + +This will print out the protocol messages that gdb is exchanging with +the target machine. + +Another place to look is /usr/src/linux/arch/i386/lib/kgdb_serial.c. This is +the code that talks to the serial port on the target side. There might +be a problem there. In particular there is a section of this code that +tests the UART which will tell you what UART you have if you define +"PRNT" (just remove "_off" from the #define PRNT_off). To view this +report you will need to boot the system without any breakpoints. This +allows the kernel to run to the point where it calls kgdb to set up +interrupts. At this time kgdb will test the UART and print out the type +it finds. (You need to wait so that the printks are actually being +printed. Early in the boot they are cached, waiting for the console to +be enabled.
Also, if kgdb is entered thru a breakpoint it is possible +to cause a dead lock by calling printk when the console is locked. The +stub thus avoids doing printks from breakpoints, especially in the +serial code.) At this time, if the UART fails to do the expected thing, +kgdb will print out (using printk) information on what failed. (These +messages will be buried in all the other boot up messages. Look for +lines that start with "gdb_hook_interrupt:". You may want to use dmesg +once the system is up to view the log.) If this fails or if you still +don't connect, review your answers for the port address. Use: + +setserial /dev/ttyS0 + +to get the current port and IRQ information. This command will also +tell you what the system found for the UART type. The stub recognizes +the following UART types: + +16450, 16550, and 16550A + +If you are really desperate you can use printk debugging in the +kgdbstub code in the target kernel until you get it working. In particular, +there is a global variable in /usr/src/linux/arch/i386/kernel/kgdb_stub.c +named "remote_debug". Compile your kernel with this set to 1, rather +than 0 and the debug stub will print out lots of stuff as it does +what it does. Likewise there are debug printks in the kgdb_serial.c +code that can be turned on with simple changes in the macro defines. + + +Debugging Loadable Modules +========================== + +This technique comes courtesy of Edouard Parmelan + + +When you run gdb, enter the command + +source gdbinit-modules + +This will read in a file of gdb macros that was installed in your +kernel source directory when kgdb was installed. This file implements +the following commands: + +mod-list + Lists the loaded modules in the form + +mod-print-symbols + Prints all the symbols in the indicated module. + +mod-add-symbols + Loads the symbols from the object file and associates them + with the indicated module.
+ +After you have loaded the module that you want to debug, use the command +mod-list to find the of your module. Then use that +address in the mod-add-symbols command to load your module's symbols. +From that point onward you can debug your module as if it were a part +of the kernel. + +The file gdbinit-modules also contains a command named mod-add-lis as +an example of how to construct a command of your own to load your +favorite module. The idea is to "can" the pathname of the module +in the command so you don't have to type so much. + +Threads +======= + +Each process in a target machine is seen as a gdb thread. gdb thread +related commands (info threads, thread n) can be used. + +ia-32 hardware breakpoints +========================== + +kgdb stub contains support for hardware breakpoints using debugging features +of ia-32(x86) processors. These breakpoints do not need code modification. +They use debugging registers. 4 hardware breakpoints are available in ia-32 +processors. + +Each hardware breakpoint can be of one of the following three types. + +1. Execution breakpoint - An Execution breakpoint is triggered when code + at the breakpoint address is executed. + + As limited number of hardware breakpoints are available, it is + advisable to use software breakpoints ( break command ) instead + of execution hardware breakpoints, unless modification of code + is to be avoided. + +2. Write breakpoint - A write breakpoint is triggered when memory + location at the breakpoint address is written. + + A write or can be placed for data of variable length. Length of + a write breakpoint indicates length of the datatype to be + watched. Length is 1 for 1 byte data , 2 for 2 byte data, 3 for + 4 byte data. + +3. Access breakpoint - An access breakpoint is triggered when memory + location at the breakpoint address is either read or written. + + Access breakpoints also have lengths similar to write breakpoints. + +IO breakpoints in ia-32 are not supported. 
+ +Since gdb stub at present does not use the protocol used by gdb for hardware +breakpoints, hardware breakpoints are accessed through gdb macros. gdb macros +for hardware breakpoints are described below. + +hwebrk - Places an execution breakpoint + hwebrk breakpointno address +hwwbrk - Places a write breakpoint + hwwbrk breakpointno length address +hwabrk - Places an access breakpoint + hwabrk breakpointno length address +hwrmbrk - Removes a breakpoint + hwrmbrk breakpointno +exinfo - Tells whether a software or hardware breakpoint has occurred. + Prints number of the hardware breakpoint if a hardware breakpoint has + occurred. + +Arguments required by these commands are as follows +breakpointno - 0 to 3 +length - 1 to 3 +address - Memory location in hex digits ( without 0x ) e.g c015e9bc + +SMP support +========== + +When a breakpoint occurs or user issues a break ( Ctrl + C ) to gdb +client, all the processors are forced to enter the debugger. Current +thread corresponds to the thread running on the processor where +breakpoint occurred. Threads running on other processor(s) appear +similar to other non-running threads in the 'info threads' output. +Within the kgdb stub there is a structure "waiting_cpus" in which kgdb +records the values of "current" and "regs" for each CPU other than the +one that hit the breakpoint. "current" is a pointer to the task +structure for the task that CPU is running, while "regs" points to the +saved registers for the task. This structure can be examined with the +gdb "p" command. + +ia-32 hardware debugging registers on all processors are set to same +values. Hence any hardware breakpoints may occur on any processor. + +gdb troubleshooting +=================== + +1. gdb hangs +Kill it. restart gdb. Connect to target machine. + +2. 
gdb cannot connect to target machine (after killing a gdb and +restarting another) If the target machine was not inside debugger when +you killed gdb, gdb cannot connect because the target machine won't +respond. In this case echo "Ctrl+C"(ASCII 3) to the serial line. +e.g. echo -e "\003" > /dev/ttyS1 +This forces that target machine into the debugger, after which you +can connect. + +3. gdb cannot connect even after echoing Ctrl+C into serial line +Try changing serial line settings min to 1 and time to 0 +e.g. stty min 1 time 0 < /dev/ttyS1 +Try echoing again + +Check serial line speed and set it to correct value if required +e.g. stty ispeed 115200 ospeed 115200 < /dev/ttyS1 + +EVENTS +====== + +Ever want to know the order of things happening? Which CPU did what and +when? How did the spinlock get the way it is? Then events are for +you. Events are defined by calls to an event collection interface and +saved for later examination. In this case, kgdb events are saved by a +very fast bit of code in kgdb which is fully SMP and interrupt protected +and they are examined by using gdb to display them. Kgdb keeps only +the last N events, where N must be a power of two and is defined at +configure time. + + +Events are signaled to kgdb by calling: + +kgdb_ts(data0,data1) + +For each call kgdb records each call in an array along with other info. +Here is the array definition: + +struct kgdb_and_then_struct { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + int data0; + int data1; +}; + +For SMP machines the CPU is recorded, for all machines the TSC is +recorded (gets a time stamp) as well as the line number and source file +the call was made from. The address of the (from), the "if" (interrupt +flag) and the two data items are also recorded. The macro kgdb_ts casts +the types to int, so you can put any 32-bit values here. 
There is a +configure option to select the number of events you want to keep. A +nice number might be 128, but you can keep up to 1024 if you want. The +number must be a power of two. An "andthen" macro library is provided +for gdb to help you look at these events. It is also possible to define +a different structure for the event storage and cast the data to this +structure. For example the following structure is defined in kgdb: + +struct kgdb_and_then_struct2 { +#ifdef CONFIG_SMP + int on_cpu; +#endif + long long at_time; + int from_ln; + char * in_src; + void *from; + int with_if; + struct task_struct *t1; + struct task_struct *t2; +}; + +If you use this for display, the data elements will be displayed as +pointers to task_struct entries. You may want to define your own +structure to use in casting. You should only change the last two items +and you must keep the structure size the same. Kgdb will handle these +as 32-bit ints, but within that constraint you can define a structure to +cast to any 32-bit quantity. This need only be available to gdb and is +only used for casting in the display code. + +Final Items +=========== + +I picked up this code from Amit S. Kale and enhanced it. + +If you make some really cool modification to this stuff, or if you +fix a bug, please let me know. + +George Anzinger + + +Amit S. Kale + + +(First kgdb by David Grothe ) + +(modified by Tigran Aivazian ) + Putting gdbstub into the kernel config menu. + +(modified by Scott Foehner ) + Hooks for entering gdbstub at boot time. + +(modified by Amit S. Kale ) + Threads, ia-32 hw debugging, mp support, console support, + nmi watchdog handling. + +(modified by George Anzinger ) + Extended threads to include the idle threads. + Enhancements to allow breakpoint() at first C code. + Use of module_init() and __setup() to automate the configure. + Enhanced the cpu "collection" code to work in early bring-up. 
+ Added ability to call functions from gdb + Print info thread stuff without going back to schedule() + Now collect the "other" cpus with an IPI/ NMI. --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/i386/kgdb/loadmodule.sh 2004-04-18 22:25:30.334218976 -0700 @@ -0,0 +1,78 @@ +#/bin/sh +# This script loads a module on a target machine and generates a gdb script. +# source generated gdb script to load the module file at appropriate addresses +# in gdb. +# +# Usage: +# Loading the module on target machine and generating gdb script) +# [foo]$ loadmodule.sh +# +# Loading the module file into gdb +# (gdb) source +# +# Modify following variables according to your setup. +# TESTMACHINE - Name of the target machine +# GDBSCRIPTS - The directory where a gdb script will be generated +# +# Author: Amit S. Kale (akale@veritas.com). +# +# If you run into problems, please check files pointed to by following +# variables. +# ERRFILE - /tmp/.errs contains stderr output of insmod +# MAPFILE - /tmp/.map contains stdout output of insmod +# GDBSCRIPT - $GDBSCRIPTS/load gdb script. 
+ +TESTMACHINE=foo +GDBSCRIPTS=/home/bar + +if [ $# -lt 1 ] ; then { + echo Usage: $0 modulefile + exit +} ; fi + +MODULEFILE=$1 +MODULEFILEBASENAME=`basename $1` + +if [ $MODULEFILE = $MODULEFILEBASENAME ] ; then { + MODULEFILE=`pwd`/$MODULEFILE +} fi + +ERRFILE=/tmp/$MODULEFILEBASENAME.errs +MAPFILE=/tmp/$MODULEFILEBASENAME.map +GDBSCRIPT=$GDBSCRIPTS/load$MODULEFILEBASENAME + +function findaddr() { + local ADDR=0x$(echo "$SEGMENTS" | \ + grep "$1" | sed 's/^[^ ]*[ ]*[^ ]*[ ]*//' | \ + sed 's/[ ]*[^ ]*$//') + echo $ADDR +} + +function checkerrs() { + if [ "`cat $ERRFILE`" != "" ] ; then { + cat $ERRFILE + exit + } fi +} + +#load the module +echo Copying $MODULEFILE to $TESTMACHINE +rcp $MODULEFILE root@${TESTMACHINE}: + +echo Loading module $MODULEFILE +rsh -l root $TESTMACHINE /sbin/insmod -m ./`basename $MODULEFILE` \ + > $MAPFILE 2> $ERRFILE +checkerrs + +SEGMENTS=`head -n 11 $MAPFILE | tail -n 10` +TEXTADDR=$(findaddr "\\.text[^.]") +LOADSTRING="add-symbol-file $MODULEFILE $TEXTADDR" +SEGADDRS=`echo "$SEGMENTS" | awk '//{ + if ($1 != ".text" && $1 != ".this" && + $1 != ".kstrtab" && $1 != ".kmodtab") { + print " -s " $1 " 0x" $3 " " + } +}'` +LOADSTRING="$LOADSTRING $SEGADDRS" +echo Generating script $GDBSCRIPT +echo $LOADSTRING > $GDBSCRIPT --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/must-fix.txt 2004-04-18 22:25:34.330611432 -0700 @@ -0,0 +1,288 @@ + +Must-fix bugs +============= + +drivers/char/ +~~~~~~~~~~~~~ + +o TTY locking is broken. + + o see FIXME in do_tty_hangup(). This causes ppp BUGs in local_bh_enable() + + o Other problems: aviro, dipankar, Alan have details. + + o somebody will have to document the tty driver and ldisc API + +drivers/tty +~~~~~~~~~~~ + +o viro: tty_driver refcounting, tty/misc/upper levels of sound still not + completely fixed. + +drivers/block/ +~~~~~~~~~~~~~~ + +o loop.c: Concurrent write access on block devices might cause a deadlock + of the complete system. 
See: + http://marc.theaimsgroup.com/?l=linux-kernel&m=106275365925769&w== + http://bugzilla.kernel.org/show_bug.cgi?id=1198 + Thread of possible fix: + http://www.kerneli.org/pipermail/cryptoapi-devel/2003-October/000676.html + + (Fruhwirth Clemens) + +o ideraid hasn't been ported to 2.5 at all yet. + + We need to understand whether the proposed BIO split code will suffice + for this. + +drivers/input/ +~~~~~~~~~~~~~~ + +o rmk: unconverted keyboard/mouse drivers (there's a deadline of 2.6.0 + currently on these remaining in my/Linus' tree.) + +o viro: large absence of locking. + +o viro: parport is nearly as bad as that and there the code is more hairy. + IMO parport is more of "figure out what API changes are needed for its + users, get them done ASAP, then fix generic layer at leisure" + +o (Albert Cahalan) Lots of people (check Google) get this message from the + kernel: + + psmouse.c: Lost synchronization, throwing 2 bytes away. + + (the number of bytes will be 1, 2, or 3) + + At work, I get it when there is heavy NFS traffic. The mouse goes crazy, + jumping around and doing random cut-and-paste all over everything. This + is with a decently fast and modern PC. + +o There seem to be too many reports of keyboards and mice failing or acting + strangely. + + +drivers/misc/ +~~~~~~~~~~~~~ + +o rmk: UCB1[23]00 drivers, currently sitting in drivers/misc in the ARM + tree. (touchscreen, audio, gpio, type device.) + + These need to be moved out of drivers/misc/ and into real places + +o viro: actually, misc.c has a good chance to die. With cdev-cidr that's + trivial. + +drivers/net/ +~~~~~~~~~~~~ + +drivers/net/irda/ +~~~~~~~~~~~~~~~~~ + + (Jean Tourrilhes) + +o irport need to be converted to sir-kthread + +o dongle drivers need to be converted to sir-dev (in progress) + +o new drivers (irtty-sir/smsc-ircc2/donauboe) need more testing (in progress) + + +drivers/pci/ +~~~~~~~~~~~~ + +o alan: Some cardbus crashes the system + + (bugzilla, please?) 
+ +drivers/pcmcia/ +~~~~~~~~~~~~~~~ + +o alan: This is a locking disaster. + + (rmk, brodo: in progress) + +drivers/pld/ +~~~~~~~~~~~~ + +o rmk: EPXA (ARM platform) PLD hotswap drivers (drivers/pld) + + (rmk: will work out what to do here. maybe drivers/arm/) + +drivers/video/ +~~~~~~~~~~~~~~ + +o Lots of drivers don't compile, others do but don't work. + +drivers/scsi/ +~~~~~~~~~~~~~ + +o Convert am53c974, dpt_i2o, initio and pci2220i to DMA-mapping + +o Make inia100, cpqfc, pci2000 and dc390t compile + +o Convert + + wd33c99 based: a2091 a3000 gpv11 mvme174 sgiwd93 + + 53c7xx based: amiga7xxx bvme6000 mvme16x initio am53c974 pci2000 + pci2220i dc390t + + To new error handling + + It also might be possible to shift the 53c7xx based drivers over to + 53c700 which does the new EH stuff, but I don't have the hardware to check + such a shift. + + For the non-compiling stuff, I've probably missed a few that just aren't + compilable on my platforms, so any updates would be welcome. Also, are + some of our non-compiling or unconverted drivers obsolete? + +fs/ +~~~ + +o AIO/direct-IO writes can race with truncate and wreck filesystems. + (Badari has a patch) + +o viro: fs/char_dev.c needs removal of aeb stuff and merge of cdev-cidr. + In progress. + +o forward-port sct's O_DIRECT fixes (Badari has a patch) + +o viro: there is some generic stuff for namei/namespace/super, but that's a + slow-merge and can go in 2.6 just fine + +o trond: NFS has a mmap-versus-truncate problem (fixed? needs testing) + +o trond: NFSv4 client, bugs in lockd, RPSEC_GSS for NFSv[23], some atomic open + bits. more info: http://www.fys.uio.no/~trondmy/src/Linux-2.6.x/2.6.0-test11/ + +kernel/sched.c +~~~~~~~~~~~~~~ + +o Starvation, general interactivity need close monitoring. + +o SMT aware scheduler (Ingo, Rusty, Nick have implementations) + +kernel/ +~~~~~~~ + +o Alan: 32bit uid support is *still* broken for process accounting. + + Create a 32bit uid, turn accounting on. 
Shock horror it doesn't work + because the field is 16bit. We need an acct structure flag day for 2.6 + IMHO + + (alan has patch) + +o viro: core sysctl code is racy. And its interaction with sysfs + +o (ingo) rwsems (on x86) are limited to 32766 waiting processes. This + means that setting pid_max to above 32K is unsafe :-( + + An option is to use CONFIG_RWSEM_GENERIC_SPINLOCK variant all the time, + for all archs, and not inline any part of the ops. + +lib/kobject.c +~~~~~~~~~~~~~ + +o kobject refcounting (comments from Al Viro): + + _anything_ can grab a temporary reference to kobject. IOW, if kobject is + embedded into something that could be freed - it _MUST_ have a destructor + and that destructor _MUST_ be the destructor for containing object. + + Any violation of the above (and we already have a bunch of those) is a + user-triggerable memory corruption. + + We can tolerate it for a while in 2.5 (e.g. during work on subsystem we + can decide to switch to that way of handling objects and have subsystem + vulnerable for a while), but all such windows must be closed before 2.6 + and during 2.6 we can't open them at all. + +o All block drivers which control multiple gendisks with a single + request_queue are broken, due to one-to-one assumptions in the request + queue sysfs hookup. + +mm/ +~~~ + +o GFP_DMA32 (or something like that). Lots of ideas. jejb, zaitcev, + willy, arjan, wli. + + Specifically, 64-bit systems need to be able to enforce 32-bit addressing + limits for device metadata like network cards' ring buffers and SCSI + command descriptors. + +o access_process_vm() doesn't flush right. We probably need new flushing + primitives to do this (davem?) + + +modules +~~~~~~~ + + (Rusty) + +net/ +~~~~ + + (davem) + +o UDP apps can in theory deadlock, because the ip_append_data path can end + up sleeping while the socket lock is held. + + It is OK to sleep with the socket held, normally. 
But in this case + the sleep happens while waiting for socket memory/space to become + available, if another context needs to take the socket lock to free up the + space we could hang. + + I sent a rough patch on how to fix this to Alexey, and he is analyzing + the situation. I expect a final fix from him next week or so. + +o Semantics for IPSEC during operations such as TCP connect suck currently. + + When we first try to connect to a destination, we may need to ask the + IPSEC key management daemon to resolve the IPSEC routes for us. For the + purposes of what the kernel needs to do, you can think of it like ARP. We + can't send the packet out properly until we resolve the path. + + What happens now for IPSEC is basically this: + + O_NONBLOCK: returns -EAGAIN over and over until route is resolved + + !O_NONBLOCK: Sleeps until route is resolved + + These semantics are total crap. The solution, which Alexey is working + on, is to allow incomplete routes to exist. These "incomplete" routes + merely put the packet onto a "resolution queue", and once the key manager + does its thing we finish the output of the packet. This is precisely how + ARP works. + + I don't know when Alexey will be done with this. + +net/*/netfilter/ +~~~~~~~~~~~~~~~~ + + (Rusty) + +sound/ +~~~~~~ + +global +~~~~~~ + +o viro: 64-bit dev_t (not a mustfix for 2.6.0). 32-bit dev_t is done, 64-bit + means extra work on nfsd/raid/etc. + +o alan: Forward port 2.4 fixes + - Chris Wright: Security fixes including execve holes, execve vs proc races + +o There are about 60 or 70 security related checks that need doing + (copy_user etc) from Stanford tools. (badari is looking into this, and + hollisb) + +o A couple of hundred real looking bugzilla bugs + +o viro: cdev rework. Mostly done. 
+ --- linux-2.6.6-rc1/Documentation/networking/00-INDEX 2003-08-08 22:55:10.000000000 -0700 +++ 25/Documentation/networking/00-INDEX 2004-04-18 22:25:24.628086440 -0700 @@ -111,8 +111,6 @@ tlan.txt - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info. tms380tr.txt - SysKonnect Token Ring ISA/PCI adapter driver info. -tulip.txt - - info on using DEC 21040/21041/21140 based PCI Ethernet cards. tuntap.txt - TUN/TAP device driver, allowing user space Rx/Tx of packets. vortex.txt --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/sched-domains.txt 2004-04-18 22:25:37.452136888 -0700 @@ -0,0 +1,55 @@ +Each CPU has a "base" scheduling domain (struct sched_domain). These are +accessed via cpu_sched_domain(i) and this_sched_domain() macros. The domain +hierarchy is built from these base domains via the ->parent pointer. ->parent +MUST be NULL terminated, and domain structures should be per-CPU as they +are locklessly updated. + +Each scheduling domain spans a number of CPUs (stored in the ->span field). +A domain's span MUST be a superset of its child's span, and a base domain +for CPU i MUST span at least i. The top domain for each CPU will generally +span all CPUs in the system although strictly it doesn't have to, but this +could lead to a case where some CPUs will never be given tasks to run unless +the CPUs allowed mask is explicitly set. A sched domain's span means "balance +process load among these CPUs". + +Each scheduling domain must have one or more CPU groups (struct sched_group) +which are organised as a circular one way linked list from the ->groups +pointer. The union of cpumasks of these groups MUST be the same as the +domain's span. The intersection of cpumasks from any two of these groups +MUST be the empty set. The group pointed to by the ->groups pointer MUST +contain the CPU to which the domain belongs. Groups may be shared among +CPUs as they contain read only data after they have been set up. 
+ +Balancing within a sched domain occurs between groups. That is, each group +is treated as one entity. The load of a group is defined as the sum of the +load of each of its member CPUs, and only when the load of a group becomes +out of balance are tasks moved between groups. + +In kernel/sched.c, rebalance_tick is run periodically on each CPU. This +function takes its CPU's base sched domain and checks to see if it has reached +its rebalance interval. If so, then it will run load_balance on that domain. +rebalance_tick then checks the parent sched_domain (if it exists), and the +parent of the parent and so forth. + +*** Implementing sched domains *** +The "base" domain will "span" the first level of the hierarchy. In the case +of SMT, you'll span all siblings of the physical CPU, with each group being +a single virtual CPU. + +In SMP, the parent of the base domain will span all physical CPUs in the +node. Each group being a single physical CPU. Then with NUMA, the parent +of the SMP domain will span the entire machine, with each group having the +cpumask of a node. Or, you could do multi-level NUMA or Opteron, for example, +might have just one domain covering its one NUMA level. + +The implementor should read comments in include/linux/sched.h: +struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of +the specifics and what to tune. + +Implementors should change the line +#undef SCHED_DOMAIN_DEBUG +to +#define SCHED_DOMAIN_DEBUG +in kernel/sched.c as this enables an error checking parse of the sched domains +which should catch most possible errors (described above). It also prints out +the domain structure in a visual format. 
--- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/Documentation/should-fix.txt 2004-04-18 22:25:34.331611280 -0700 @@ -0,0 +1,545 @@ +Not-ready features and speedups +=============================== + +Legend: + +PRI1: We're totally lame if this doesn't get in +PRI2: Would be nice +PRI3: Not very important + +drivers/block/ +~~~~~~~~~~~~~~ + +o viro: paride drivers need a big cleanup. Partially done, but ATAPI drivers + need serious work and bug fixing. + + PRI2 + +drivers/char/rtc/ +~~~~~~~~~~~~~~~~~ + +o rmk, trini: add support for alarms to the existing generic rtc driver. + + PRI2 + +console drivers +~~~~~~~~~~~~~~~ + (Pavel Machek ) + +o There are few must-fix bugs in cursor handling. + +o Play with gpm selection for a while and your cursor gets corrupted with + random dots. Ouch. + +device mapper +~~~~~~~~~~~~~ + +o ioctl interface cleanup patch is ready (redo the structure layouts) + + PRI1 + +o A port of the 2.4 snapshot and mirror targets is in progress + + PRI1 + +o the fs interface to dm needs to be redone. gregkh was going to work on + this. viro is interested in seeing work thus-far. + + PRI2 + +drivers/net/wireless/ +~~~~~~~~~~~~~~~~~~~~~ + + (Jean Tourrilhes ) + +o get HostAP driver in the kernel. No consolidation of the 802.11 + management across driver can happen until this one is in (which is probably + 2.7.X material). I think Jouni is mostly ready but didn't find time for + it. + + PRI2 + +o get more wireless drivers into the kernel. The most "integrable" drivers + at this point seem the NWN driver, Pavel's Spectrum driver. + + PRI1 + +drivers/usb/gadget/ +~~~~~~~~~~~~~~~~~~~ + +o rmk: SA11xx USB client/gadget code (David B has been doing some work on + this, and keeps trying to prod me, but unfortunately I haven't had the time + to look at his work, sorry David.) + + PRI3 + +fs/ +~~~ + +o ext3 and ext2 block allocators have serious failure modes - interleaved + allocations. 
+ + PRI3 + +o Integrate Chris Mason's 2.4 reiserfs ordered data and data journaling + patches. They make reiserfs a lot safer. + + Ordered: PRI2 + data journalled: PRI3 + +o viro: convert more filesystems to use lib/parser.c for options. + + PRI2 + +o aio: fs IO isn't async at present. suparna has restart patches, they're + in -mm. Need to get Ben to review/comment. + + PRI1. + +o drepper: various filesystems use ->pid wrongly + + PRI1 + +o hch: devfs: there's a fundamental lookup vs devfsd race that's only + fixable by introducing a lookup vs devfs deadlock. I can't see how this is + fixable without getting rid of the current devfsd design. Mandrake seems + to have a workaround for this so this is at least not triggered so easily, + but that's not what I'd consider a fix.. + + PRI2 + +kernel/ +~~~~~~~ + +o rusty: Zippel's Reference count simplification. Tricky code, but cuts + about 120 lines from module.c. Patch exists, needs stressing. + + PRI3 + +o rusty: Fix module-failed-init races by starting module "disabled". Patch + exists, requires some subsystems (ie. add_partition) to explicitly say + "make module live now". Without patch we are no worse off than 2.4 etc. + + PRI1 + +o Integrate userspace irq balancing daemon. + + PRI2 + +o kexec. Seems to work, was in -mm. + + PRI3 + +o rmk: lib/inflate.c must not use static variables (causes these to be + referenced via GOTOFF relocations in PIC decompressor. We have a PIC + decompressor to avoid having to hard code a per platform zImage link + address into the makefiles.) + + PRI2 + +o klibc merge? + + PRI2 + +mm/ +~~~ + +o dropbehind for large files + + PRI2 + +net/ +~~~~ + + (davem) + +o Real serious use of IPSEC is hampered by lack of MPLS support. MPLS is a + switching technology that works by switching based upon fixed length labels + prepended to packets. Many people use this and IPSEC to implement VPNs + over public networks, it is also used for things like traffic engineering. 
+ + A good reference site is: + + http://www.mplsrc.com/ + + Anyways, an existing (crappy) implementation exists. I've almost + completed a rewrite, I should have something in the tree next week. + + PRI1 + +o Sometimes we generate IP fragments when it truly isn't necessary. + + The way IP fragmentation is specified, each fragment must be modulo 8 + bytes in length. So suppose the device has an MTU that is not 0 modulo 8, + ethernet even classifies in this way. 1500 == (8 * 187) + 4 + + Our IP fragmenting engine can fragment on packets that are sized within + the last modulo 8 bytes of the MTU. This happens in obscure cases, but it + does happen. + + I've proposed a fix to Alexey, whereby very late in the output path we + check the packet, if we fragmented but the data length would fit into the + MTU we unfragment the packet. + + This is low priority, because technically it creates suboptimal behavior + rather than mis-operation. + + PRI1 + +net/*/netfilter/ +~~~~~~~~~~~~~~~~ + +o Lots of misc. cleanups, which are happening slowly. + + PRI2 + +power management +~~~~~~~~~~~~~~~~ + +o Pat and Pavel disagree over swsusp. Need to sort that out. + + PRI2 + +o Frame buffer restore codepaths (that requires some deep PCI magic) + + PRI2 + +o XFree86 hooks + + PRI2 + +o AGP restoration + + PRI2 + +o DRI restoration + + (davej/Alan: not super-critical, can crash laptop on restore. davej + looking into it.) + + PRI2 + +o IDE suspend/resume without races (Ben is looking at this a little) + + PRI2 + +o Pat: There are already CPU device structures; MTRRs should be a + dynamically registered interface of CPUs, which implies there needs + to be some other glue to know that there are MTRRs that need to be + saved/restored. 
+ + PRI1 + +global +~~~~~~ + +o We need a kernel side API for reporting error events to userspace (could + be async to 2.6 itself) + + (Prototype core based on netlink exists) + + PRI2 + +o Kai: Introduce a sane, easy and standard way to build external modules + - make clean and make modules_install are both broken + + PRI2 + +drivers +~~~~~~~ + +o Alan: Cardbus/PCMCIA requires all Russell's stuff is merged to do + multiheader right and so on + + PRI1 + +drivers/acpi/ +~~~~~~~~~~~~~ + +o Fix acpi for all newer IBM Thinkpads see + http://bugme.osdl.org/show_bug.cgi?id=1038 for more information + +o alan: VIA APIC stuff is one bit of this, there are also some other + reports that were caused by ACPI not setting level v edge trigger some + times + + PRI1 + +o mochel: it seems the acpi irq routing code could use a serious rewrite. + + grover: The problem is the ACPI irq routing code is trying to piggyback + on the existing MPS-specific data structures, and it's generally a hack. + So yes mochel is right, but it is also purging MPS-ities from common code + as well. I've done some preliminary work in this area and it doesn't seem + to break anything (yet) but a rewrite in this area imho should not be + rushed out the door. And, I think the above bugs can be fixed w/o the + rewrite. + + PRI2 + +o mochel: ACPI suspend doesn't work. Important, not critical. Pat is + working it. 
+ + PRI2 + +drivers/block/ +~~~~~~~~~~~~~~ + +o More testing of floppy + + PRI3 + +drivers/char/ +~~~~~~~~~~~~~ + + +drivers/ide/ +~~~~~~~~~~~~ + + (Alan) + +o IDE PIO has occasional unexplained PIO disk eating reports + + PRI1 + +o IDE has multiple zillions of races/hangs in 2.5 still + + PRI1 + +o IDE scsi needs rewriting + + PRI2 + +o IDE needs significant reworking to handle Simplex right + + PRI2 + +o IDE hotplug handling for 2.5 is completely broken still + + PRI2 + +o There are lots of other IDE bugs that won't go away until the taskfile + stuff is included, the locking bugs that allow any user to hang the IDE + layer in 2.5, and some other updates are forward ported. (esp. HPT372N). + + PRI1 + +drivers/isdn/ +~~~~~~~~~~~~~ + + (Kai, rmk) + +o isdn_tty locking is completely broken (cli() and friends) + + PRI2 + +o fix other drivers + + PRI2 + +o lots more cleanups, adaption to recent APIs etc + + PRI3 + +o fixup tty-based ISDN drivers which provide TIOCM* ioctls (see my recent + 3-set patch for serial stuff) + + Alternatively, we could re-introduce the fallback to driver ioctl parsing + for these if not enough drivers get updated. + + PRI3 + +drivers/net/ +~~~~~~~~~~~~ + +o davej: Either Wireless network drivers or PCMCIA broke somewhen. A + configuration that worked fine under 2.4 doesn't receive any packets. Need + to look into this more to make sure I don't have any misconfiguration that + just 'happened to work' under 2.4 + + PRI1 + +drivers/scsi/ +~~~~~~~~~~~~~ + +o jejb: qlogic - + + o Merge the feral driver. It covers all qlogic chips: 1020 all the way + up to 23xxx. http://linux-scsi.bkbits.net/scsi-isp-2.5 + + o qla2xxx: only for FC chips. Has significant build issues. hch + promises to send me a "must fix" list for this. 
+ http://linux-scsi.bkbits.net/scsi-qla2xxx-2.5 + + PRI2 + +o hch, Mike Anderson, Badari Pulavarty: scsi locking issues + + o there are lots of members of struct Scsi_Host/scsi_device/scsi_cmnd + with very unclear locking, many of them probably want to become + atomic_t's or bitmaps (for the 1bit bitfields). + + o there's lots of volatile abuse in the scsi code that needs to be + thought about. + + o there's some global variables incremented without any locks + + PRI2 + +sound/ +~~~~~~ + +o rmk: several OSS drivers for SA11xx-based hardware in need of + ALSA-ification and L3 bus support code for these. + +o rmk: need to complete ALSA-ification of the WaveArtist driver for both + NetWinder and other stuff (there's some fairly fundamental differences in + the way the mixer needs to be handled for the NetWinder.) + + (Issues with forward-porting 2.4 bugfixes.) + (Killing off OSS is 2.7 material) + +PRI2 + +arch/i386/ +~~~~~~~~~~ + +o Also PC9800 merge needs finishing to the point we want for 2.6 (not all). + + PRI3 + +o davej: PAT support (for mtrr exhaustion w/ AGP) + + PRI2 + +o 2.5.x won't boot on some 440GX + + alan: Problem understood now, feasible fix in 2.4/2.4-ac. (440GX has two + IRQ routers, we use the $PIR table with the PIIX, but the 440GX doesnt use + the PIIX for its IRQ routing). Fall back to BIOS for 440GX works and Intel + concurs. + + PRI1 + +o 2.5.x doesn't handle VIA APIC right yet. + + 1. We must write the PCI_INTERRUPT_LINE + + 2. We have quirk handlers that seem to trash it. + + PRI1 + +o ECC driver questions are not yet sorted (DaveJ is working on this) (Dan + Hollis) + + alan: ECC - I have some test bits from Dan's stuff - they need no kernel + core changes for most platforms. That means we can treat it as a random + driver merge. + + PRI3 + +o alan: 2.4 has some fixes for tsc handling bugs. 
One where some bioses in + SMM mode mess up our toggle on the time high/low or mangle the counter and + one where a few chips need religious use of _p for timer access and we + don't do that. This is forward porting little bits of fixup. + + ACPI HZ stuff we can't trap - a lot of ACPI is implemented as outb's + triggering SMM traps + + PRI1 + +arch/x86_64/ +~~~~~~~~~~~~ + + (Andi) + +o time handling is broken. Need to move up 2.4 time.c code. + + PRI1 + +o NMI watchdog seems to tick too fast + + PRI2 + +o need to coredump 64bit vsyscall code with dwarf2 + + PRI2 + +o move 64bit signal trampolines into vsyscall code and add dwarf2 for it. + (in progress) + + PRI1 + +o describe kernel assembly with dwarf2 annotations for kgdb + + PRI3 + +arch/alpha/ +~~~~~~~~~~~ + +o rth: Ptrace writes are broken. This means we can't (reliably) set + breakpoints or modify variables from gdb. + + PRI1 + +arch/arm/ +~~~~~~~~~ + +o rmk: missing raw keyboard translation tables for all ARM machines. + Haven't even looked into this at all. This could be messy since there + isn't an ARM architecture standard. I'm presently hoping that it won't be + an issue. If it does, I guess we'll see drivers/char/keyboard.c explode. + + PRI2 + +arch/others/ +~~~~~~~~~~~~ + +o SH needs resyncing, as do some other ports. SH64 needs merging. + No impact on mainstream platforms hopefully. + + PRI2 + +arch/s390/ +~~~~~~~~~~ + +o A nasty memory management problem causes random crashes. These appear + to be fixed/hidden by the objrmap patch, more investigation is needed. + + PRI1 + +drivers/s390/ +~~~~~~~~~~~~~ + +o Early userspace and 64 bit dev_t will allow the removal of most of + dasd_devmap.c and dasd_genhd.c. + + PRI2 + +o The 3270 console driver needs to be replaced with a working one + (prototype is there, needs to be finished). + + PRI2 + +o Minor interface changes are pending in cio/ when the z990 machines are + out. 
+ + PRI2 + +o Jan Glauber is working on a fix for the timer issues related to running + on virtualized CPUs (wall-clock vs. cpu time). + + PRI1 + +o a block device driver for ramdisks shared among virtual machines + + PRI3 + +o driver for crypto hardware + + PRI3 + +o 'claw' network device driver + + PRI3 + --- linux-2.6.6-rc1/drivers/atm/ambassador.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/drivers/atm/ambassador.c 2004-04-18 22:25:24.672079752 -0700 @@ -329,7 +329,7 @@ static const unsigned long onegigmask = /********** access to adapter **********/ static inline void wr_plain (const amb_dev * dev, size_t addr, u32 data) { - PRINTD (DBG_FLOW|DBG_REGS, "wr: %08x <- %08x", addr, data); + PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x", addr, data); #ifdef AMB_MMIO dev->membase[addr / sizeof(u32)] = data; #else @@ -343,13 +343,13 @@ static inline u32 rd_plain (const amb_de #else u32 data = inl (dev->iobase + addr); #endif - PRINTD (DBG_FLOW|DBG_REGS, "rd: %08x -> %08x", addr, data); + PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x", addr, data); return data; } static inline void wr_mem (const amb_dev * dev, size_t addr, u32 data) { u32 be = cpu_to_be32 (data); - PRINTD (DBG_FLOW|DBG_REGS, "wr: %08x <- %08x b[%08x]", addr, data, be); + PRINTD (DBG_FLOW|DBG_REGS, "wr: %08zx <- %08x b[%08x]", addr, data, be); #ifdef AMB_MMIO dev->membase[addr / sizeof(u32)] = be; #else @@ -364,7 +364,7 @@ static inline u32 rd_mem (const amb_dev u32 be = inl (dev->iobase + addr); #endif u32 data = be32_to_cpu (be); - PRINTD (DBG_FLOW|DBG_REGS, "rd: %08x -> %08x b[%08x]", addr, data, be); + PRINTD (DBG_FLOW|DBG_REGS, "rd: %08zx -> %08x b[%08x]", addr, data, be); return data; } --- linux-2.6.6-rc1/drivers/atm/firestream.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/drivers/atm/firestream.c 2004-04-18 22:25:51.437010864 -0700 @@ -576,7 +576,7 @@ static inline void write_fs (struct fs_d } -static inline u32 read_fs (struct fs_dev *dev, int offset) +static inline u32 read_fs (struct fs_dev 
*dev, int offset) { return readl (dev->base + offset); } @@ -1380,7 +1380,7 @@ static void __devinit *aligned_kmalloc ( if (alignment <= 0x10) { t = kmalloc (size, flags); - if ((unsigned int)t & (alignment-1)) { + if ((unsigned long)t & (alignment-1)) { printk ("Kmalloc doesn't align things correctly! %p\n", t); kfree (t); return aligned_kmalloc (size, flags, alignment * 4); @@ -1496,7 +1496,7 @@ static void top_off_fp (struct fs_dev *d ne->skb = skb; ne->fp = fp; - qe = (struct FS_BPENTRY *) (read_fs (dev, FP_EA(fp->offset))); + qe = (struct FS_BPENTRY *)(long)(read_fs (dev, FP_EA(fp->offset))); fs_dprintk (FS_DEBUG_QUEUE, "link at %p\n", qe); if (qe) { qe = bus_to_virt ((long) qe); --- linux-2.6.6-rc1/drivers/atm/nicstar.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/atm/nicstar.c 2004-04-18 22:25:24.675079296 -0700 @@ -757,7 +757,7 @@ static int __devinit ns_init_card(int i, for (j = 0; j < NUM_HB; j++) { struct sk_buff *hb; - hb = alloc_skb(NS_HBUFSIZE, GFP_KERNEL); + hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) { printk("nicstar%d: can't allocate %dth of %d huge buffers.\n", @@ -777,7 +777,7 @@ static int __devinit ns_init_card(int i, for (j = 0; j < NUM_LB; j++) { struct sk_buff *lb; - lb = alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); + lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) { printk("nicstar%d: can't allocate %dth of %d large buffers.\n", @@ -813,7 +813,7 @@ static int __devinit ns_init_card(int i, for (j = 0; j < NUM_SB; j++) { struct sk_buff *sb; - sb = alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); + sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) { printk("nicstar%d: can't allocate %dth of %d small buffers.\n", @@ -1315,7 +1315,7 @@ static irqreturn_t ns_irq_handler(int ir card->index); for (i = 0; i < card->sbnr.min; i++) { - sb = alloc_skb(NS_SMSKBSIZE, GFP_ATOMIC); + sb = dev_alloc_skb(NS_SMSKBSIZE); if (sb == NULL) { writel(readl(card->membase + CFG) & ~NS_CFG_EFBIE, card->membase + CFG); @@ -1341,7 
+1341,7 @@ static irqreturn_t ns_irq_handler(int ir card->index); for (i = 0; i < card->lbnr.min; i++) { - lb = alloc_skb(NS_LGSKBSIZE, GFP_ATOMIC); + lb = dev_alloc_skb(NS_LGSKBSIZE); if (lb == NULL) { writel(readl(card->membase + CFG) & ~NS_CFG_EFBIE, card->membase + CFG); @@ -2178,7 +2178,7 @@ static void dequeue_rx(ns_dev *card, ns_ cell = skb->data; for (i = ns_rsqe_cellcount(rsqe); i; i--) { - if ((sb = alloc_skb(NS_SMSKBSIZE, GFP_ATOMIC)) == NULL) + if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) { printk("nicstar%d: Can't allocate buffers for aal0.\n", card->index); @@ -2410,7 +2410,7 @@ static void dequeue_rx(ns_dev *card, ns_ if (hb == NULL) /* No buffers in the queue */ { - hb = alloc_skb(NS_HBUFSIZE, GFP_ATOMIC); + hb = dev_alloc_skb(NS_HBUFSIZE); if (hb == NULL) { printk("nicstar%d: Out of huge buffers.\n", card->index); @@ -2424,7 +2424,7 @@ static void dequeue_rx(ns_dev *card, ns_ else if (card->hbpool.count < card->hbnr.min) { struct sk_buff *new_hb; - if ((new_hb = alloc_skb(NS_HBUFSIZE, GFP_ATOMIC)) != NULL) + if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; @@ -2435,14 +2435,14 @@ static void dequeue_rx(ns_dev *card, ns_ if (--card->hbpool.count < card->hbnr.min) { struct sk_buff *new_hb; - if ((new_hb = alloc_skb(NS_HBUFSIZE, GFP_ATOMIC)) != NULL) + if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } if (card->hbpool.count < card->hbnr.min) { - if ((new_hb = alloc_skb(NS_HBUFSIZE, GFP_ATOMIC)) != NULL) + if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; @@ -2524,7 +2524,7 @@ static void ns_sb_destructor(struct sk_b do { - sb = alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); + sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) break; skb_queue_tail(&card->sbpool.queue, sb); @@ -2547,7 +2547,7 @@ static void ns_lb_destructor(struct sk_b 
do { - lb = alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); + lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) break; skb_queue_tail(&card->lbpool.queue, lb); @@ -2566,7 +2566,7 @@ static void ns_hb_destructor(struct sk_b while (card->hbpool.count < card->hbnr.init) { - hb = alloc_skb(NS_HBUFSIZE, GFP_KERNEL); + hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) break; skb_queue_tail(&card->hbpool.queue, hb); @@ -2638,7 +2638,7 @@ static void dequeue_sm_buf(ns_dev *card, if (card->sbfqc < card->sbnr.init) { struct sk_buff *new_sb; - if ((new_sb = alloc_skb(NS_SMSKBSIZE, GFP_ATOMIC)) != NULL) + if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); @@ -2650,7 +2650,7 @@ static void dequeue_sm_buf(ns_dev *card, #endif /* NS_USE_DESTRUCTORS */ { struct sk_buff *new_sb; - if ((new_sb = alloc_skb(NS_SMSKBSIZE, GFP_ATOMIC)) != NULL) + if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); @@ -2671,7 +2671,7 @@ static void dequeue_lg_buf(ns_dev *card, if (card->lbfqc < card->lbnr.init) { struct sk_buff *new_lb; - if ((new_lb = alloc_skb(NS_LGSKBSIZE, GFP_ATOMIC)) != NULL) + if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); @@ -2683,7 +2683,7 @@ static void dequeue_lg_buf(ns_dev *card, #endif /* NS_USE_DESTRUCTORS */ { struct sk_buff *new_lb; - if ((new_lb = alloc_skb(NS_LGSKBSIZE, GFP_ATOMIC)) != NULL) + if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); @@ -2877,7 +2877,7 @@ static int ns_ioctl(struct atm_dev *dev, { struct sk_buff *sb; - sb = alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); + sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) return -ENOMEM; skb_queue_tail(&card->sbpool.queue, sb); @@ -2891,7 +2891,7 @@ static int 
ns_ioctl(struct atm_dev *dev, { struct sk_buff *lb; - lb = alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); + lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) return -ENOMEM; skb_queue_tail(&card->lbpool.queue, lb); @@ -2920,7 +2920,7 @@ static int ns_ioctl(struct atm_dev *dev, { struct sk_buff *hb; - hb = alloc_skb(NS_HBUFSIZE, GFP_KERNEL); + hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) return -ENOMEM; ns_grab_int_lock(card, flags); --- linux-2.6.6-rc1/drivers/base/class.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/base/class.c 2004-04-18 22:25:24.676079144 -0700 @@ -155,8 +155,7 @@ static int class_device_dev_link(struct static void class_device_dev_unlink(struct class_device * class_dev) { - if (class_dev->dev) - sysfs_remove_link(&class_dev->kobj, "device"); + sysfs_remove_link(&class_dev->kobj, "device"); } static int class_device_driver_link(struct class_device * class_dev) @@ -169,8 +168,7 @@ static int class_device_driver_link(stru static void class_device_driver_unlink(struct class_device * class_dev) { - if ((class_dev->dev) && (class_dev->dev->driver)) - sysfs_remove_link(&class_dev->kobj, "driver"); + sysfs_remove_link(&class_dev->kobj, "driver"); } --- linux-2.6.6-rc1/drivers/base/Kconfig 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/base/Kconfig 2004-04-18 22:25:24.675079296 -0700 @@ -9,14 +9,14 @@ config FW_LOADER the kernel tree does. config DEBUG_DRIVER - bool "Driver Core verbose debug messages" - depends on DEBUG_KERNEL - help - Say Y here if you want the Driver core to produce a bunch of - debug messages to the system log. Select this if you are having a - problem with the driver core and want to see more of what is - going on. + bool "Driver Core verbose debug messages" + depends on DEBUG_KERNEL + help + Say Y here if you want the Driver core to produce a bunch of + debug messages to the system log. Select this if you are having a + problem with the driver core and want to see more of what is + going on. 
- If you are unsure about this, say N here. + If you are unsure about this, say N here. endmenu --- linux-2.6.6-rc1/drivers/base/node.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/base/node.c 2004-04-18 22:25:49.539299360 -0700 @@ -30,13 +30,20 @@ static ssize_t node_read_cpumap(struct s static SYSDEV_ATTR(cpumap,S_IRUGO,node_read_cpumap,NULL); +/* Can be overwritten by architecture specific code. */ +int __attribute__((weak)) hugetlb_report_node_meminfo(int node, char *buf) +{ + return 0; +} + #define K(x) ((x) << (PAGE_SHIFT - 10)) static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) { + int n; int nid = dev->id; struct sysinfo i; si_meminfo_node(&i, nid); - return sprintf(buf, "\n" + n = sprintf(buf, "\n" "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" "Node %d MemUsed: %8lu kB\n" @@ -51,10 +58,52 @@ static ssize_t node_read_meminfo(struct nid, K(i.freehigh), nid, K(i.totalram-i.totalhigh), nid, K(i.freeram-i.freehigh)); + n += hugetlb_report_node_meminfo(nid, buf + n); + return n; } + #undef K static SYSDEV_ATTR(meminfo,S_IRUGO,node_read_meminfo,NULL); +static ssize_t node_read_numastat(struct sys_device * dev, char * buf) +{ + unsigned long numa_hit, numa_miss, interleave_hit, numa_foreign; + unsigned long local_node, other_node; + int i, cpu; + pg_data_t *pg = NODE_DATA(dev->id); + numa_hit = 0; + numa_miss = 0; + interleave_hit = 0; + numa_foreign = 0; + local_node = 0; + other_node = 0; + for (i = 0; i < MAX_NR_ZONES; i++) { + struct zone *z = &pg->node_zones[i]; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct per_cpu_pageset *ps = &z->pageset[cpu]; + numa_hit += ps->numa_hit; + numa_miss += ps->numa_miss; + numa_foreign += ps->numa_foreign; + interleave_hit += ps->interleave_hit; + local_node += ps->local_node; + other_node += ps->other_node; + } + } + return sprintf(buf, + "numa_hit %lu\n" + "numa_miss %lu\n" + "numa_foreign %lu\n" + "interleave_hit %lu\n" + "local_node %lu\n" + "other_node %lu\n", + numa_hit, + 
numa_miss, + numa_foreign, + interleave_hit, + local_node, + other_node); +} +static SYSDEV_ATTR(numastat,S_IRUGO,node_read_numastat,NULL); /* * register_node - Setup a driverfs device for a node. @@ -74,6 +123,7 @@ int __init register_node(struct node *no if (!error){ sysdev_create_file(&node->sysdev, &attr_cpumap); sysdev_create_file(&node->sysdev, &attr_meminfo); + sysdev_create_file(&node->sysdev, &attr_numastat); } return error; } --- linux-2.6.6-rc1/drivers/block/as-iosched.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/drivers/block/as-iosched.c 2004-04-18 22:25:57.188136560 -0700 @@ -43,7 +43,7 @@ * read_batch_expire describes how long we will allow a stream of reads to * persist before looking to see whether it is time to switch over to writes. */ -#define default_read_batch_expire (HZ / 4) +#define default_read_batch_expire (HZ / 2) /* * write_batch_expire describes how long we want a stream of writes to run for. @@ -51,7 +51,7 @@ * See, the problem is: we can send a lot of writes to disk cache / TCQ in * a short amount of time... */ -#define default_write_batch_expire (HZ / 16) +#define default_write_batch_expire (HZ / 8) /* * max time we may wait to anticipate a read (default around 6ms) --- linux-2.6.6-rc1/drivers/block/cciss.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/block/cciss.c 2004-04-18 22:25:48.166508056 -0700 @@ -988,7 +988,7 @@ static int revalidate_allvol(ctlr_info_t drive_info_struct *drv = &(host->drv[i]); if (!drv->nr_blocks) continue; - blk_queue_hardsect_size(host->queue, drv->block_size); + blk_queue_hardsect_size(drv->queue, drv->block_size); set_capacity(disk, drv->nr_blocks); add_disk(disk); } @@ -2013,7 +2013,7 @@ static irqreturn_t do_cciss_intr(int irq CommandList_struct *c; unsigned long flags; __u32 a, a1; - + int j; /* Is this interrupt for us? 
*/ if (( h->access.intr_pending(h) == 0) || (h->interrupts_enabled == 0)) @@ -2059,11 +2059,18 @@ static irqreturn_t do_cciss_intr(int irq } } } - /* * See if we can queue up some more IO + * check every disk that exists on this controller + * and start it's IO */ - blk_start_queue(h->queue); + for(j=0;j < NWD; j++) { + /* make sure the disk has been added and the drive is real */ + /* because this can be called from the middle of init_one */ + if(!(h->gendisk[j]->queue) || !(h->drv[j].nr_blocks) ) + continue; + blk_start_queue(h->gendisk[j]->queue); + } spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); return IRQ_HANDLED; } @@ -2510,7 +2517,6 @@ static void free_hba(int i) static int __devinit cciss_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - request_queue_t *q; int i; int j; @@ -2568,13 +2574,6 @@ static int __devinit cciss_init_one(stru } spin_lock_init(&hba[i]->lock); - q = blk_init_queue(do_cciss_request, &hba[i]->lock); - if (!q) - goto clean4; - - q->backing_dev_info.ra_pages = READ_AHEAD; - hba[i]->queue = q; - q->queuedata = hba[i]; /* Initialize the pdev driver private data. have it point to hba[i]. */ @@ -2596,6 +2595,19 @@ static int __devinit cciss_init_one(stru cciss_procinit(i); + for(j=0; jdrv[j]); + struct gendisk *disk = hba[i]->gendisk[j]; + request_queue_t *q; + + q = blk_init_queue(do_cciss_request, &hba[i]->lock); + if (!q) { + printk(KERN_ERR + "cciss: unable to allocate queue for disk %d\n", + j); + break; + } + drv->queue = q; blk_queue_bounce_limit(q, hba[i]->pdev->dma_mask); /* This is a hardware imposed limit. 
*/ @@ -2606,21 +2618,17 @@ static int __devinit cciss_init_one(stru blk_queue_max_sectors(q, 512); - - for(j=0; jdrv[j]); - struct gendisk *disk = hba[i]->gendisk[j]; - + q->queuedata = hba[i]; sprintf(disk->disk_name, "cciss/c%dd%d", i, j); sprintf(disk->devfs_name, "cciss/host%d/target%d", i, j); disk->major = COMPAQ_CISS_MAJOR + i; disk->first_minor = j << NWD_SHIFT; disk->fops = &cciss_fops; - disk->queue = hba[i]->queue; + disk->queue = q; disk->private_data = drv; if( !(drv->nr_blocks)) continue; - blk_queue_hardsect_size(hba[i]->queue, drv->block_size); + blk_queue_hardsect_size(q, drv->block_size); set_capacity(disk, drv->nr_blocks); add_disk(disk); } @@ -2690,9 +2698,9 @@ static void __devexit cciss_remove_one ( struct gendisk *disk = hba[i]->gendisk[j]; if (disk->flags & GENHD_FL_UP) del_gendisk(disk); + blk_cleanup_queue(disk->queue); } - blk_cleanup_queue(hba[i]->queue); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof(CommandList_struct), hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle); pci_free_consistent(hba[i]->pdev, NR_CMDS * sizeof( ErrorInfo_struct), --- linux-2.6.6-rc1/drivers/block/cciss.h 2004-02-17 20:48:42.000000000 -0800 +++ 25/drivers/block/cciss.h 2004-04-18 22:25:48.167507904 -0700 @@ -27,6 +27,7 @@ typedef struct _drive_info_struct { __u32 LunID; int usage_count; + struct request_queue *queue; sector_t nr_blocks; int block_size; int heads; @@ -69,7 +70,6 @@ struct ctlr_info unsigned int maxQsinceinit; unsigned int maxSG; spinlock_t lock; - struct request_queue *queue; //* pointers to command and error info pool */ CommandList_struct *cmd_pool; @@ -252,7 +252,7 @@ struct board_type { struct access_method *access; }; -#define CCISS_LOCK(i) (hba[i]->queue->queue_lock) +#define CCISS_LOCK(i) (&(hba[i]->lock)) #endif /* CCISS_H */ --- linux-2.6.6-rc1/drivers/block/floppy98.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/drivers/block/floppy98.c 2004-04-18 22:25:24.679078688 -0700 @@ -168,8 +168,11 @@ static int print_unex=1; #include #include 
#include -#define FDPATCHES #include +#include +#include +#include /* for the compatibility eject ioctl */ +#include /* * 1998/1/21 -- Richard Gooch -- devfs support @@ -179,7 +182,6 @@ static int print_unex=1; #include #define FLOPPY98_MOTOR_MASK 0x08 -#define FDPATCHES #include #define FD98_STATUS (0 + FD_IOPORT ) #define FD98_DATA (2 + FD_IOPORT ) @@ -250,9 +252,10 @@ static int use_virtual_dma; */ static spinlock_t floppy_lock = SPIN_LOCK_UNLOCKED; +static struct completion device_release; static unsigned short virtual_dma_port=0x3f0; -void floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs); +irqreturn_t floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs); static int set_mode(char mask, char data); static void register_devfs_entries (int drive) __init; @@ -987,9 +990,9 @@ static void empty(void) static DECLARE_WORK(floppy_work, NULL, NULL); -static void schedule_bh( void (*handler)(void*) ) +static void schedule_bh(void (*handler) (void)) { - PREPARE_WORK(&floppy_work, handler, NULL); + PREPARE_WORK(&floppy_work, (void (*)(void *))handler, NULL); schedule_work(&floppy_work); } @@ -1627,7 +1630,7 @@ static void print_result(char *message, } /* interrupt handler. Note that this can be called externally on the Sparc */ -void floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs) +irqreturn_t floppy_interrupt(int irq, void *dev_id, struct pt_regs * regs) { void (*handler)(void) = do_floppy; int do_print; @@ -1648,7 +1651,7 @@ void floppy_interrupt(int irq, void *dev printk("floppy interrupt on bizarre fdc %d\n",fdc); printk("handler=%p\n", handler); is_alive("bizarre fdc"); - return; + return IRQ_NONE; } FDCS->reset = 0; @@ -1661,7 +1664,7 @@ void floppy_interrupt(int irq, void *dev * activity. 
*/ - do_print = !handler && !initialising; + do_print = !handler && print_unex && !initialising; inr = result(); if (inr && do_print) @@ -1701,13 +1704,16 @@ void floppy_interrupt(int irq, void *dev } while ((ST0 & 0x83) != UNIT(current_drive) && inr == 2); } if (handler) { - schedule_bh( (void *)(void *) handler); + schedule_bh(handler); } else { #if 0 FDCS->reset = 1; #endif } is_alive("normal interrupt end"); + + /* FIXME! Was it really for us? */ + return IRQ_HANDLED; } static void recalibrate_floppy(void) @@ -4231,11 +4237,16 @@ static int __init floppy_setup(char *str static int have_no_fdc= -ENODEV; +static void floppy_device_release(struct device *dev) +{ + complete(&device_release); +} + static struct platform_device floppy_device = { .name = "floppy", .id = 0, .dev = { - .name = "Floppy Drive", + .release = floppy_device_release, }, }; @@ -4267,10 +4278,8 @@ int __init floppy_init(void) } devfs_mk_dir (NULL, "floppy", NULL); - if (register_blkdev(FLOPPY_MAJOR,"fd")) { - err = -EBUSY; + if ((err = register_blkdev(FLOPPY_MAJOR,"fd"))) goto out; - } for (i=0; imajor = FLOPPY_MAJOR; @@ -4288,7 +4297,7 @@ int __init floppy_init(void) else floppy_sizes[i] = MAX_DISK_SIZE << 1; - floppy_queue = blk_init_queue(do_fd_request, &floppy_lock) + floppy_queue = blk_init_queue(do_fd_request, &floppy_lock); if (!floppy_queue) goto out_queue; @@ -4628,10 +4637,14 @@ void cleanup_module(void) { int drive; + init_completion(&device_release); platform_device_unregister(&floppy_device); blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); unregister_blkdev(FLOPPY_MAJOR, "fd"); + for (drive = 0; drive < N_DRIVE; drive++) { + del_timer_sync(&motor_off_timer[drive]); + if ((allowed_drive_mask & (1 << drive)) && fdc_state[FDC(drive)].version != FDC_NONE) { del_gendisk(disks[drive]); @@ -4641,9 +4654,17 @@ void cleanup_module(void) } devfs_remove("floppy"); + del_timer_sync(&fd_timeout); + del_timer_sync(&fd_timer); blk_cleanup_queue(floppy_queue); + + if (usage_count) + 
floppy_release_irq_and_dma(); + /* eject disk, if any */ fd_eject(0); + + wait_for_completion(&device_release); } MODULE_PARM(floppy,"s"); --- linux-2.6.6-rc1/drivers/block/ll_rw_blk.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/block/ll_rw_blk.c 2004-04-18 22:25:51.121058896 -0700 @@ -1153,6 +1153,7 @@ static inline void __generic_unplug_devi **/ void generic_unplug_device(request_queue_t *q) { + might_sleep(); spin_lock_irq(q->queue_lock); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -2429,7 +2430,7 @@ EXPORT_SYMBOL(generic_make_request); * interfaces, @bio must be presetup and ready for I/O. * */ -int submit_bio(int rw, struct bio *bio) +void submit_bio(int rw, struct bio *bio) { int count = bio_sectors(bio); @@ -2451,7 +2452,6 @@ int submit_bio(int rw, struct bio *bio) } generic_make_request(bio); - return 1; } EXPORT_SYMBOL(submit_bio); @@ -2733,7 +2733,7 @@ void end_that_request_last(struct reques struct gendisk *disk = req->rq_disk; struct completion *waiting = req->waiting; - if (unlikely(laptop_mode)) + if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); if (disk && blk_fs_request(req)) { --- linux-2.6.6-rc1/drivers/bluetooth/bcm203x.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/bluetooth/bcm203x.c 2004-04-18 22:25:24.683078080 -0700 @@ -74,7 +74,7 @@ struct bcm203x_data { struct timer_list timer; struct urb *urb; - unsigned char buffer[4096]; + unsigned char *buffer; unsigned char *fw_data; unsigned int fw_size; @@ -99,8 +99,7 @@ static void bcm203x_complete(struct urb case BCM203X_LOAD_MINIDRV: memcpy(data->buffer, "#", 1); - usb_fill_bulk_urb(urb, udev, - usb_sndbulkpipe(udev, BCM203X_OUT_EP), + usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, BCM203X_OUT_EP), data->buffer, 1, bcm203x_complete, data); data->state = BCM203X_SELECT_MEMORY; @@ -109,8 +108,7 @@ static void bcm203x_complete(struct urb break; case BCM203X_SELECT_MEMORY: - usb_fill_int_urb(urb, udev, - usb_rcvintpipe(udev, BCM203X_IN_EP), 
+ usb_fill_int_urb(urb, udev, usb_rcvintpipe(udev, BCM203X_IN_EP), data->buffer, 32, bcm203x_complete, data, 1); data->state = BCM203X_CHECK_MEMORY; @@ -130,20 +128,15 @@ static void bcm203x_complete(struct urb case BCM203X_LOAD_FIRMWARE: if (data->fw_sent == data->fw_size) { - usb_fill_int_urb(urb, udev, - usb_rcvintpipe(udev, BCM203X_IN_EP), - data->buffer, 32, - bcm203x_complete, data, 1); + usb_fill_int_urb(urb, udev, usb_rcvintpipe(udev, BCM203X_IN_EP), + data->buffer, 32, bcm203x_complete, data, 1); data->state = BCM203X_CHECK_FIRMWARE; } else { - len = min_t(uint, data->fw_size - data->fw_sent, - sizeof(data->buffer)); + len = min_t(uint, data->fw_size - data->fw_sent, 4096); - usb_fill_bulk_urb(urb, udev, - usb_sndbulkpipe(udev, BCM203X_OUT_EP), - data->fw_data + data->fw_sent, len, - bcm203x_complete, data); + usb_fill_bulk_urb(urb, udev, usb_sndbulkpipe(udev, BCM203X_OUT_EP), + data->fw_data + data->fw_sent, len, bcm203x_complete, data); data->fw_sent += len; } @@ -177,6 +170,7 @@ static int bcm203x_probe(struct usb_inte const struct firmware *firmware; struct usb_device *udev = interface_to_usbdev(intf); struct bcm203x_data *data; + int size; BT_DBG("intf %p id %p", intf, id); @@ -210,18 +204,20 @@ static int bcm203x_probe(struct usb_inte BT_DBG("minidrv data %p size %d", firmware->data, firmware->size); - if (firmware->size > sizeof(data->buffer)) { - BT_ERR("Mini driver exceeds size of buffer"); + size = max_t(uint, firmware->size, 4096); + + data->buffer = kmalloc(size, GFP_KERNEL); + if (!data->buffer) { + BT_ERR("Can't allocate memory for mini driver"); release_firmware(firmware); usb_free_urb(data->urb); kfree(data); - return -EIO; + return -ENOMEM; } memcpy(data->buffer, firmware->data, firmware->size); - usb_fill_bulk_urb(data->urb, udev, - usb_sndbulkpipe(udev, BCM203X_OUT_EP), + usb_fill_bulk_urb(data->urb, udev, usb_sndbulkpipe(udev, BCM203X_OUT_EP), data->buffer, firmware->size, bcm203x_complete, data); release_firmware(firmware); @@ -229,6 
+225,7 @@ static int bcm203x_probe(struct usb_inte if (request_firmware(&firmware, "BCM2033-FW.bin", &udev->dev) < 0) { BT_ERR("Firmware request failed"); usb_free_urb(data->urb); + kfree(data->buffer); kfree(data); return -EIO; } @@ -239,6 +236,7 @@ static int bcm203x_probe(struct usb_inte if (!data->fw_data) { BT_ERR("Can't allocate memory for firmware image"); usb_free_urb(data->urb); + kfree(data->buffer); kfree(data); return -ENOMEM; } @@ -272,6 +270,7 @@ static void bcm203x_disconnect(struct us usb_free_urb(data->urb); kfree(data->fw_data); + kfree(data->buffer); kfree(data); } --- linux-2.6.6-rc1/drivers/bluetooth/bfusb.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/bluetooth/bfusb.c 2004-04-18 22:25:24.684077928 -0700 @@ -98,6 +98,14 @@ struct bfusb_scb { static void bfusb_tx_complete(struct urb *urb, struct pt_regs *regs); static void bfusb_rx_complete(struct urb *urb, struct pt_regs *regs); +static inline void bfusb_wait_for_urb(struct urb *urb) +{ + while (atomic_read(&urb->count) > 1) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout((5 * HZ + 999) / 1000); + } +} + static struct urb *bfusb_get_completed(struct bfusb *bfusb) { struct sk_buff *skb; @@ -114,7 +122,7 @@ static struct urb *bfusb_get_completed(s return urb; } -static inline void bfusb_unlink_urbs(struct bfusb *bfusb) +static void bfusb_unlink_urbs(struct bfusb *bfusb) { struct sk_buff *skb; struct urb *urb; @@ -124,6 +132,7 @@ static inline void bfusb_unlink_urbs(str while ((skb = skb_dequeue(&bfusb->pending_q))) { urb = ((struct bfusb_scb *) skb->cb)->urb; usb_unlink_urb(urb); + bfusb_wait_for_urb(urb); skb_queue_tail(&bfusb->completed_q, skb); } @@ -359,11 +368,11 @@ static void bfusb_rx_complete(struct urb BT_DBG("bfusb %p urb %p skb %p len %d", bfusb, urb, skb, skb->len); - if (!test_bit(HCI_RUNNING, &bfusb->hdev->flags)) - return; - read_lock(&bfusb->lock); + if (!test_bit(HCI_RUNNING, &bfusb->hdev->flags)) + goto unlock; + if (urb->status || !count) goto resubmit; @@ 
-414,6 +423,7 @@ resubmit: bfusb->hdev->name, urb, err); } +unlock: read_unlock(&bfusb->lock); } --- linux-2.6.6-rc1/drivers/bluetooth/bluecard_cs.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/bluecard_cs.c 2004-04-18 22:25:24.685077776 -0700 @@ -174,6 +174,9 @@ void bluecard_activity_led_timeout(u_lon bluecard_info_t *info = (bluecard_info_t *)arg; unsigned int iobase = info->link.io.BasePort1; + if (!test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) + return; + if (test_bit(CARD_HAS_ACTIVITY_LED, &(info->hw_state))) { /* Disable activity LED */ outb(0x08 | 0x20, iobase + 0x30); @@ -188,6 +191,9 @@ static void bluecard_enable_activity_led { unsigned int iobase = info->link.io.BasePort1; + if (!test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) + return; + if (test_bit(CARD_HAS_ACTIVITY_LED, &(info->hw_state))) { /* Enable activity LED */ outb(0x10 | 0x40, iobase + 0x30); @@ -505,13 +511,13 @@ static irqreturn_t bluecard_interrupt(in unsigned int iobase; unsigned char reg; - if (!info) { + if (!info || !info->hdev) { BT_ERR("Call of irq %d for unknown device", irq); return IRQ_NONE; } if (!test_bit(CARD_READY, &(info->hw_state))) - return IRQ_NONE; + return IRQ_HANDLED; iobase = info->link.io.BasePort1; @@ -629,13 +635,16 @@ static int bluecard_hci_open(struct hci_ bluecard_info_t *info = (bluecard_info_t *)(hdev->driver_data); unsigned int iobase = info->link.io.BasePort1; - bluecard_hci_set_baud_rate(hdev, DEFAULT_BAUD_RATE); + if (test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) + bluecard_hci_set_baud_rate(hdev, DEFAULT_BAUD_RATE); if (test_and_set_bit(HCI_RUNNING, &(hdev->flags))) return 0; - /* Enable LED */ - outb(0x08 | 0x20, iobase + 0x30); + if (test_bit(CARD_HAS_PCCARD_ID, &(info->hw_state))) { + /* Enable LED */ + outb(0x08 | 0x20, iobase + 0x30); + } return 0; } @@ -651,8 +660,10 @@ static int bluecard_hci_close(struct hci bluecard_hci_flush(hdev); - /* Disable LED */ - outb(0x00, iobase + 0x30); + if (test_bit(CARD_HAS_PCCARD_ID, 
&(info->hw_state))) { + /* Disable LED */ + outb(0x00, iobase + 0x30); + } return 0; } @@ -725,6 +736,27 @@ int bluecard_open(bluecard_info_t *info) info->rx_count = 0; info->rx_skb = NULL; + /* Initialize HCI device */ + hdev = hci_alloc_dev(); + if (!hdev) { + BT_ERR("Can't allocate HCI device"); + return -ENOMEM; + } + + info->hdev = hdev; + + hdev->type = HCI_PCCARD; + hdev->driver_data = info; + + hdev->open = bluecard_hci_open; + hdev->close = bluecard_hci_close; + hdev->flush = bluecard_hci_flush; + hdev->send = bluecard_hci_send_frame; + hdev->destruct = bluecard_hci_destruct; + hdev->ioctl = bluecard_hci_ioctl; + + hdev->owner = THIS_MODULE; + id = inb(iobase + 0x30); if ((id & 0x0f) == 0x02) @@ -759,6 +791,24 @@ int bluecard_open(bluecard_info_t *info) info->ctrl_reg |= REG_CONTROL_INTERRUPT; outb(info->ctrl_reg, iobase + REG_CONTROL); + if ((id & 0x0f) == 0x03) { + /* Disable RTS */ + info->ctrl_reg |= REG_CONTROL_RTS; + outb(info->ctrl_reg, iobase + REG_CONTROL); + + /* Set baud rate */ + info->ctrl_reg |= 0x03; + outb(info->ctrl_reg, iobase + REG_CONTROL); + + /* Enable RTS */ + info->ctrl_reg &= ~REG_CONTROL_RTS; + outb(info->ctrl_reg, iobase + REG_CONTROL); + + set_bit(XMIT_BUF_ONE_READY, &(info->tx_state)); + set_bit(XMIT_BUF_TWO_READY, &(info->tx_state)); + set_bit(XMIT_SENDING_READY, &(info->tx_state)); + } + /* Start the RX buffers */ outb(REG_COMMAND_RX_BUF_ONE, iobase + REG_COMMAND); outb(REG_COMMAND_RX_BUF_TWO, iobase + REG_COMMAND); @@ -776,30 +826,10 @@ int bluecard_open(bluecard_info_t *info) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout((HZ * 5) / 4); // or set it to 3/2 - - /* Initialize and register HCI device */ - hdev = hci_alloc_dev(); - if (!hdev) { - BT_ERR("Can't allocate HCI device"); - return -ENOMEM; - } - - info->hdev = hdev; - - hdev->type = HCI_PCCARD; - hdev->driver_data = info; - - hdev->open = bluecard_hci_open; - hdev->close = bluecard_hci_close; - hdev->flush = bluecard_hci_flush; - hdev->send = 
bluecard_hci_send_frame; - hdev->destruct = bluecard_hci_destruct; - hdev->ioctl = bluecard_hci_ioctl; - - hdev->owner = THIS_MODULE; - + /* Register HCI device */ if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); + info->hdev = NULL; hci_free_dev(hdev); return -ENODEV; } @@ -813,6 +843,9 @@ int bluecard_close(bluecard_info_t *info unsigned int iobase = info->link.io.BasePort1; struct hci_dev *hdev = info->hdev; + if (!hdev) + return -ENODEV; + bluecard_hci_close(hdev); clear_bit(CARD_READY, &(info->hw_state)); @@ -1017,6 +1050,8 @@ void bluecard_release(dev_link_t *link) if (link->state & DEV_PRESENT) bluecard_close(info); + del_timer(&(info->timer)); + link->dev = NULL; pcmcia_release_configuration(link->handle); --- linux-2.6.6-rc1/drivers/bluetooth/bt3c_cs.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/bt3c_cs.c 2004-04-18 22:25:24.687077472 -0700 @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,9 @@ #include #include +#include +#include + #include #include #include @@ -361,7 +365,7 @@ static irqreturn_t bt3c_interrupt(int ir unsigned int iobase; int iir; - if (!info) { + if (!info || !info->hdev) { BT_ERR("Call of irq %d for unknown device", irq); return IRQ_NONE; } @@ -379,7 +383,8 @@ static irqreturn_t bt3c_interrupt(int ir } else if ((stat & 0xff) != 0xff) { if (stat & 0x0020) { int stat = bt3c_read(iobase, 0x7002) & 0x10; - BT_ERR("Antenna %s", stat ? "out" : "in"); + BT_INFO("%s: Antenna %s", info->hdev->name, + stat ? 
"out" : "in"); } if (stat & 0x0001) bt3c_receive(info); @@ -481,36 +486,101 @@ static int bt3c_hci_ioctl(struct hci_dev -/* ======================== User mode firmware loader ======================== */ +/* ======================== Card services HCI interaction ======================== */ -#define FW_LOADER "/sbin/bluefw" +static struct device bt3c_device = { + .bus_id = "pcmcia", +}; -static int bt3c_firmware_load(bt3c_info_t *info) +static int bt3c_load_firmware(bt3c_info_t *info, unsigned char *firmware, int count) { - char dev[16]; - int err; + char *ptr = (char *) firmware; + char b[9]; + unsigned int iobase, size, addr, fcs, tmp; + int i, err = 0; - char *argv[] = { FW_LOADER, "pccard", dev, NULL }; - char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; + iobase = info->link.io.BasePort1; - sprintf(dev, "%04x", info->link.io.BasePort1); + /* Reset */ + bt3c_io_write(iobase, 0x8040, 0x0404); + bt3c_io_write(iobase, 0x8040, 0x0400); + + udelay(1); + + bt3c_io_write(iobase, 0x8040, 0x0404); + + udelay(17); + + /* Load */ + while (count) { + if (ptr[0] != 'S') { + BT_ERR("Bad address in firmware"); + err = -EFAULT; + goto error; + } - err = call_usermodehelper(FW_LOADER, argv, envp, 1); - if (err) - BT_ERR("Failed to run \"%s pccard %s\" (errno=%d)", FW_LOADER, dev, err); + memset(b, 0, sizeof(b)); + memcpy(b, ptr + 2, 2); + size = simple_strtol(b, NULL, 16); + + memset(b, 0, sizeof(b)); + memcpy(b, ptr + 4, 8); + addr = simple_strtol(b, NULL, 16); + + memset(b, 0, sizeof(b)); + memcpy(b, ptr + (size * 2) + 2, 2); + fcs = simple_strtol(b, NULL, 16); + + memset(b, 0, sizeof(b)); + for (tmp = 0, i = 0; i < size; i++) { + memcpy(b, ptr + (i * 2) + 2, 2); + tmp += simple_strtol(b, NULL, 16); + } - return err; -} + if (((tmp + fcs) & 0xff) != 0xff) { + BT_ERR("Checksum error in firmware"); + err = -EILSEQ; + goto error; + } + if (ptr[1] == '3') { + bt3c_address(iobase, addr); + memset(b, 0, sizeof(b)); + for (i = 0; i < (size - 4) / 
2; i++) { + memcpy(b, ptr + (i * 4) + 12, 4); + tmp = simple_strtol(b, NULL, 16); + bt3c_put(iobase, tmp); + } + } -/* ======================== Card services HCI interaction ======================== */ + ptr += (size * 2) + 6; + count -= (size * 2) + 6; + } + + udelay(17); + + /* Boot */ + bt3c_address(iobase, 0x3000); + outb(inb(iobase + CONTROL) | 0x40, iobase + CONTROL); + +error: + udelay(17); + + /* Clear */ + bt3c_io_write(iobase, 0x7006, 0x0000); + bt3c_io_write(iobase, 0x7005, 0x0000); + bt3c_io_write(iobase, 0x7001, 0x0000); + + return err; +} int bt3c_open(bt3c_info_t *info) { + const struct firmware *firmware; struct hci_dev *hdev; int err; @@ -522,18 +592,7 @@ int bt3c_open(bt3c_info_t *info) info->rx_count = 0; info->rx_skb = NULL; - /* Load firmware */ - - if ((err = bt3c_firmware_load(info)) < 0) - return err; - - /* Timeout before it is safe to send the first HCI packet */ - - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - - - /* Initialize and register HCI device */ + /* Initialize HCI device */ hdev = hci_alloc_dev(); if (!hdev) { BT_ERR("Can't allocate HCI device"); @@ -545,22 +604,48 @@ int bt3c_open(bt3c_info_t *info) hdev->type = HCI_PCCARD; hdev->driver_data = info; - hdev->open = bt3c_hci_open; - hdev->close = bt3c_hci_close; - hdev->flush = bt3c_hci_flush; - hdev->send = bt3c_hci_send_frame; + hdev->open = bt3c_hci_open; + hdev->close = bt3c_hci_close; + hdev->flush = bt3c_hci_flush; + hdev->send = bt3c_hci_send_frame; hdev->destruct = bt3c_hci_destruct; - hdev->ioctl = bt3c_hci_ioctl; + hdev->ioctl = bt3c_hci_ioctl; hdev->owner = THIS_MODULE; - - if (hci_register_dev(hdev) < 0) { + + /* Load firmware */ + err = request_firmware(&firmware, "BT3CPCC.bin", &bt3c_device); + if (err < 0) { + BT_ERR("Firmware request failed"); + goto error; + } + + err = bt3c_load_firmware(info, firmware->data, firmware->size); + + release_firmware(firmware); + + if (err < 0) { + BT_ERR("Firmware loading failed"); + goto error; + } + + /* 
Timeout before it is safe to send the first HCI packet */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + + /* Register HCI device */ + err = hci_register_dev(hdev); + if (err < 0) { BT_ERR("Can't register HCI device"); - hci_free_dev(hdev); - return -ENODEV; + goto error; } return 0; + +error: + info->hdev = NULL; + hci_free_dev(hdev); + return err; } @@ -568,6 +653,9 @@ int bt3c_close(bt3c_info_t *info) { struct hci_dev *hdev = info->hdev; + if (!hdev) + return -ENODEV; + bt3c_hci_close(hdev); if (hci_unregister_dev(hdev) < 0) --- linux-2.6.6-rc1/drivers/bluetooth/btuart_cs.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/btuart_cs.c 2004-04-18 22:25:24.688077320 -0700 @@ -308,7 +308,7 @@ static irqreturn_t btuart_interrupt(int int boguscount = 0; int iir, lsr; - if (!info) { + if (!info || !info->hdev) { BT_ERR("Call of irq %d for unknown device", irq); return IRQ_NONE; } @@ -504,6 +504,27 @@ int btuart_open(btuart_info_t *info) info->rx_count = 0; info->rx_skb = NULL; + /* Initialize HCI device */ + hdev = hci_alloc_dev(); + if (!hdev) { + BT_ERR("Can't allocate HCI device"); + return -ENOMEM; + } + + info->hdev = hdev; + + hdev->type = HCI_PCCARD; + hdev->driver_data = info; + + hdev->open = btuart_hci_open; + hdev->close = btuart_hci_close; + hdev->flush = btuart_hci_flush; + hdev->send = btuart_hci_send_frame; + hdev->destruct = btuart_hci_destruct; + hdev->ioctl = btuart_hci_ioctl; + + hdev->owner = THIS_MODULE; + spin_lock_irqsave(&(info->lock), flags); /* Reset UART */ @@ -527,30 +548,10 @@ int btuart_open(btuart_info_t *info) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); - - /* Initialize and register HCI device */ - hdev = hci_alloc_dev(); - if (!hdev) { - BT_ERR("Can't allocate HCI device"); - return -ENOMEM; - } - - info->hdev = hdev; - - hdev->type = HCI_PCCARD; - hdev->driver_data = info; - - hdev->open = btuart_hci_open; - hdev->close = btuart_hci_close; - hdev->flush = btuart_hci_flush; - hdev->send 
= btuart_hci_send_frame; - hdev->destruct = btuart_hci_destruct; - hdev->ioctl = btuart_hci_ioctl; - - hdev->owner = THIS_MODULE; - + /* Register HCI device */ if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); + info->hdev = NULL; hci_free_dev(hdev); return -ENODEV; } @@ -565,6 +566,9 @@ int btuart_close(btuart_info_t *info) unsigned int iobase = info->link.io.BasePort1; struct hci_dev *hdev = info->hdev; + if (!hdev) + return -ENODEV; + btuart_hci_close(hdev); spin_lock_irqsave(&(info->lock), flags); --- linux-2.6.6-rc1/drivers/bluetooth/dtl1_cs.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/dtl1_cs.c 2004-04-18 22:25:24.689077168 -0700 @@ -312,7 +312,7 @@ static irqreturn_t dtl1_interrupt(int ir int boguscount = 0; int iir, lsr; - if (!info) { + if (!info || !info->hdev) { BT_ERR("Call of irq %d for unknown device", irq); return IRQ_NONE; } @@ -483,6 +483,27 @@ int dtl1_open(dtl1_info_t *info) set_bit(XMIT_WAITING, &(info->tx_state)); + /* Initialize HCI device */ + hdev = hci_alloc_dev(); + if (!hdev) { + BT_ERR("Can't allocate HCI device"); + return -ENOMEM; + } + + info->hdev = hdev; + + hdev->type = HCI_PCCARD; + hdev->driver_data = info; + + hdev->open = dtl1_hci_open; + hdev->close = dtl1_hci_close; + hdev->flush = dtl1_hci_flush; + hdev->send = dtl1_hci_send_frame; + hdev->destruct = dtl1_hci_destruct; + hdev->ioctl = dtl1_hci_ioctl; + + hdev->owner = THIS_MODULE; + spin_lock_irqsave(&(info->lock), flags); /* Reset UART */ @@ -506,30 +527,10 @@ int dtl1_open(dtl1_info_t *info) set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ * 2); - - /* Initialize and register HCI device */ - hdev = hci_alloc_dev(); - if (!hdev) { - BT_ERR("Can't allocate HCI device"); - return -ENOMEM; - } - - info->hdev = hdev; - - hdev->type = HCI_PCCARD; - hdev->driver_data = info; - - hdev->open = dtl1_hci_open; - hdev->close = dtl1_hci_close; - hdev->flush = dtl1_hci_flush; - hdev->send = dtl1_hci_send_frame; - hdev->destruct = 
dtl1_hci_destruct; - hdev->ioctl = dtl1_hci_ioctl; - - hdev->owner = THIS_MODULE; - + /* Register HCI device */ if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); + info->hdev = NULL; hci_free_dev(hdev); return -ENODEV; } @@ -544,6 +545,9 @@ int dtl1_close(dtl1_info_t *info) unsigned int iobase = info->link.io.BasePort1; struct hci_dev *hdev = info->hdev; + if (!hdev) + return -ENODEV; + dtl1_hci_close(hdev); spin_lock_irqsave(&(info->lock), flags); --- linux-2.6.6-rc1/drivers/bluetooth/hci_uart.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/hci_uart.h 2004-04-18 22:25:24.689077168 -0700 @@ -35,11 +35,12 @@ #define HCIUARTGETPROTO _IOR('U', 201, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 3 +#define HCI_UART_MAX_PROTO 4 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 -#define HCI_UART_NCSP 2 +#define HCI_UART_3WIRE 2 +#define HCI_UART_H4DS 3 #ifdef __KERNEL__ struct hci_uart; --- linux-2.6.6-rc1/drivers/bluetooth/hci_usb.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/bluetooth/hci_usb.c 2004-04-18 22:25:24.690077016 -0700 @@ -109,8 +109,7 @@ struct _urb *_urb_alloc(int isoc, int gf sizeof(struct usb_iso_packet_descriptor) * isoc, gfp); if (_urb) { memset(_urb, 0, sizeof(*_urb)); - _urb->urb.count = (atomic_t)ATOMIC_INIT(1); - spin_lock_init(&_urb->urb.lock); + usb_init_urb(&_urb->urb); } return _urb; } @@ -341,6 +340,14 @@ static int hci_usb_flush(struct hci_dev return 0; } +static inline void hci_usb_wait_for_urb(struct urb *urb) +{ + while (atomic_read(&urb->count) > 1) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout((5 * HZ + 999) / 1000); + } +} + static void hci_usb_unlink_urbs(struct hci_usb *husb) { int i; @@ -357,6 +364,7 @@ static void hci_usb_unlink_urbs(struct h BT_DBG("%s unlinking _urb %p type %d urb %p", husb->hdev->name, _urb, _urb->type, urb); usb_unlink_urb(urb); + hci_usb_wait_for_urb(urb); _urb_queue_tail(__completed_q(husb, _urb->type), _urb); } @@ -699,11 +707,11 @@ static void 
hci_usb_rx_complete(struct u BT_DBG("%s urb %p type %d status %d count %d flags %x", hdev->name, urb, _urb->type, urb->status, count, urb->transfer_flags); - if (!test_bit(HCI_RUNNING, &hdev->flags)) - return; - read_lock(&husb->completion_lock); + if (!test_bit(HCI_RUNNING, &hdev->flags)) + goto unlock; + if (urb->status || !count) goto resubmit; @@ -739,6 +747,7 @@ resubmit: BT_DBG("%s urb %p type %d resubmit status %d", hdev->name, urb, _urb->type, err); +unlock: read_unlock(&husb->completion_lock); } --- linux-2.6.6-rc1/drivers/bluetooth/Kconfig 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/bluetooth/Kconfig 2004-04-18 22:25:24.682078232 -0700 @@ -104,6 +104,7 @@ config BT_HCIDTL1 config BT_HCIBT3C tristate "HCI BT3C (PC Card) driver" depends on PCMCIA + select FW_LOADER help Bluetooth HCI BT3C (PC Card) driver. This driver provides support for Bluetooth PCMCIA devices with @@ -111,9 +112,6 @@ config BT_HCIBT3C 3Com Bluetooth Card (3CRWB6096) HP Bluetooth Card - The HCI BT3C driver uses external firmware loader program provided in - the BlueFW package. For more information, see . - Say Y here to compile support for HCI BT3C devices into the kernel or say M to compile it as module (bt3c_cs). 
--- linux-2.6.6-rc1/drivers/char/agp/ati-agp.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/char/agp/ati-agp.c 2004-04-18 22:25:28.230538784 -0700 @@ -131,6 +131,7 @@ static int ati_create_gatt_pages(int nr_ i--; } kfree (tables); + tables = NULL; retval = -ENOMEM; break; } --- linux-2.6.6-rc1/drivers/char/drm/drm_agpsupport.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/drivers/char/drm/drm_agpsupport.h 2004-04-18 22:25:29.180394384 -0700 @@ -103,7 +103,13 @@ int DRM(agp_acquire)(struct inode *inode drm_device_t *dev = priv->dev; int retcode; - if (!dev->agp || dev->agp->acquired || !drm_agp->acquire) + if (!dev->agp) + return -ENODEV; + if (dev->agp->acquired) + return -EBUSY; + if (!drm_agp->acquire) + return -EINVAL; + if ( dev->agp->cant_use_aperture ) return -EINVAL; if ((retcode = drm_agp->acquire())) return retcode; --- linux-2.6.6-rc1/drivers/char/drm/drm_bufs.h 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/drm_bufs.h 2004-04-18 22:25:29.182394080 -0700 @@ -147,7 +147,9 @@ int DRM(addmap)( struct inode *inode, st MTRR_TYPE_WRCOMB, 1 ); } #endif - map->handle = DRM(ioremap)( map->offset, map->size, dev ); + if (map->type == _DRM_REGISTERS) + map->handle = DRM(ioremap)( map->offset, map->size, + dev ); break; case _DRM_SHM: @@ -160,6 +162,12 @@ int DRM(addmap)( struct inode *inode, st } map->offset = (unsigned long)map->handle; if ( map->flags & _DRM_CONTAINS_LOCK ) { + /* Prevent a 2nd X Server from creating a 2nd lock */ + if (dev->lock.hw_lock != NULL) { + vfree( map->handle ); + DRM(free)( map, sizeof(*map), DRM_MEM_MAPS ); + return -EBUSY; + } dev->sigdata.lock = dev->lock.hw_lock = map->handle; /* Pointer to lock */ } @@ -767,7 +775,7 @@ int DRM(addbufs_pci)( struct inode *inod } #endif /* __HAVE_PCI_DMA */ -#ifdef __HAVE_SG +#if __HAVE_SG int DRM(addbufs_sg)( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ) { --- linux-2.6.6-rc1/drivers/char/drm/drm_context.h 2003-07-13 21:44:34.000000000 -0700 +++ 
25/drivers/char/drm/drm_context.h 2004-04-18 22:25:29.182394080 -0700 @@ -401,6 +401,7 @@ int DRM(addctx)( struct inode *inode, st { drm_file_t *priv = filp->private_data; drm_device_t *dev = priv->dev; + drm_ctx_list_t * ctx_entry; drm_ctx_t ctx; if ( copy_from_user( &ctx, (drm_ctx_t *)arg, sizeof(ctx) ) ) @@ -421,6 +422,20 @@ int DRM(addctx)( struct inode *inode, st if ( ctx.handle != DRM_KERNEL_CONTEXT ) DRIVER_CTX_CTOR(ctx.handle); /* XXX: also pass dev ? */ #endif + ctx_entry = DRM(alloc)( sizeof(*ctx_entry), DRM_MEM_CTXLIST ); + if ( !ctx_entry ) { + DRM_DEBUG("out of memory\n"); + return -ENOMEM; + } + + INIT_LIST_HEAD( &ctx_entry->head ); + ctx_entry->handle = ctx.handle; + ctx_entry->tag = priv; + + down( &dev->ctxlist_sem ); + list_add( &ctx_entry->head, &dev->ctxlist->head ); + ++dev->ctx_count; + up( &dev->ctxlist_sem ); if ( copy_to_user( (drm_ctx_t *)arg, &ctx, sizeof(ctx) ) ) return -EFAULT; @@ -543,6 +558,20 @@ int DRM(rmctx)( struct inode *inode, str DRM(ctxbitmap_free)( dev, ctx.handle ); } + down( &dev->ctxlist_sem ); + if ( !list_empty( &dev->ctxlist->head ) ) { + drm_ctx_list_t *pos, *n; + + list_for_each_entry_safe( pos, n, &dev->ctxlist->head, head ) { + if ( pos->handle == ctx.handle ) { + list_del( &pos->head ); + DRM(free)( pos, sizeof(*pos), DRM_MEM_CTXLIST ); + --dev->ctx_count; + } + } + } + up( &dev->ctxlist_sem ); + return 0; } --- linux-2.6.6-rc1/drivers/char/drm/drm_dma.h 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/drm_dma.h 2004-04-18 22:25:29.184393776 -0700 @@ -35,7 +35,6 @@ #include "drmP.h" -#include /* For task queue support */ #ifndef __HAVE_DMA_WAITQUEUE #define __HAVE_DMA_WAITQUEUE 0 @@ -43,15 +42,6 @@ #ifndef __HAVE_DMA_RECLAIM #define __HAVE_DMA_RECLAIM 0 #endif -#ifndef __HAVE_SHARED_IRQ -#define __HAVE_SHARED_IRQ 0 -#endif - -#if __HAVE_SHARED_IRQ -#define DRM_IRQ_TYPE SA_SHIRQ -#else -#define DRM_IRQ_TYPE 0 -#endif #if __HAVE_DMA @@ -214,293 +204,11 @@ void DRM(reclaim_buffers)( struct file * } #endif - 
- - -#if __HAVE_DMA_IRQ - -/** - * Install IRQ handler. - * - * \param dev DRM device. - * \param irq IRQ number. - * - * Initializes the IRQ related data, and setups drm_device::vbl_queue. Installs the handler, calling the driver - * \c DRM(driver_irq_preinstall)() and \c DRM(driver_irq_postinstall)() functions - * before and after the installation. - */ -int DRM(irq_install)( drm_device_t *dev, int irq ) -{ - int ret; - - if ( !irq ) - return -EINVAL; - - down( &dev->struct_sem ); - - /* Driver must have been initialized */ - if ( !dev->dev_private ) { - up( &dev->struct_sem ); - return -EINVAL; - } - - if ( dev->irq ) { - up( &dev->struct_sem ); - return -EBUSY; - } - dev->irq = irq; - up( &dev->struct_sem ); - - DRM_DEBUG( "%s: irq=%d\n", __FUNCTION__, irq ); - - dev->context_flag = 0; - dev->interrupt_flag = 0; - dev->dma_flag = 0; - - dev->dma->next_buffer = NULL; - dev->dma->next_queue = NULL; - dev->dma->this_buffer = NULL; - -#if __HAVE_DMA_IRQ_BH - INIT_WORK(&dev->work, DRM(dma_immediate_bh), dev); -#endif - -#if __HAVE_VBL_IRQ - init_waitqueue_head(&dev->vbl_queue); - - spin_lock_init( &dev->vbl_lock ); - - INIT_LIST_HEAD( &dev->vbl_sigs.head ); - - dev->vbl_pending = 0; -#endif - - /* Before installing handler */ - DRM(driver_irq_preinstall)(dev); - - /* Install handler */ - ret = request_irq( dev->irq, DRM(dma_service), - DRM_IRQ_TYPE, dev->devname, dev ); - if ( ret < 0 ) { - down( &dev->struct_sem ); - dev->irq = 0; - up( &dev->struct_sem ); - return ret; - } - - /* After installing handler */ - DRM(driver_irq_postinstall)(dev); - - return 0; -} - -/** - * Uninstall the IRQ handler. - * - * \param dev DRM device. - * - * Calls the driver's \c DRM(driver_irq_uninstall)() function, and stops the irq. 
- */ -int DRM(irq_uninstall)( drm_device_t *dev ) -{ - int irq; - - down( &dev->struct_sem ); - irq = dev->irq; - dev->irq = 0; - up( &dev->struct_sem ); - - if ( !irq ) - return -EINVAL; - - DRM_DEBUG( "%s: irq=%d\n", __FUNCTION__, irq ); - - DRM(driver_irq_uninstall)( dev ); - - free_irq( irq, dev ); - - return 0; -} - -/** - * IRQ control ioctl. - * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param arg user argument, pointing to a drm_control structure. - * \return zero on success or a negative number on failure. - * - * Calls irq_install() or irq_uninstall() according to \p arg. - */ -int DRM(control)( struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg ) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_control_t ctl; - - if ( copy_from_user( &ctl, (drm_control_t *)arg, sizeof(ctl) ) ) - return -EFAULT; - - switch ( ctl.func ) { - case DRM_INST_HANDLER: - return DRM(irq_install)( dev, ctl.irq ); - case DRM_UNINST_HANDLER: - return DRM(irq_uninstall)( dev ); - default: - return -EINVAL; - } -} - -#if __HAVE_VBL_IRQ - -/** - * Wait for VBLANK. - * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param data user argument, pointing to a drm_wait_vblank structure. - * \return zero on success or a negative number on failure. - * - * Verifies the IRQ is installed. - * - * If a signal is requested checks if this task has already scheduled the same signal - * for the same vblank sequence number - nothing to be done in - * that case. If the number of tasks waiting for the interrupt exceeds 100 the - * function fails. Otherwise adds a new entry to drm_device::vbl_sigs for this - * task. - * - * If a signal is not requested, then calls vblank_wait(). 
- */ -int DRM(wait_vblank)( DRM_IOCTL_ARGS ) -{ - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_wait_vblank_t vblwait; - struct timeval now; - int ret = 0; - unsigned int flags; - - if (!dev->irq) - return -EINVAL; - - DRM_COPY_FROM_USER_IOCTL( vblwait, (drm_wait_vblank_t *)data, - sizeof(vblwait) ); - - switch ( vblwait.request.type & ~_DRM_VBLANK_FLAGS_MASK ) { - case _DRM_VBLANK_RELATIVE: - vblwait.request.sequence += atomic_read( &dev->vbl_received ); - vblwait.request.type &= ~_DRM_VBLANK_RELATIVE; - case _DRM_VBLANK_ABSOLUTE: - break; - default: - return -EINVAL; - } - - flags = vblwait.request.type & _DRM_VBLANK_FLAGS_MASK; - - if ( flags & _DRM_VBLANK_SIGNAL ) { - unsigned long irqflags; - drm_vbl_sig_t *vbl_sig; - - vblwait.reply.sequence = atomic_read( &dev->vbl_received ); - - spin_lock_irqsave( &dev->vbl_lock, irqflags ); - - /* Check if this task has already scheduled the same signal - * for the same vblank sequence number; nothing to be done in - * that case - */ - list_for_each_entry( vbl_sig, &dev->vbl_sigs.head, head ) { - if (vbl_sig->sequence == vblwait.request.sequence - && vbl_sig->info.si_signo == vblwait.request.signal - && vbl_sig->task == current) - { - spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); - goto done; - } - } - - if ( dev->vbl_pending >= 100 ) { - spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); - return -EBUSY; - } - - dev->vbl_pending++; - - spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); - - if ( !( vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) ) ) ) { - return -ENOMEM; - } - - memset( (void *)vbl_sig, 0, sizeof(*vbl_sig) ); - - vbl_sig->sequence = vblwait.request.sequence; - vbl_sig->info.si_signo = vblwait.request.signal; - vbl_sig->task = current; - - spin_lock_irqsave( &dev->vbl_lock, irqflags ); - - list_add_tail( (struct list_head *) vbl_sig, &dev->vbl_sigs.head ); - - spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); - } else { - ret = DRM(vblank_wait)( dev, 
&vblwait.request.sequence ); - - do_gettimeofday( &now ); - vblwait.reply.tval_sec = now.tv_sec; - vblwait.reply.tval_usec = now.tv_usec; - } - -done: - DRM_COPY_TO_USER_IOCTL( (drm_wait_vblank_t *)data, vblwait, - sizeof(vblwait) ); - - return ret; -} - -/** - * Send the VBLANK signals. - * - * \param dev DRM device. - * - * Sends a signal for each task in drm_device::vbl_sigs and empties the list. - * - * If a signal is not requested, then calls vblank_wait(). +#if !__HAVE_IRQ +/* This stub DRM_IOCTL_CONTROL handler is for the drivers that used to require + * IRQs for DMA but no longer do. It maintains compatibility with the X Servers + * that try to use the control ioctl by simply returning success. */ -void DRM(vbl_send_signals)( drm_device_t *dev ) -{ - struct list_head *list, *tmp; - drm_vbl_sig_t *vbl_sig; - unsigned int vbl_seq = atomic_read( &dev->vbl_received ); - unsigned long flags; - - spin_lock_irqsave( &dev->vbl_lock, flags ); - - list_for_each_safe( list, tmp, &dev->vbl_sigs.head ) { - vbl_sig = list_entry( list, drm_vbl_sig_t, head ); - if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) { - vbl_sig->info.si_code = vbl_seq; - send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task ); - - list_del( list ); - - DRM_FREE( vbl_sig, sizeof(*vbl_sig) ); - - dev->vbl_pending--; - } - } - - spin_unlock_irqrestore( &dev->vbl_lock, flags ); -} - -#endif /* __HAVE_VBL_IRQ */ - -#else - int DRM(control)( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ) { @@ -517,7 +225,6 @@ int DRM(control)( struct inode *inode, s return -EINVAL; } } - -#endif /* __HAVE_DMA_IRQ */ +#endif #endif /* __HAVE_DMA */ --- linux-2.6.6-rc1/drivers/char/drm/drm_drv.h 2003-10-25 14:45:44.000000000 -0700 +++ 25/drivers/char/drm/drm_drv.h 2004-04-18 22:25:29.187393320 -0700 @@ -58,8 +58,8 @@ #ifndef __HAVE_CTX_BITMAP #define __HAVE_CTX_BITMAP 0 #endif -#ifndef __HAVE_DMA_IRQ -#define __HAVE_DMA_IRQ 0 +#ifndef __HAVE_IRQ +#define __HAVE_IRQ 0 #endif 
#ifndef __HAVE_DMA_QUEUE #define __HAVE_DMA_QUEUE 0 @@ -126,6 +126,9 @@ #ifndef DRIVER_IOCTLS #define DRIVER_IOCTLS #endif +#ifndef DRIVER_OPEN_HELPER +#define DRIVER_OPEN_HELPER( priv, dev ) +#endif #ifndef DRIVER_FOPS #define DRIVER_FOPS \ static struct file_operations DRM(fops) = { \ @@ -159,15 +162,8 @@ __setup( DRIVER_NAME "=", DRM_OPTIONS_FU #undef DRM_OPTIONS_FUNC #endif -/** - * The default number of instances (minor numbers) to initialize. - */ -#ifndef DRIVER_NUM_CARDS -#define DRIVER_NUM_CARDS 1 -#endif - -static drm_device_t *DRM(device); -static int *DRM(minor); +#define MAX_DEVICES 4 +static drm_device_t DRM(device)[MAX_DEVICES]; static int DRM(numdevs) = 0; DRIVER_FOPS; @@ -177,10 +173,13 @@ static drm_ioctl_desc_t DRM(ioctls)[] [DRM_IOCTL_NR(DRM_IOCTL_VERSION)] = { DRM(version), 0, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_GET_UNIQUE)] = { DRM(getunique), 0, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_GET_MAGIC)] = { DRM(getmagic), 0, 0 }, - [DRM_IOCTL_NR(DRM_IOCTL_IRQ_BUSID)] = { DRM(irq_busid), 0, 1 }, +#if __HAVE_IRQ + [DRM_IOCTL_NR(DRM_IOCTL_IRQ_BUSID)] = { DRM(irq_by_busid), 0, 1 }, +#endif [DRM_IOCTL_NR(DRM_IOCTL_GET_MAP)] = { DRM(getmap), 0, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_GET_CLIENT)] = { DRM(getclient), 0, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_GET_STATS)] = { DRM(getstats), 0, 0 }, + [DRM_IOCTL_NR(DRM_IOCTL_SET_VERSION)] = { DRM(setversion), 0, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_SET_UNIQUE)] = { DRM(setunique), 1, 1 }, [DRM_IOCTL_NR(DRM_IOCTL_BLOCK)] = { DRM(noop), 1, 1 }, @@ -222,9 +221,9 @@ static drm_ioctl_desc_t DRM(ioctls)[] [DRM_IOCTL_NR(DRM_IOCTL_INFO_BUFS)] = { DRM(infobufs), 1, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_MAP_BUFS)] = { DRM(mapbufs), 1, 0 }, [DRM_IOCTL_NR(DRM_IOCTL_FREE_BUFS)] = { DRM(freebufs), 1, 0 }, - - /* The DRM_IOCTL_DMA ioctl should be defined by the driver. - */ + /* The DRM_IOCTL_DMA ioctl should be defined by the driver. 
*/ +#endif +#if __HAVE_IRQ || __HAVE_DMA [DRM_IOCTL_NR(DRM_IOCTL_CONTROL)] = { DRM(control), 1, 1 }, #endif @@ -330,6 +329,12 @@ static int DRM(setup)( drm_device_t *dev memset(dev->maplist, 0, sizeof(*dev->maplist)); INIT_LIST_HEAD(&dev->maplist->head); + dev->ctxlist = DRM(alloc)(sizeof(*dev->ctxlist), + DRM_MEM_CTXLIST); + if(dev->ctxlist == NULL) return -ENOMEM; + memset(dev->ctxlist, 0, sizeof(*dev->ctxlist)); + INIT_LIST_HEAD(&dev->ctxlist->head); + dev->vmalist = NULL; dev->sigdata.lock = dev->lock.hw_lock = NULL; init_waitqueue_head( &dev->lock.lock_queue ); @@ -337,7 +342,7 @@ static int DRM(setup)( drm_device_t *dev dev->queue_reserved = 0; dev->queue_slots = 0; dev->queuelist = NULL; - dev->irq = 0; + dev->irq_enabled = 0; dev->context_flag = 0; dev->interrupt_flag = 0; dev->dma_flag = 0; @@ -345,6 +350,7 @@ static int DRM(setup)( drm_device_t *dev dev->last_switch = 0; dev->last_checked = 0; init_waitqueue_head( &dev->context_wait ); + dev->if_version = 0; dev->ctx_start = 0; dev->lck_start = 0; @@ -391,8 +397,8 @@ static int DRM(takedown)( drm_device_t * DRM_DEBUG( "\n" ); DRIVER_PRETAKEDOWN(); -#if __HAVE_DMA_IRQ - if ( dev->irq ) DRM(irq_uninstall)( dev ); +#if __HAVE_IRQ + if ( dev->irq_enabled ) DRM(irq_uninstall)( dev ); #endif down( &dev->struct_sem ); @@ -534,43 +540,101 @@ static int DRM(takedown)( drm_device_t * return 0; } -/** - * Figure out how many instances to initialize. - * - * \return number of cards found. - * - * Searches for every PCI card in \c DRIVER_CARD_LIST with matching vendor and device ids. 
- */ -static int drm_count_cards(void) +static drm_pci_id_list_t DRM(pciidlist)[] = { + DRIVER_PCI_IDS +}; + +static int DRM(probe)(struct pci_dev *pdev) { - int num = 0; -#if defined(DRIVER_CARD_LIST) - int i; - drm_pci_list_t *l; - u16 device, vendor; - struct pci_dev *pdev = NULL; + drm_device_t *dev; +#if __HAVE_CTX_BITMAP + int retcode; #endif + int i; + int is_compat = 0; DRM_DEBUG( "\n" ); -#if defined(DRIVER_COUNT_CARDS) - num = DRIVER_COUNT_CARDS(); -#elif defined(DRIVER_CARD_LIST) - for (i = 0, l = DRIVER_CARD_LIST; l[i].vendor != 0; i++) { - pdev = NULL; - vendor = l[i].vendor; - device = l[i].device; - if(device == 0xffff) device = PCI_ANY_ID; - if(vendor == 0xffff) vendor = PCI_ANY_ID; - while ((pdev = pci_find_device(vendor, device, pdev))) { - num++; + for (i = 0; DRM(pciidlist)[i].vendor != 0; i++) { + if ((DRM(pciidlist)[i].vendor == pdev->vendor) && + (DRM(pciidlist)[i].device == pdev->device)) { + is_compat = 1; } } + if (is_compat == 0) + return -ENODEV; + + if (DRM(numdevs) >= MAX_DEVICES) + return -ENODEV; + + dev = &(DRM(device)[DRM(numdevs)]); + + memset( (void *)dev, 0, sizeof(*dev) ); + dev->count_lock = SPIN_LOCK_UNLOCKED; + init_timer( &dev->timer ); + sema_init( &dev->struct_sem, 1 ); + sema_init( &dev->ctxlist_sem, 1 ); + + if ((dev->minor = DRM(stub_register)(DRIVER_NAME, &DRM(fops),dev)) < 0) + return -EPERM; + dev->device = MKDEV(DRM_MAJOR, dev->minor ); + dev->name = DRIVER_NAME; + + dev->pdev = pdev; +#ifdef __alpha__ + dev->hose = pdev->sysdata; + dev->pci_domain = dev->hose->bus->number; #else - num = DRIVER_NUM_CARDS; + dev->pci_domain = 0; #endif - DRM_DEBUG("numdevs = %d\n", num); - return num; + dev->pci_bus = pdev->bus->number; + dev->pci_slot = PCI_SLOT(pdev->devfn); + dev->pci_func = PCI_FUNC(pdev->devfn); + dev->irq = pdev->irq; + + DRIVER_PREINIT(); + +#if __REALLY_HAVE_AGP + dev->agp = DRM(agp_init)(); +#if __MUST_HAVE_AGP + if ( dev->agp == NULL ) { + DRM_ERROR( "Cannot initialize the agpgart module.\n" ); + 
DRM(stub_unregister)(dev->minor); + DRM(takedown)( dev ); + return -EINVAL; + } +#endif +#if __REALLY_HAVE_MTRR + if (dev->agp) + dev->agp->agp_mtrr = mtrr_add( dev->agp->agp_info.aper_base, + dev->agp->agp_info.aper_size*1024*1024, + MTRR_TYPE_WRCOMB, + 1 ); +#endif +#endif + +#if __HAVE_CTX_BITMAP + retcode = DRM(ctxbitmap_init)( dev ); + if( retcode ) { + DRM_ERROR( "Cannot allocate memory for context bitmap.\n" ); + DRM(stub_unregister)(dev->minor); + DRM(takedown)( dev ); + return retcode; + } +#endif + DRM(numdevs)++; /* no errors, mark it reserved */ + + DRM_INFO( "Initialized %s %d.%d.%d %s on minor %d\n", + DRIVER_NAME, + DRIVER_MAJOR, + DRIVER_MINOR, + DRIVER_PATCHLEVEL, + DRIVER_DATE, + dev->minor); + + DRIVER_POSTINIT(); + + return 0; } /** @@ -579,7 +643,7 @@ static int drm_count_cards(void) * * \return zero on success or a negative number on failure. * - * Allocates and initialize an array of drm_device structures, and attempts to + * Initializes an array of drm_device structures, and attempts to * initialize all available devices, using consecutive minors, registering the * stubs and initializing the AGP device. * @@ -588,88 +652,19 @@ static int drm_count_cards(void) */ static int __init drm_init( void ) { + struct pci_dev *pdev = NULL; - drm_device_t *dev; - int i; -#if __HAVE_CTX_BITMAP - int retcode; -#endif DRM_DEBUG( "\n" ); #ifdef MODULE DRM(parse_options)( drm_opts ); #endif - DRM(numdevs) = drm_count_cards(); - /* Force at least one instance. 
*/ - if (DRM(numdevs) <= 0) - DRM(numdevs) = 1; - - DRM(device) = kmalloc(sizeof(*DRM(device)) * DRM(numdevs), GFP_KERNEL); - if (!DRM(device)) { - return -ENOMEM; - } - DRM(minor) = kmalloc(sizeof(*DRM(minor)) * DRM(numdevs), GFP_KERNEL); - if (!DRM(minor)) { - kfree(DRM(device)); - return -ENOMEM; - } - - DRIVER_PREINIT(); - DRM(mem_init)(); - for (i = 0; i < DRM(numdevs); i++) { - dev = &(DRM(device)[i]); - memset( (void *)dev, 0, sizeof(*dev) ); - dev->count_lock = SPIN_LOCK_UNLOCKED; - init_timer( &dev->timer ); - sema_init( &dev->struct_sem, 1 ); - - if ((DRM(minor)[i] = DRM(stub_register)(DRIVER_NAME, &DRM(fops),dev)) < 0) - return -EPERM; - dev->device = MKDEV(DRM_MAJOR, DRM(minor)[i] ); - dev->name = DRIVER_NAME; - -#if __REALLY_HAVE_AGP - dev->agp = DRM(agp_init)(); -#if __MUST_HAVE_AGP - if ( dev->agp == NULL ) { - DRM_ERROR( "Cannot initialize the agpgart module.\n" ); - DRM(stub_unregister)(DRM(minor)[i]); - DRM(takedown)( dev ); - return -EINVAL; - } -#endif -#if __REALLY_HAVE_MTRR - if (dev->agp) - dev->agp->agp_mtrr = mtrr_add( dev->agp->agp_info.aper_base, - dev->agp->agp_info.aper_size*1024*1024, - MTRR_TYPE_WRCOMB, - 1 ); -#endif -#endif - -#if __HAVE_CTX_BITMAP - retcode = DRM(ctxbitmap_init)( dev ); - if( retcode ) { - DRM_ERROR( "Cannot allocate memory for context bitmap.\n" ); - DRM(stub_unregister)(DRM(minor)[i]); - DRM(takedown)( dev ); - return retcode; - } -#endif - DRM_INFO( "Initialized %s %d.%d.%d %s on minor %d\n", - DRIVER_NAME, - DRIVER_MAJOR, - DRIVER_MINOR, - DRIVER_PATCHLEVEL, - DRIVER_DATE, - DRM(minor)[i] ); + while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) { + DRM(probe)(pdev); } - - DRIVER_POSTINIT(); - return 0; } @@ -689,10 +684,10 @@ static void __exit drm_cleanup( void ) for (i = DRM(numdevs) - 1; i >= 0; i--) { dev = &(DRM(device)[i]); - if ( DRM(stub_unregister)(DRM(minor)[i]) ) { + if ( DRM(stub_unregister)(dev->minor) ) { DRM_ERROR( "Cannot unload module\n" ); } else { - DRM_DEBUG("minor %d 
unregistered\n", DRM(minor)[i]); + DRM_DEBUG("minor %d unregistered\n", dev->minor); if (i == 0) { DRM_INFO( "Module unloaded\n" ); } @@ -722,8 +717,6 @@ static void __exit drm_cleanup( void ) #endif } DRIVER_POSTCLEANUP(); - kfree(DRM(minor)); - kfree(DRM(device)); DRM(numdevs) = 0; } @@ -795,7 +788,7 @@ int DRM(open)( struct inode *inode, stru int i; for (i = 0; i < DRM(numdevs); i++) { - if (iminor(inode) == DRM(minor)[i]) { + if (iminor(inode) == DRM(device)[i].minor) { dev = &(DRM(device)[i]); break; } @@ -908,6 +901,26 @@ int DRM(release)( struct inode *inode, s DRM(fasync)( -1, filp, 0 ); + down( &dev->ctxlist_sem ); + if ( !list_empty( &dev->ctxlist->head ) ) { + drm_ctx_list_t *pos, *n; + + list_for_each_entry_safe( pos, n, &dev->ctxlist->head, head ) { + if ( pos->tag == priv && + pos->handle != DRM_KERNEL_CONTEXT ) { +#ifdef DRIVER_CTX_DTOR + DRIVER_CTX_DTOR(pos->handle); +#endif +#if __HAVE_CTX_BITMAP + DRM(ctxbitmap_free)( dev, pos->handle ); +#endif + list_del( &pos->head ); + DRM(free)( pos, sizeof(*pos), DRM_MEM_CTXLIST ); + } + } + } + up( &dev->ctxlist_sem ); + down( &dev->struct_sem ); if ( priv->remove_auth_on_close == 1 ) { drm_file_t *temp = dev->file_first; --- linux-2.6.6-rc1/drivers/char/drm/drm_fops.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/drm_fops.h 2004-04-18 22:25:29.188393168 -0700 @@ -72,6 +72,8 @@ int DRM(open_helper)(struct inode *inode priv->authenticated = capable(CAP_SYS_ADMIN); priv->lock_count = 0; + DRIVER_OPEN_HELPER( priv, dev ); + down(&dev->struct_sem); if (!dev->file_last) { priv->next = NULL; --- linux-2.6.6-rc1/drivers/char/drm/drm.h 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/drm.h 2004-04-18 22:25:29.178394688 -0700 @@ -46,8 +46,8 @@ #define DRM_IOC_WRITE _IOC_WRITE #define DRM_IOC_READWRITE _IOC_READ|_IOC_WRITE #define DRM_IOC(dir, group, nr, size) _IOC(dir, group, nr, size) -#elif defined(__FreeBSD__) || defined(__NetBSD__) -#if defined(__FreeBSD__) && defined(XFree86Server) 
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) +#if defined(__FreeBSD__) && defined(IN_MODULE) /* Prevent name collision when including sys/ioccom.h */ #undef ioctl #include @@ -130,6 +130,18 @@ typedef struct drm_tex_region { unsigned int age; } drm_tex_region_t; +/** + * Hardware lock. + * + * The lock structure is a simple cache-line aligned integer. To avoid + * processor bus contention on a multiprocessor system, there should not be any + * other data stored in the same cache line. + */ +typedef struct drm_hw_lock { + __volatile__ unsigned int lock; /**< lock variable */ + char padding[60]; /**< Pad to cache line */ +} drm_hw_lock_t; + /** * DRM_IOCTL_VERSION ioctl argument type. @@ -580,6 +592,16 @@ typedef struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ } drm_scatter_gather_t; +/** + * DRM_IOCTL_SET_VERSION ioctl argument type. + */ +typedef struct drm_set_version { + int drm_di_major; + int drm_di_minor; + int drm_dd_major; + int drm_dd_minor; +} drm_set_version_t; + #define DRM_IOCTL_BASE 'd' #define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) @@ -594,6 +616,7 @@ typedef struct drm_scatter_gather { #define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, drm_map_t) #define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, drm_client_t) #define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, drm_stats_t) +#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, drm_set_version_t) #define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, drm_unique_t) #define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, drm_auth_t) --- linux-2.6.6-rc1/drivers/char/drm/drm_ioctl.h 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/drm_ioctl.h 2004-04-18 22:25:29.189393016 -0700 @@ -35,69 +35,7 @@ #include "drmP.h" - -/** - * Get interrupt from bus id. - * - * \param inode device inode. - * \param filp file pointer. - * \param cmd command. - * \param arg user argument, pointing to a drm_irq_busid structure. - * \return zero on success or a negative number on failure. 
- * - * Finds the PCI device with the specified bus id and gets its IRQ number. - */ -int DRM(irq_busid)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - drm_irq_busid_t p; - struct pci_dev *dev; - - if (copy_from_user(&p, (drm_irq_busid_t *)arg, sizeof(p))) - return -EFAULT; -#ifdef __alpha__ - { - int domain = p.busnum >> 8; - p.busnum &= 0xff; - - /* - * Find the hose the device is on (the domain number is the - * hose index) and offset the bus by the root bus of that - * hose. - */ - for(dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,NULL); - dev; - dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,dev)) { - struct pci_controller *hose = dev->sysdata; - - if (hose->index == domain) { - p.busnum += hose->bus->number; - break; - } - } - } -#endif - dev = pci_find_slot(p.busnum, PCI_DEVFN(p.devnum, p.funcnum)); - if (!dev) { - DRM_ERROR("pci_find_slot failed for %d:%d:%d\n", - p.busnum, p.devnum, p.funcnum); - p.irq = 0; - goto out; - } - if (pci_enable_device(dev) != 0) { - DRM_ERROR("pci_enable_device failed for %d:%d:%d\n", - p.busnum, p.devnum, p.funcnum); - p.irq = 0; - goto out; - } - p.irq = dev->irq; - out: - DRM_DEBUG("%d:%d:%d => IRQ %d\n", - p.busnum, p.devnum, p.funcnum, p.irq); - if (copy_to_user((drm_irq_busid_t *)arg, &p, sizeof(p))) - return -EFAULT; - return 0; -} +#include "linux/pci.h" /** * Get the bus id. @@ -138,8 +76,10 @@ int DRM(getunique)(struct inode *inode, * \param arg user argument, pointing to a drm_unique structure. * \return zero on success or a negative number on failure. * - * Copies the bus id from userspace into drm_device::unique, and searches for - * the respective PCI device, updating drm_device::pdev. + * Copies the bus id from userspace into drm_device::unique, and verifies that + * it matches the device this DRM is attached to (EINVAL otherwise). Deprecated + * in interface version 1.1 and will return EBUSY when setversion has requested + * version 1.1 or greater. 
*/ int DRM(setunique)(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -147,6 +87,7 @@ int DRM(setunique)(struct inode *inode, drm_file_t *priv = filp->private_data; drm_device_t *dev = priv->dev; drm_unique_t u; + int domain, bus, slot, func, ret; if (dev->unique_len || dev->unique) return -EBUSY; @@ -164,55 +105,42 @@ int DRM(setunique)(struct inode *inode, dev->devname = DRM(alloc)(strlen(dev->name) + strlen(dev->unique) + 2, DRM_MEM_DRIVER); - if(!dev->devname) { - DRM(free)(dev->devname, sizeof(*dev->devname), DRM_MEM_DRIVER); + if (!dev->devname) return -ENOMEM; - } + sprintf(dev->devname, "%s@%s", dev->name, dev->unique); - do { - struct pci_dev *pci_dev; - int domain, b, d, f; - char *p; - - for(p = dev->unique; p && *p && *p != ':'; p++); - if (!p || !*p) break; - b = (int)simple_strtoul(p+1, &p, 10); - if (*p != ':') break; - d = (int)simple_strtoul(p+1, &p, 10); - if (*p != ':') break; - f = (int)simple_strtoul(p+1, &p, 10); - if (*p) break; - - domain = b >> 8; - b &= 0xff; - -#ifdef __alpha__ - /* - * Find the hose the device is on (the domain number is the - * hose index) and offset the bus by the root bus of that - * hose. - */ - for(pci_dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,NULL); - pci_dev; - pci_dev = pci_find_device(PCI_ANY_ID,PCI_ANY_ID,pci_dev)) { - struct pci_controller *hose = pci_dev->sysdata; - - if (hose->index == domain) { - b += hose->bus->number; - break; - } - } -#endif + /* Return error if the busid submitted doesn't match the device's actual + * busid. 
+	 */
+	ret = sscanf(dev->unique, "PCI:%d:%d:%d", &bus, &slot, &func);
+	if (ret != 3)
+		return DRM_ERR(EINVAL);
+	domain = bus >> 8;
+	bus &= 0xff;
+
+	if ((domain != dev->pci_domain) ||
+	    (bus != dev->pci_bus) ||
+	    (slot != dev->pci_slot) ||
+	    (func != dev->pci_func))
+		return -EINVAL;
 
-		pci_dev = pci_find_slot(b, PCI_DEVFN(d,f));
-		if (pci_dev) {
-			dev->pdev = pci_dev;
-#ifdef __alpha__
-			dev->hose = pci_dev->sysdata;
-#endif
-		}
-	} while(0);
+	return 0;
+}
+
+static int
+DRM(set_busid)(drm_device_t *dev)
+{
+	if (dev->unique != NULL)
+		return -EBUSY;
+
+	dev->unique_len = 20;
+	dev->unique = DRM(alloc)(dev->unique_len + 1, DRM_MEM_DRIVER);
+	if (dev->unique == NULL)
+		return -ENOMEM;
+
+	snprintf(dev->unique, dev->unique_len, "pci:%04x:%02x:%02x.%d",
+		dev->pci_domain, dev->pci_bus, dev->pci_slot, dev->pci_func);
 
 	return 0;
 }
@@ -363,3 +291,61 @@ int DRM(getstats)( struct inode *inode, 
 		return -EFAULT;
 	return 0;
 }
+
+#define DRM_IF_MAJOR	1
+#define DRM_IF_MINOR	2
+
+/**
+ * Set version ioctl.
+ *
+ * \param inode device inode.
+ * \param filp file pointer.
+ * \param cmd command.
+ * \param data user argument, pointing to a drm_set_version structure.
+ * \return zero on success or a negative number on failure.
+ *
+ * Reports the interface and driver versions built into this module, then
+ * checks the versions requested by the caller. A requested major number of
+ * -1 skips the corresponding check. Requesting interface minor 1 or greater
+ * also calls set_busid().
+ */
+int DRM(setversion)(DRM_IOCTL_ARGS)
+{
+	DRM_DEVICE;
+	drm_set_version_t sv;
+	drm_set_version_t retv;
+	int if_version;
+
+	DRM_COPY_FROM_USER_IOCTL(sv, (drm_set_version_t *)data, sizeof(sv));
+
+	retv.drm_di_major = DRM_IF_MAJOR;
+	retv.drm_di_minor = DRM_IF_MINOR;
+	retv.drm_dd_major = DRIVER_MAJOR;
+	retv.drm_dd_minor = DRIVER_MINOR;
+
+	DRM_COPY_TO_USER_IOCTL((drm_set_version_t *)data, retv, sizeof(sv));
+
+	if (sv.drm_di_major != -1) {
+		if (sv.drm_di_major != DRM_IF_MAJOR ||
+		    sv.drm_di_minor < 0 || sv.drm_di_minor > DRM_IF_MINOR)
+			return -EINVAL;
+		if_version = DRM_IF_VERSION(sv.drm_di_major, sv.drm_di_minor);
+		dev->if_version = DRM_MAX(if_version, dev->if_version);
+		if (sv.drm_di_minor >= 1) {
+			/*
+			 * Version 1.1 includes tying of DRM to specific device
+			 */
+			DRM(set_busid)(dev);
+		}
+	}
+
+	if (sv.drm_dd_major != -1) {
+		if (sv.drm_dd_major != DRIVER_MAJOR ||
+		    sv.drm_dd_minor < 0 || sv.drm_dd_minor > DRIVER_MINOR)
+			return -EINVAL;
+#ifdef DRIVER_SETVERSION
+		DRIVER_SETVERSION(dev, &sv);
+#endif
+	}
+	return 0;
+}
--- /dev/null	2003-09-15 06:40:47.000000000 -0700
+++ 25/drivers/char/drm/drm_irq.h	2004-04-18 22:25:29.192392560 -0700
@@ -0,0 +1,372 @@
+/**
+ * \file drm_irq.h
+ * IRQ support
+ *
+ * \author Rickard E. (Rik) Faith
+ * \author Gareth Hughes
+ */
+
+/*
+ * Created: Fri Mar 19 14:30:16 1999 by faith@valinux.com
+ *
+ * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define __NO_VERSION__
+#include "drmP.h"
+
+#include <linux/interrupt.h>	/* For task queue support */
+
+#ifndef __HAVE_SHARED_IRQ
+#define __HAVE_SHARED_IRQ	0
+#endif
+
+#if __HAVE_SHARED_IRQ
+#define DRM_IRQ_TYPE		SA_SHIRQ
+#else
+#define DRM_IRQ_TYPE		0
+#endif
+
+/**
+ * Get interrupt from bus id.
+ *
+ * \param inode device inode.
+ * \param filp file pointer.
+ * \param cmd command. + * \param arg user argument, pointing to a drm_irq_busid structure. + * \return zero on success or a negative number on failure. + * + * Finds the PCI device with the specified bus id and gets its IRQ number. + * This IOCTL is deprecated, and will now return EINVAL for any busid not equal + * to that of the device that this DRM instance attached to. + */ +int DRM(irq_by_busid)(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_irq_busid_t p; + + if (copy_from_user(&p, (drm_irq_busid_t *)arg, sizeof(p))) + return -EFAULT; + + if ((p.busnum >> 8) != dev->pci_domain || + (p.busnum & 0xff) != dev->pci_bus || + p.devnum != dev->pci_slot || + p.funcnum != dev->pci_func) + return -EINVAL; + + p.irq = dev->irq; + + DRM_DEBUG("%d:%d:%d => IRQ %d\n", + p.busnum, p.devnum, p.funcnum, p.irq); + if (copy_to_user((drm_irq_busid_t *)arg, &p, sizeof(p))) + return -EFAULT; + return 0; +} + +#if __HAVE_IRQ + +/** + * Install IRQ handler. + * + * \param dev DRM device. + * \param irq IRQ number. + * + * Initializes the IRQ related data, and setups drm_device::vbl_queue. Installs the handler, calling the driver + * \c DRM(driver_irq_preinstall)() and \c DRM(driver_irq_postinstall)() functions + * before and after the installation. 
+ */ +int DRM(irq_install)( drm_device_t *dev ) +{ + int ret; + + if ( dev->irq == 0 ) + return -EINVAL; + + down( &dev->struct_sem ); + + /* Driver must have been initialized */ + if ( !dev->dev_private ) { + up( &dev->struct_sem ); + return -EINVAL; + } + + if ( dev->irq_enabled ) { + up( &dev->struct_sem ); + return -EBUSY; + } + dev->irq_enabled = 1; + up( &dev->struct_sem ); + + DRM_DEBUG( "%s: irq=%d\n", __FUNCTION__, dev->irq ); + +#if __HAVE_DMA + dev->dma->next_buffer = NULL; + dev->dma->next_queue = NULL; + dev->dma->this_buffer = NULL; +#endif + +#if __HAVE_IRQ_BH + INIT_WORK(&dev->work, DRM(irq_immediate_bh), dev); +#endif + +#if __HAVE_VBL_IRQ + init_waitqueue_head(&dev->vbl_queue); + + spin_lock_init( &dev->vbl_lock ); + + INIT_LIST_HEAD( &dev->vbl_sigs.head ); + + dev->vbl_pending = 0; +#endif + + /* Before installing handler */ + DRM(driver_irq_preinstall)(dev); + + /* Install handler */ + ret = request_irq( dev->irq, DRM(irq_handler), + DRM_IRQ_TYPE, dev->devname, dev ); + if ( ret < 0 ) { + down( &dev->struct_sem ); + dev->irq_enabled = 0; + up( &dev->struct_sem ); + return ret; + } + + /* After installing handler */ + DRM(driver_irq_postinstall)(dev); + + return 0; +} + +/** + * Uninstall the IRQ handler. + * + * \param dev DRM device. + * + * Calls the driver's \c DRM(driver_irq_uninstall)() function, and stops the irq. + */ +int DRM(irq_uninstall)( drm_device_t *dev ) +{ + int irq_enabled; + + down( &dev->struct_sem ); + irq_enabled = dev->irq_enabled; + dev->irq_enabled = 0; + up( &dev->struct_sem ); + + if ( !irq_enabled ) + return -EINVAL; + + DRM_DEBUG( "%s: irq=%d\n", __FUNCTION__, dev->irq ); + + DRM(driver_irq_uninstall)( dev ); + + free_irq( dev->irq, dev ); + + return 0; +} + +/** + * IRQ control ioctl. + * + * \param inode device inode. + * \param filp file pointer. + * \param cmd command. + * \param arg user argument, pointing to a drm_control structure. + * \return zero on success or a negative number on failure. 
+ * + * Calls irq_install() or irq_uninstall() according to the \c func field of the drm_control structure that \p arg points to. + */ +int DRM(control)( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_control_t ctl; + + if ( copy_from_user( &ctl, (drm_control_t *)arg, sizeof(ctl) ) ) + return -EFAULT; + + switch ( ctl.func ) { + case DRM_INST_HANDLER: + if (dev->if_version < DRM_IF_VERSION(1, 2) && /* below interface 1.2 the requested irq must equal dev->irq; from 1.2 on ctl.irq is not examined */ + ctl.irq != dev->irq) + return -EINVAL; + return DRM(irq_install)( dev ); + case DRM_UNINST_HANDLER: + return DRM(irq_uninstall)( dev ); + default: + return -EINVAL; + } +} + +#if __HAVE_VBL_IRQ + +/** + * Wait for VBLANK. + * + * \param inode device inode. + * \param filp file pointer. + * \param cmd command. + * \param data user argument, pointing to a drm_wait_vblank structure. + * \return zero on success or a negative number on failure. + * + * Verifies that the device has an IRQ (drm_device::irq is non-zero). + * + * If a signal is requested checks if this task has already scheduled the same signal + * for the same vblank sequence number - nothing to be done in + * that case. If 100 signals are already pending the + * function fails with -EBUSY. Otherwise adds a new entry to drm_device::vbl_sigs for this + * task. + * + * If a signal is not requested, then calls vblank_wait().
+ */ +int DRM(wait_vblank)( DRM_IOCTL_ARGS ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_wait_vblank_t vblwait; + struct timeval now; + int ret = 0; + unsigned int flags; + + if (!dev->irq) + return -EINVAL; + + DRM_COPY_FROM_USER_IOCTL( vblwait, (drm_wait_vblank_t *)data, + sizeof(vblwait) ); + + switch ( vblwait.request.type & ~_DRM_VBLANK_FLAGS_MASK ) { + case _DRM_VBLANK_RELATIVE: + vblwait.request.sequence += atomic_read( &dev->vbl_received ); /* relative count becomes an absolute sequence number */ + vblwait.request.type &= ~_DRM_VBLANK_RELATIVE; /* fall through */ + case _DRM_VBLANK_ABSOLUTE: + break; + default: + return -EINVAL; + } + + flags = vblwait.request.type & _DRM_VBLANK_FLAGS_MASK; + + if ( flags & _DRM_VBLANK_SIGNAL ) { + unsigned long irqflags; + drm_vbl_sig_t *vbl_sig; + + vblwait.reply.sequence = atomic_read( &dev->vbl_received ); + + spin_lock_irqsave( &dev->vbl_lock, irqflags ); + + /* Check if this task has already scheduled the same signal + * for the same vblank sequence number; nothing to be done in + * that case + */ + list_for_each_entry( vbl_sig, &dev->vbl_sigs.head, head ) { + if (vbl_sig->sequence == vblwait.request.sequence + && vbl_sig->info.si_signo == vblwait.request.signal + && vbl_sig->task == current) + { + spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); + goto done; + } + } + + if ( dev->vbl_pending >= 100 ) { + spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); + return -EBUSY; + } + + dev->vbl_pending++; /* reserve a slot before dropping the lock to allocate */ + + spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); + + if ( !( vbl_sig = DRM_MALLOC( sizeof( drm_vbl_sig_t ) ) ) ) { + /* Give the reserved slot back; otherwise each failed + * allocation would permanently consume one of the 100 + * pending entries and later requests would get -EBUSY. + */ + spin_lock_irqsave( &dev->vbl_lock, irqflags ); + dev->vbl_pending--; + spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); + return -ENOMEM; + } + + memset( (void *)vbl_sig, 0, sizeof(*vbl_sig) ); + + vbl_sig->sequence = vblwait.request.sequence; + vbl_sig->info.si_signo = vblwait.request.signal; + vbl_sig->task = current; + + spin_lock_irqsave( &dev->vbl_lock, irqflags ); + + list_add_tail( &vbl_sig->head, &dev->vbl_sigs.head ); /* link through the head member rather than casting the entry */ + + spin_unlock_irqrestore( &dev->vbl_lock, irqflags ); + } else { + ret = DRM(vblank_wait)( dev,
&vblwait.request.sequence ); + + do_gettimeofday( &now ); + vblwait.reply.tval_sec = now.tv_sec; + vblwait.reply.tval_usec = now.tv_usec; + } + +done: + DRM_COPY_TO_USER_IOCTL( (drm_wait_vblank_t *)data, vblwait, + sizeof(vblwait) ); + + return ret; +} + +/** + * Send the VBLANK signals. + * + * \param dev DRM device. + * + * Sends a signal for each entry in drm_device::vbl_sigs whose sequence number has been reached, then unlinks and frees that entry. + * + * An entry is due when (current vblank count - requested sequence) is at most 1<<23; all other entries stay queued. + */ +void DRM(vbl_send_signals)( drm_device_t *dev ) +{ + struct list_head *list, *tmp; + drm_vbl_sig_t *vbl_sig; + unsigned int vbl_seq = atomic_read( &dev->vbl_received ); + unsigned long flags; + + spin_lock_irqsave( &dev->vbl_lock, flags ); + + list_for_each_safe( list, tmp, &dev->vbl_sigs.head ) { + vbl_sig = list_entry( list, drm_vbl_sig_t, head ); + if ( ( vbl_seq - vbl_sig->sequence ) <= (1<<23) ) { + vbl_sig->info.si_code = vbl_seq; + send_sig_info( vbl_sig->info.si_signo, &vbl_sig->info, vbl_sig->task ); + + list_del( list ); + + DRM_FREE( vbl_sig, sizeof(*vbl_sig) ); + + dev->vbl_pending--; + } + } + + spin_unlock_irqrestore( &dev->vbl_lock, flags ); +} + +#endif /* __HAVE_VBL_IRQ */ + +#endif /* __HAVE_IRQ */ --- linux-2.6.6-rc1/drivers/char/drm/drm_memory_debug.h 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/char/drm/drm_memory_debug.h 2004-04-18 22:25:29.192392560 -0700 @@ -67,6 +67,7 @@ static drm_mem_stats_t DRM(mem_stats)[ [DRM_MEM_TOTALAGP] = { "totalagp" }, [DRM_MEM_BOUNDAGP] = { "boundagp" }, [DRM_MEM_CTXBITMAP] = { "ctxbitmap"}, + [DRM_MEM_CTXLIST] = { "ctxlist" }, [DRM_MEM_STUB] = { "stub" }, { NULL, 0, } /* Last entry must be null */ }; --- linux-2.6.6-rc1/drivers/char/drm/drm_os_linux.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/drm_os_linux.h 2004-04-18 22:25:29.193392408 -0700 @@ -62,8 +62,12 @@ verify_area( VERIFY_READ, uaddr, size ) #define DRM_COPY_FROM_USER_UNCHECKED(arg1, arg2, arg3) \ __copy_from_user(arg1, arg2, arg3) +#define
DRM_COPY_TO_USER_UNCHECKED(arg1, arg2, arg3) \ + __copy_to_user(arg1, arg2, arg3) #define DRM_GET_USER_UNCHECKED(val, uaddr) \ __get_user(val, uaddr) +#define DRM_PUT_USER_UNCHECKED(uaddr, val) \ + __put_user(val, uaddr) /** 'malloc' without the overhead of DRM(alloc)() */ @@ -71,6 +75,8 @@ /** 'free' without the overhead of DRM(free)() */ #define DRM_FREE(x,size) kfree(x) +#define DRM_GET_PRIV_WITH_RETURN(_priv, _filp) (_priv) = (_filp)->private_data /* arguments parenthesized so expression arguments expand safely */ + /** * Get the pointer to the SAREA. * --- linux-2.6.6-rc1/drivers/char/drm/drmP.h 2004-01-09 00:04:31.000000000 -0800 +++ 25/drivers/char/drm/drmP.h 2004-04-18 22:25:29.180394384 -0700 @@ -92,8 +92,8 @@ #ifndef __HAVE_DMA #define __HAVE_DMA 0 #endif -#ifndef __HAVE_DMA_IRQ -#define __HAVE_DMA_IRQ 0 +#ifndef __HAVE_IRQ +#define __HAVE_IRQ 0 #endif #ifndef __HAVE_DMA_WAITLIST #define __HAVE_DMA_WAITLIST 0 @@ -148,6 +148,7 @@ #define DRM_MEM_CTXBITMAP 18 #define DRM_MEM_STUB 19 #define DRM_MEM_SGLISTS 20 +#define DRM_MEM_CTXLIST 21 #define DRM_MAX_CTXBITMAP (PAGE_SIZE * 8) @@ -324,6 +325,7 @@ do { \ #define DRM_BUFCOUNT(x) ((x)->count - DRM_LEFTCOUNT(x)) #define DRM_WAITCOUNT(dev,idx) DRM_BUFCOUNT(&dev->queuelist[idx]->waitlist) +#define DRM_IF_VERSION(maj, min) (((maj) << 16) | (min)) /* packs an interface version: major shifted left by 16, OR-ed with minor */ /** * Get the private SAREA mapping. * @@ -362,10 +364,12 @@ do { \ typedef int drm_ioctl_t( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); -typedef struct drm_pci_list { - u16 vendor; - u16 device; -} drm_pci_list_t; +typedef struct drm_pci_id_list +{ + int vendor; + int device; + long driver_private; +} drm_pci_id_list_t; typedef struct drm_ioctl_desc { drm_ioctl_t *func; @@ -463,18 +467,6 @@ typedef struct drm_buf_entry { drm_freelist_t freelist; } drm_buf_entry_t; -/** - * Hardware lock. - * - * The lock structure is a simple cache-line aligned integer. To avoid - * processor bus contention on a multiprocessor system, there should not be any - * other data stored in the same cache line.
- */ -typedef struct drm_hw_lock { - __volatile__ unsigned int lock; /**< lock variable */ - char padding[60]; /**< Pad to cache line */ -} drm_hw_lock_t; - /** File private data */ typedef struct drm_file { int authenticated; @@ -488,6 +480,9 @@ typedef struct drm_file { struct drm_device *dev; int remove_auth_on_close; unsigned long lock_count; +#ifdef DRIVER_FILE_FIELDS + DRIVER_FILE_FIELDS; +#endif } drm_file_t; /** Wait queue */ @@ -602,6 +597,15 @@ typedef struct drm_map_list { typedef drm_map_t drm_local_map_t; +/** + * Context handle list + */ +typedef struct drm_ctx_list { + struct list_head head; /**< list head */ + drm_context_t handle; /**< context handle */ + drm_file_t *tag; /**< associated fd private data */ +} drm_ctx_list_t; + #if __HAVE_VBL_IRQ typedef struct drm_vbl_sig { @@ -622,6 +626,8 @@ typedef struct drm_device { int unique_len; /**< Length of unique field */ dev_t device; /**< Device number for mknod */ char *devname; /**< For /proc/interrupts */ + int minor; /**< Minor device number */ + int if_version; /**< Highest interface version set */ int blocked; /**< Blocked due to VC switch? 
*/ struct proc_dir_entry *root; /**< Root for this device's entries */ @@ -660,6 +666,12 @@ typedef struct drm_device { drm_map_list_t *maplist; /**< Linked list of regions */ int map_count; /**< Number of mappable regions */ + /** \name Context handle management */ + /*@{*/ + drm_ctx_list_t *ctxlist; /**< Linked list of context handles */ + int ctx_count; /**< Number of context handles */ + struct semaphore ctxlist_sem; /**< For ctxlist */ + drm_map_t **context_sareas; /**< per-context SAREA's */ int max_context; @@ -679,6 +691,7 @@ typedef struct drm_device { /** \name Context support */ /*@{*/ int irq; /**< Interrupt used by board */ + int irq_enabled; /**< True if irq handler is enabled */ __volatile__ long context_flag; /**< Context swapping flag */ __volatile__ long interrupt_flag; /**< Interruption handler flag */ __volatile__ long dma_flag; /**< DMA dispatch flag */ @@ -714,7 +727,12 @@ typedef struct drm_device { #if __REALLY_HAVE_AGP drm_agp_head_t *agp; /**< AGP data */ #endif - struct pci_dev *pdev; /**< PCI device structure */ + + struct pci_dev *pdev; /**< PCI device structure */ + int pci_domain; /**< PCI bus domain number */ + int pci_bus; /**< PCI bus number */ + int pci_slot; /**< PCI slot number */ + int pci_func; /**< PCI function number */ #ifdef __alpha__ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) struct pci_controler *hose; @@ -758,18 +776,6 @@ extern int DRM(flush)(struct file * extern int DRM(fasync)(int fd, struct file *filp, int on); /* Mapping support (drm_vm.h) */ -extern struct page *DRM(vm_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type); -extern struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type); -extern struct page *DRM(vm_dma_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type); -extern struct page *DRM(vm_sg_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type); extern void DRM(vm_open)(struct vm_area_struct *vma); 
extern void DRM(vm_close)(struct vm_area_struct *vma); extern void DRM(vm_shm_close)(struct vm_area_struct *vma); @@ -804,8 +810,8 @@ extern int DRM(unbind_agp)(DRM #endif /* Misc. IOCTL support (drm_ioctl.h) */ -extern int DRM(irq_busid)(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg); +extern int DRM(irq_by_busid)(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); extern int DRM(getunique)(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); extern int DRM(setunique)(struct inode *inode, struct file *filp, @@ -816,6 +822,8 @@ extern int DRM(getclient)(struct in unsigned int cmd, unsigned long arg); extern int DRM(getstats)(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg); +extern int DRM(setversion)(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); /* Context IOCTL support (drm_context.h) */ extern int DRM(resctx)( struct inode *inode, struct file *filp, @@ -900,12 +908,17 @@ extern int DRM(dma_setup)(drm_devic extern void DRM(dma_takedown)(drm_device_t *dev); extern void DRM(free_buffer)(drm_device_t *dev, drm_buf_t *buf); extern void DRM(reclaim_buffers)( struct file *filp ); -#if __HAVE_DMA_IRQ +#endif /* __HAVE_DMA */ + + /* IRQ support (drm_irq.h) */ +#if __HAVE_IRQ || __HAVE_DMA extern int DRM(control)( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); -extern int DRM(irq_install)( drm_device_t *dev, int irq ); +#endif +#if __HAVE_IRQ +extern int DRM(irq_install)( drm_device_t *dev ); extern int DRM(irq_uninstall)( drm_device_t *dev ); -extern irqreturn_t DRM(dma_service)( DRM_IRQ_ARGS ); +extern irqreturn_t DRM(irq_handler)( DRM_IRQ_ARGS ); extern void DRM(driver_irq_preinstall)( drm_device_t *dev ); extern void DRM(driver_irq_postinstall)( drm_device_t *dev ); extern void DRM(driver_irq_uninstall)( drm_device_t *dev ); @@ -915,12 +928,11 @@ extern int DRM(wait_vblank)(st extern int 
DRM(vblank_wait)(drm_device_t *dev, unsigned int *vbl_seq); extern void DRM(vbl_send_signals)( drm_device_t *dev ); #endif -#if __HAVE_DMA_IRQ_BH -extern void DRM(dma_immediate_bh)( void *dev ); +#if __HAVE_IRQ_BH +extern void DRM(irq_immediate_bh)( void *dev ); #endif #endif -#endif /* __HAVE_DMA */ #if __REALLY_HAVE_AGP /* AGP/GART support (drm_agpsupport.h) */ --- linux-2.6.6-rc1/drivers/char/drm/drm_sarea.h 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/drm_sarea.h 2004-04-18 22:25:29.194392256 -0700 @@ -32,9 +32,23 @@ #ifndef _DRM_SAREA_H_ #define _DRM_SAREA_H_ +#include "drm.h" + +/* SAREA area needs to be at least a page */ +#if defined(__alpha__) +#define SAREA_MAX 0x2000 +#elif defined(__ia64__) +#define SAREA_MAX 0x10000 /* 64kB */ +#else +/* Intel 830M driver needs at least 8k SAREA */ +#define SAREA_MAX 0x2000 +#endif + /** Maximum number of drawables in the SAREA */ #define SAREA_MAX_DRAWABLES 256 +#define SAREA_DRAWABLE_CLAIMED_ENTRY 0x80000000 + /** SAREA drawable */ typedef struct drm_sarea_drawable { unsigned int stamp; --- linux-2.6.6-rc1/drivers/char/drm/drm_stub.h 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/char/drm/drm_stub.h 2004-04-18 22:25:29.194392256 -0700 @@ -209,8 +209,8 @@ int DRM(stub_register)(const char *name, ret2 = DRM(stub_info).info_register(name, fops, dev); if (ret2) { if (!ret1) { - unregister_chrdev(DRM_MAJOR, "drm"); - class_simple_destroy(drm_class); + unregister_chrdev(DRM_MAJOR, "drm"); + class_simple_destroy(drm_class); } if (!i) inter_module_unregister("drm"); --- linux-2.6.6-rc1/drivers/char/drm/drm_vm.h 2004-01-09 00:04:31.000000000 -0800 +++ 25/drivers/char/drm/drm_vm.h 2004-04-18 22:25:29.197391800 -0700 @@ -35,48 +35,19 @@ #include "drmP.h" -/** AGP virtual memory operations */ -struct vm_operations_struct DRM(vm_ops) = { - .nopage = DRM(vm_nopage), - .open = DRM(vm_open), - .close = DRM(vm_close), -}; - -/** Shared virtual memory operations */ -struct vm_operations_struct DRM(vm_shm_ops) = { - 
.nopage = DRM(vm_shm_nopage), - .open = DRM(vm_open), - .close = DRM(vm_shm_close), -}; - -/** DMA virtual memory operations */ -struct vm_operations_struct DRM(vm_dma_ops) = { - .nopage = DRM(vm_dma_nopage), - .open = DRM(vm_open), - .close = DRM(vm_close), -}; - -/** Scatter-gather virtual memory operations */ -struct vm_operations_struct DRM(vm_sg_ops) = { - .nopage = DRM(vm_sg_nopage), - .open = DRM(vm_open), - .close = DRM(vm_close), -}; /** * \c nopage method for AGP virtual memory. * * \param vma virtual memory area. * \param address access address. - * \param write_access sharing. * \return pointer to the page structure. * * Find the right map and if it's AGP memory find the real physical page to * map, get the page, increment the use count and return it. */ -struct page *DRM(vm_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type) +static __inline__ struct page *DRM(do_vm_nopage)(struct vm_area_struct *vma, + unsigned long address) { #if __REALLY_HAVE_AGP drm_file_t *priv = vma->vm_file->private_data; @@ -133,8 +104,6 @@ struct page *DRM(vm_nopage)(struct vm_ar baddr, __va(agpmem->memory->memory[offset]), offset, atomic_read(&page->count)); - if (type) - *type = VM_FAULT_MINOR; return page; } vm_nopage_error: @@ -148,15 +117,13 @@ vm_nopage_error: * * \param vma virtual memory area. * \param address access address. - * \param write_access sharing. * \return pointer to the page structure. * * Get the the mapping, find the real physical page to map, get the page, and * return it. 
*/ -struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type) +static __inline__ struct page *DRM(do_vm_shm_nopage)(struct vm_area_struct *vma, + unsigned long address) { drm_map_t *map = (drm_map_t *)vma->vm_private_data; unsigned long offset; @@ -172,8 +139,6 @@ struct page *DRM(vm_shm_nopage)(struct v if (!page) return NOPAGE_OOM; get_page(page); - if (type) - *type = VM_FAULT_MINOR; DRM_DEBUG("shm_nopage 0x%lx\n", address); return page; @@ -265,14 +230,12 @@ void DRM(vm_shm_close)(struct vm_area_st * * \param vma virtual memory area. * \param address access address. - * \param write_access sharing. * \return pointer to the page structure. * * Determine the page number from the page offset and get it from drm_device_dma::pagelist. */ -struct page *DRM(vm_dma_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type) +static __inline__ struct page *DRM(do_vm_dma_nopage)(struct vm_area_struct *vma, + unsigned long address) { drm_file_t *priv = vma->vm_file->private_data; drm_device_t *dev = priv->dev; @@ -291,8 +254,6 @@ struct page *DRM(vm_dma_nopage)(struct v (offset & (~PAGE_MASK)))); get_page(page); - if (type) - *type = VM_FAULT_MINOR; DRM_DEBUG("dma_nopage 0x%lx (page %lu)\n", address, page_nr); return page; @@ -303,14 +264,12 @@ struct page *DRM(vm_dma_nopage)(struct v * * \param vma virtual memory area. * \param address access address. - * \param write_access sharing. * \return pointer to the page structure. * * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist. 
*/ -struct page *DRM(vm_sg_nopage)(struct vm_area_struct *vma, - unsigned long address, - int *type) +static __inline__ struct page *DRM(do_vm_sg_nopage)(struct vm_area_struct *vma, + unsigned long address) { drm_map_t *map = (drm_map_t *)vma->vm_private_data; drm_file_t *priv = vma->vm_file->private_data; @@ -331,12 +290,99 @@ struct page *DRM(vm_sg_nopage)(struct vm page_offset = (offset >> PAGE_SHIFT) + (map_offset >> PAGE_SHIFT); page = entry->pagelist[page_offset]; get_page(page); - if (type) - *type = VM_FAULT_MINOR; return page; } + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) + +static struct page *DRM(vm_nopage)(struct vm_area_struct *vma, + unsigned long address, + int *type) { + if (type) *type = VM_FAULT_MINOR; + return DRM(do_vm_nopage)(vma, address); +} + +static struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma, + unsigned long address, + int *type) { + if (type) *type = VM_FAULT_MINOR; + return DRM(do_vm_shm_nopage)(vma, address); +} + +static struct page *DRM(vm_dma_nopage)(struct vm_area_struct *vma, + unsigned long address, + int *type) { + if (type) *type = VM_FAULT_MINOR; + return DRM(do_vm_dma_nopage)(vma, address); +} + +static struct page *DRM(vm_sg_nopage)(struct vm_area_struct *vma, + unsigned long address, + int *type) { + if (type) *type = VM_FAULT_MINOR; + return DRM(do_vm_sg_nopage)(vma, address); +} + +#else /* LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) */ + +static struct page *DRM(vm_nopage)(struct vm_area_struct *vma, + unsigned long address, + int unused) { + return DRM(do_vm_nopage)(vma, address); +} + +static struct page *DRM(vm_shm_nopage)(struct vm_area_struct *vma, + unsigned long address, + int unused) { + return DRM(do_vm_shm_nopage)(vma, address); +} + +static struct page *DRM(vm_dma_nopage)(struct vm_area_struct *vma, + unsigned long address, + int unused) { + return DRM(do_vm_dma_nopage)(vma, address); +} + +static struct page *DRM(vm_sg_nopage)(struct vm_area_struct *vma, + unsigned long address, + int 
unused) { + return DRM(do_vm_sg_nopage)(vma, address); +} + +#endif + + +/** AGP virtual memory operations */ +static struct vm_operations_struct DRM(vm_ops) = { + .nopage = DRM(vm_nopage), + .open = DRM(vm_open), + .close = DRM(vm_close), +}; + +/** Shared virtual memory operations */ +static struct vm_operations_struct DRM(vm_shm_ops) = { + .nopage = DRM(vm_shm_nopage), + .open = DRM(vm_open), + .close = DRM(vm_shm_close), +}; + +/** DMA virtual memory operations */ +static struct vm_operations_struct DRM(vm_dma_ops) = { + .nopage = DRM(vm_dma_nopage), + .open = DRM(vm_open), + .close = DRM(vm_close), +}; + +/** Scatter-gather virtual memory operations */ +static struct vm_operations_struct DRM(vm_sg_ops) = { + .nopage = DRM(vm_sg_nopage), + .open = DRM(vm_open), + .close = DRM(vm_close), +}; + + /** * \c open method for shared virtual memory. * --- linux-2.6.6-rc1/drivers/char/drm/gamma_dma.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/gamma_dma.c 2004-04-18 22:25:29.198391648 -0700 @@ -116,7 +116,7 @@ static inline int gamma_dma_is_ready(drm return (!GAMMA_READ(GAMMA_DMACOUNT)); } -irqreturn_t gamma_dma_service( DRM_IRQ_ARGS ) +irqreturn_t gamma_irq_handler( DRM_IRQ_ARGS ) { drm_device_t *dev = (drm_device_t *)arg; drm_device_dma_t *dma = dev->dma; @@ -262,7 +262,7 @@ static void gamma_dma_timer_bh(unsigned gamma_dma_schedule((drm_device_t *)dev, 0); } -void gamma_dma_immediate_bh(void *dev) +void gamma_irq_immediate_bh(void *dev) { gamma_dma_schedule(dev, 0); } @@ -656,12 +656,12 @@ int gamma_do_cleanup_dma( drm_device_t * { DRM_DEBUG( "%s\n", __FUNCTION__ ); -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. 
*/ - if ( dev->irq ) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if ( dev->dev_private ) { --- linux-2.6.6-rc1/drivers/char/drm/gamma_drv.c 2003-06-14 12:18:33.000000000 -0700 +++ 25/drivers/char/drm/gamma_drv.c 2004-04-18 22:25:29.199391496 -0700 @@ -48,6 +48,7 @@ #include "drm_fops.h" #include "drm_init.h" #include "drm_ioctl.h" +#include "drm_irq.h" #include "gamma_lists.h" /* NOTE */ #include "drm_lock.h" #include "gamma_lock.h" /* NOTE */ --- linux-2.6.6-rc1/drivers/char/drm/gamma.h 2003-06-14 12:17:55.000000000 -0700 +++ 25/drivers/char/drm/gamma.h 2004-04-18 22:25:29.197391800 -0700 @@ -53,6 +53,10 @@ [DRM_IOCTL_NR(DRM_IOCTL_GAMMA_INIT)] = { gamma_dma_init, 1, 1 }, \ [DRM_IOCTL_NR(DRM_IOCTL_GAMMA_COPY)] = { gamma_dma_copy, 1, 1 } +#define DRIVER_PCI_IDS \ + {0x3d3d, 0x0008, 0}, \ + {0, 0, 0} + #define IOCTL_TABLE_NAME DRM(ioctls) #define IOCTL_FUNC_NAME DRM(ioctl) @@ -104,8 +108,8 @@ return 0; \ } while (0) -#define __HAVE_DMA_IRQ 1 -#define __HAVE_DMA_IRQ_BH 1 +#define __HAVE_IRQ 1 +#define __HAVE_IRQ_BH 1 #define DRIVER_AGP_BUFFERS_MAP( dev ) \ ((drm_gamma_private_t *)((dev)->dev_private))->buffers --- linux-2.6.6-rc1/drivers/char/drm/i810_dma.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/drm/i810_dma.c 2004-04-18 22:25:29.200391344 -0700 @@ -232,12 +232,12 @@ int i810_dma_cleanup(drm_device_t *dev) { drm_device_dma_t *dma = dev->dma; -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. 
*/ - if (dev->irq) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if (dev->dev_private) { --- linux-2.6.6-rc1/drivers/char/drm/i810.h 2003-08-22 19:23:40.000000000 -0700 +++ 25/drivers/char/drm/i810.h 2004-04-18 22:25:29.199391496 -0700 @@ -77,7 +77,14 @@ [DRM_IOCTL_NR(DRM_IOCTL_I810_MC)] = { i810_dma_mc, 1, 1 }, \ [DRM_IOCTL_NR(DRM_IOCTL_I810_RSTATUS)] = { i810_rstatus, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_I810_FLIP)] = { i810_flip_bufs, 1, 0 } - + +#define DRIVER_PCI_IDS \ + {0x8086, 0x7121, 0}, \ + {0x8086, 0x7123, 0}, \ + {0x8086, 0x7125, 0}, \ + {0x8086, 0x1132, 0}, \ + {0, 0, 0} + #define __HAVE_COUNTERS 4 #define __HAVE_COUNTER6 _DRM_STAT_IRQ @@ -112,7 +119,7 @@ * a noop stub is generated for compatibility. */ /* XXX: Add vblank support? */ -#define __HAVE_DMA_IRQ 0 +#define __HAVE_IRQ 0 /* Buffer customization: */ --- linux-2.6.6-rc1/drivers/char/drm/i830_dma.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/drm/i830_dma.c 2004-04-18 22:25:29.202391040 -0700 @@ -232,12 +232,12 @@ int i830_dma_cleanup(drm_device_t *dev) { drm_device_dma_t *dma = dev->dma; -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. */ - if (dev->irq) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if (dev->dev_private) { @@ -1540,7 +1540,7 @@ int i830_getparam( struct inode *inode, switch( param.param ) { case I830_PARAM_IRQ_ACTIVE: - value = dev->irq ? 
1 : 0; + value = dev->irq_enabled; break; default: return -EINVAL; --- linux-2.6.6-rc1/drivers/char/drm/i830_drv.c 2003-06-14 12:18:06.000000000 -0700 +++ 25/drivers/char/drm/i830_drv.c 2004-04-18 22:25:29.203390888 -0700 @@ -50,6 +50,7 @@ #include "drm_fops.h" #include "drm_init.h" #include "drm_ioctl.h" +#include "drm_irq.h" #include "drm_lock.h" #include "drm_memory.h" #include "drm_proc.h" --- linux-2.6.6-rc1/drivers/char/drm/i830.h 2003-06-14 12:18:06.000000000 -0700 +++ 25/drivers/char/drm/i830.h 2004-04-18 22:25:29.201391192 -0700 @@ -77,6 +77,13 @@ [DRM_IOCTL_NR(DRM_IOCTL_I830_GETPARAM)] = { i830_getparam, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_I830_SETPARAM)] = { i830_setparam, 1, 0 } +#define DRIVER_PCI_IDS \ + {0x8086, 0x3577, 0}, \ + {0x8086, 0x2562, 0}, \ + {0x8086, 0x3582, 0}, \ + {0x8086, 0x2572, 0}, \ + {0, 0, 0} + #define __HAVE_COUNTERS 4 #define __HAVE_COUNTER6 _DRM_STAT_IRQ #define __HAVE_COUNTER7 _DRM_STAT_PRIMARY @@ -115,10 +122,10 @@ #define USE_IRQS 0 #if USE_IRQS -#define __HAVE_DMA_IRQ 1 +#define __HAVE_IRQ 1 #define __HAVE_SHARED_IRQ 1 #else -#define __HAVE_DMA_IRQ 0 +#define __HAVE_IRQ 0 #endif --- linux-2.6.6-rc1/drivers/char/drm/i830_irq.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/drm/i830_irq.c 2004-04-18 22:25:29.203390888 -0700 @@ -35,7 +35,7 @@ #include -irqreturn_t DRM(dma_service)( DRM_IRQ_ARGS ) +irqreturn_t DRM(irq_handler)( DRM_IRQ_ARGS ) { drm_device_t *dev = (drm_device_t *)arg; drm_i830_private_t *dev_priv = (drm_i830_private_t *)dev->dev_private; --- linux-2.6.6-rc1/drivers/char/drm/mga_dma.c 2003-06-14 12:18:32.000000000 -0700 +++ 25/drivers/char/drm/mga_dma.c 2004-04-18 22:25:29.205390584 -0700 @@ -500,14 +500,6 @@ static int mga_do_init_dma( drm_device_t return DRM_ERR(EINVAL); } - DRM_FIND_MAP( dev_priv->fb, init->fb_offset ); - if(!dev_priv->fb) { - DRM_ERROR( "failed to find framebuffer!\n" ); - /* Assign dev_private so we can do cleanup. 
*/ - dev->dev_private = (void *)dev_priv; - mga_do_cleanup_dma( dev ); - return DRM_ERR(EINVAL); - } DRM_FIND_MAP( dev_priv->mmio, init->mmio_offset ); if(!dev_priv->mmio) { DRM_ERROR( "failed to find mmio region!\n" ); @@ -639,12 +631,12 @@ int mga_do_cleanup_dma( drm_device_t *de { DRM_DEBUG( "\n" ); -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. */ - if ( dev->irq ) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if ( dev->dev_private ) { --- linux-2.6.6-rc1/drivers/char/drm/mga_drm.h 2003-06-14 12:18:28.000000000 -0700 +++ 25/drivers/char/drm/mga_drm.h 2004-04-18 22:25:29.206390432 -0700 @@ -117,6 +117,8 @@ #define MGA_NR_TEX_REGIONS 16 #define MGA_LOG_MIN_TEX_REGION_SIZE 16 +#define DRM_MGA_IDLE_RETRY 2048 + #endif /* __MGA_SAREA_DEFINES__ */ @@ -230,16 +232,27 @@ typedef struct _drm_mga_sarea { /* MGA specific ioctls * The device specific ioctl range is 0x40 to 0x79. 
*/ -#define DRM_IOCTL_MGA_INIT DRM_IOW( 0x40, drm_mga_init_t) -#define DRM_IOCTL_MGA_FLUSH DRM_IOW( 0x41, drm_lock_t) -#define DRM_IOCTL_MGA_RESET DRM_IO( 0x42) -#define DRM_IOCTL_MGA_SWAP DRM_IO( 0x43) -#define DRM_IOCTL_MGA_CLEAR DRM_IOW( 0x44, drm_mga_clear_t) -#define DRM_IOCTL_MGA_VERTEX DRM_IOW( 0x45, drm_mga_vertex_t) -#define DRM_IOCTL_MGA_INDICES DRM_IOW( 0x46, drm_mga_indices_t) -#define DRM_IOCTL_MGA_ILOAD DRM_IOW( 0x47, drm_mga_iload_t) -#define DRM_IOCTL_MGA_BLIT DRM_IOW( 0x48, drm_mga_blit_t) -#define DRM_IOCTL_MGA_GETPARAM DRM_IOWR(0x49, drm_mga_getparam_t) +#define DRM_MGA_INIT 0x00 +#define DRM_MGA_FLUSH 0x01 +#define DRM_MGA_RESET 0x02 +#define DRM_MGA_SWAP 0x03 +#define DRM_MGA_CLEAR 0x04 +#define DRM_MGA_VERTEX 0x05 +#define DRM_MGA_INDICES 0x06 +#define DRM_MGA_ILOAD 0x07 +#define DRM_MGA_BLIT 0x08 +#define DRM_MGA_GETPARAM 0x09 + +#define DRM_IOCTL_MGA_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INIT, drm_mga_init_t) +#define DRM_IOCTL_MGA_FLUSH DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_FLUSH, drm_lock_t) +#define DRM_IOCTL_MGA_RESET DRM_IO( DRM_COMMAND_BASE + DRM_MGA_RESET) +#define DRM_IOCTL_MGA_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_MGA_SWAP) +#define DRM_IOCTL_MGA_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_CLEAR, drm_mga_clear_t) +#define DRM_IOCTL_MGA_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_VERTEX, drm_mga_vertex_t) +#define DRM_IOCTL_MGA_INDICES DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_INDICES, drm_mga_indices_t) +#define DRM_IOCTL_MGA_ILOAD DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_ILOAD, drm_mga_iload_t) +#define DRM_IOCTL_MGA_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_BLIT, drm_mga_blit_t) +#define DRM_IOCTL_MGA_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_GETPARAM, drm_mga_getparam_t) typedef struct _drm_mga_warp_index { int installed; @@ -330,7 +343,7 @@ typedef struct _drm_mga_blit { typedef struct drm_mga_getparam { int param; - int *value; + void *value; } drm_mga_getparam_t; #endif --- linux-2.6.6-rc1/drivers/char/drm/mga_drv.c 2003-06-14 
12:18:08.000000000 -0700 +++ 25/drivers/char/drm/mga_drv.c 2004-04-18 22:25:29.206390432 -0700 @@ -45,6 +45,7 @@ #include "drm_fops.h" #include "drm_init.h" #include "drm_ioctl.h" +#include "drm_irq.h" #include "drm_lock.h" #include "drm_memory.h" #include "drm_proc.h" --- linux-2.6.6-rc1/drivers/char/drm/mga_drv.h 2003-06-14 12:18:33.000000000 -0700 +++ 25/drivers/char/drm/mga_drv.h 2004-04-18 22:25:29.207390280 -0700 @@ -91,7 +91,6 @@ typedef struct drm_mga_private { unsigned int texture_size; drm_local_map_t *sarea; - drm_local_map_t *fb; drm_local_map_t *mmio; drm_local_map_t *status; drm_local_map_t *warp; --- linux-2.6.6-rc1/drivers/char/drm/mga.h 2003-06-14 12:17:55.000000000 -0700 +++ 25/drivers/char/drm/mga.h 2004-04-18 22:25:29.204390736 -0700 @@ -64,6 +64,12 @@ [DRM_IOCTL_NR(DRM_IOCTL_MGA_BLIT)] = { mga_dma_blit, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_MGA_GETPARAM)]= { mga_getparam, 1, 0 }, +#define DRIVER_PCI_IDS \ + {0x102b, 0x0521, 0}, \ + {0x102b, 0x0525, 0}, \ + {0x102b, 0x2527, 0}, \ + {0, 0, 0} + #define __HAVE_COUNTERS 3 #define __HAVE_COUNTER6 _DRM_STAT_IRQ #define __HAVE_COUNTER7 _DRM_STAT_PRIMARY @@ -78,7 +84,7 @@ /* DMA customization: */ #define __HAVE_DMA 1 -#define __HAVE_DMA_IRQ 1 +#define __HAVE_IRQ 1 #define __HAVE_VBL_IRQ 1 #define __HAVE_SHARED_IRQ 1 --- linux-2.6.6-rc1/drivers/char/drm/mga_irq.c 2003-06-14 12:17:55.000000000 -0700 +++ 25/drivers/char/drm/mga_irq.c 2004-04-18 22:25:29.207390280 -0700 @@ -36,7 +36,7 @@ #include "mga_drm.h" #include "mga_drv.h" -irqreturn_t mga_dma_service( DRM_IRQ_ARGS ) +irqreturn_t mga_irq_handler( DRM_IRQ_ARGS ) { drm_device_t *dev = (drm_device_t *) arg; drm_mga_private_t *dev_priv = --- linux-2.6.6-rc1/drivers/char/drm/r128_cce.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/r128_cce.c 2004-04-18 22:25:29.210389824 -0700 @@ -212,7 +212,7 @@ int r128_do_cce_idle( drm_r128_private_t int i; for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { - if ( GET_RING_HEAD( &dev_priv->ring ) == 
dev_priv->ring.tail ) { + if ( GET_RING_HEAD( dev_priv ) == dev_priv->ring.tail ) { int pm4stat = R128_READ( R128_PM4_STAT ); if ( ( (pm4stat & R128_PM4_FIFOCNT_MASK) >= dev_priv->cce_fifo_size ) && @@ -238,7 +238,8 @@ static void r128_do_cce_start( drm_r128_ r128_do_wait_for_idle( dev_priv ); R128_WRITE( R128_PM4_BUFFER_CNTL, - dev_priv->cce_mode | dev_priv->ring.size_l2qw ); + dev_priv->cce_mode | dev_priv->ring.size_l2qw + | R128_PM4_BUFFER_CNTL_NOUPDATE ); R128_READ( R128_PM4_BUFFER_ADDR ); /* as per the sample code */ R128_WRITE( R128_PM4_MICRO_CNTL, R128_PM4_MICRO_FREERUN ); @@ -253,7 +254,6 @@ static void r128_do_cce_reset( drm_r128_ { R128_WRITE( R128_PM4_BUFFER_DL_WPTR, 0 ); R128_WRITE( R128_PM4_BUFFER_DL_RPTR, 0 ); - SET_RING_HEAD( &dev_priv->ring, 0 ); dev_priv->ring.tail = 0; } @@ -264,7 +264,8 @@ static void r128_do_cce_reset( drm_r128_ static void r128_do_cce_stop( drm_r128_private_t *dev_priv ) { R128_WRITE( R128_PM4_MICRO_CNTL, 0 ); - R128_WRITE( R128_PM4_BUFFER_CNTL, R128_PM4_NONPM4 ); + R128_WRITE( R128_PM4_BUFFER_CNTL, + R128_PM4_NONPM4 | R128_PM4_BUFFER_CNTL_NOUPDATE ); dev_priv->cce_running = 0; } @@ -333,26 +334,6 @@ static void r128_cce_init_ring_buffer( d R128_WRITE( R128_PM4_BUFFER_DL_WPTR, 0 ); R128_WRITE( R128_PM4_BUFFER_DL_RPTR, 0 ); - /* DL_RPTR_ADDR is a physical address in AGP space. 
*/ - SET_RING_HEAD( &dev_priv->ring, 0 ); - - if ( !dev_priv->is_pci ) { - R128_WRITE( R128_PM4_BUFFER_DL_RPTR_ADDR, - dev_priv->ring_rptr->offset ); - } else { - drm_sg_mem_t *entry = dev->sg; - unsigned long tmp_ofs, page_ofs; - - tmp_ofs = dev_priv->ring_rptr->offset - dev->sg->handle; - page_ofs = tmp_ofs >> PAGE_SHIFT; - - R128_WRITE( R128_PM4_BUFFER_DL_RPTR_ADDR, - entry->busaddr[page_ofs]); - DRM_DEBUG( "ring rptr: offset=0x%08lx handle=0x%08lx\n", - (unsigned long) entry->busaddr[page_ofs], - entry->handle + tmp_ofs ); - } - /* Set watermark control */ R128_WRITE( R128_PM4_BUFFER_WM_CNTL, ((R128_WATERMARK_L/4) << R128_WMA_SHIFT) @@ -486,13 +467,6 @@ static int r128_do_init_cce( drm_device_ return DRM_ERR(EINVAL); } - DRM_FIND_MAP( dev_priv->fb, init->fb_offset ); - if(!dev_priv->fb) { - DRM_ERROR("could not find framebuffer!\n"); - dev->dev_private = (void *)dev_priv; - r128_do_cleanup_cce( dev ); - return DRM_ERR(EINVAL); - } DRM_FIND_MAP( dev_priv->mmio, init->mmio_offset ); if(!dev_priv->mmio) { DRM_ERROR("could not find mmio region!\n"); @@ -567,9 +541,6 @@ static int r128_do_init_cce( drm_device_ #endif dev_priv->cce_buffers_offset = dev->sg->handle; - dev_priv->ring.head = ((__volatile__ u32 *) - dev_priv->ring_rptr->handle); - dev_priv->ring.start = (u32 *)dev_priv->cce_ring->handle; dev_priv->ring.end = ((u32 *)dev_priv->cce_ring->handle + init->ring_size / sizeof(u32)); @@ -580,7 +551,6 @@ static int r128_do_init_cce( drm_device_ (dev_priv->ring.size / sizeof(u32)) - 1; dev_priv->ring.high_mark = 128; - dev_priv->ring.ring_rptr = dev_priv->ring_rptr; dev_priv->sarea_priv->last_frame = 0; R128_WRITE( R128_LAST_FRAME_REG, dev_priv->sarea_priv->last_frame ); @@ -589,8 +559,9 @@ static int r128_do_init_cce( drm_device_ R128_WRITE( R128_LAST_DISPATCH_REG, dev_priv->sarea_priv->last_dispatch ); -#if __REALLY_HAVE_SG +#if __REALLY_HAVE_AGP if ( dev_priv->is_pci ) { +#endif if (!DRM(ati_pcigart_init)( dev, &dev_priv->phys_pci_gart, &dev_priv->bus_pci_gart) 
) { DRM_ERROR( "failed to init PCI GART!\n" ); @@ -599,6 +570,7 @@ static int r128_do_init_cce( drm_device_ return DRM_ERR(ENOMEM); } R128_WRITE( R128_PCI_GART_PAGE, dev_priv->bus_pci_gart ); +#if __REALLY_HAVE_AGP } #endif @@ -615,12 +587,12 @@ static int r128_do_init_cce( drm_device_ int r128_do_cleanup_cce( drm_device_t *dev ) { -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. */ - if ( dev->irq ) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if ( dev->dev_private ) { @@ -901,7 +873,7 @@ int r128_wait_ring( drm_r128_private_t * int i; for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { - r128_update_ring_snapshot( ring ); + r128_update_ring_snapshot( dev_priv ); if ( ring->space >= n ) return 0; DRM_UDELAY( 1 ); --- linux-2.6.6-rc1/drivers/char/drm/r128_drm.h 2003-08-08 22:55:11.000000000 -0700 +++ 25/drivers/char/drm/r128_drm.h 2004-04-18 22:25:29.211389672 -0700 @@ -176,24 +176,47 @@ typedef struct drm_r128_sarea { /* Rage 128 specific ioctls * The device specific ioctl range is 0x40 to 0x79. 
*/ -#define DRM_IOCTL_R128_INIT DRM_IOW( 0x40, drm_r128_init_t) -#define DRM_IOCTL_R128_CCE_START DRM_IO( 0x41) -#define DRM_IOCTL_R128_CCE_STOP DRM_IOW( 0x42, drm_r128_cce_stop_t) -#define DRM_IOCTL_R128_CCE_RESET DRM_IO( 0x43) -#define DRM_IOCTL_R128_CCE_IDLE DRM_IO( 0x44) -#define DRM_IOCTL_R128_RESET DRM_IO( 0x46) -#define DRM_IOCTL_R128_SWAP DRM_IO( 0x47) -#define DRM_IOCTL_R128_CLEAR DRM_IOW( 0x48, drm_r128_clear_t) -#define DRM_IOCTL_R128_VERTEX DRM_IOW( 0x49, drm_r128_vertex_t) -#define DRM_IOCTL_R128_INDICES DRM_IOW( 0x4a, drm_r128_indices_t) -#define DRM_IOCTL_R128_BLIT DRM_IOW( 0x4b, drm_r128_blit_t) -#define DRM_IOCTL_R128_DEPTH DRM_IOW( 0x4c, drm_r128_depth_t) -#define DRM_IOCTL_R128_STIPPLE DRM_IOW( 0x4d, drm_r128_stipple_t) -#define DRM_IOCTL_R128_INDIRECT DRM_IOWR(0x4f, drm_r128_indirect_t) -#define DRM_IOCTL_R128_FULLSCREEN DRM_IOW( 0x50, drm_r128_fullscreen_t) -#define DRM_IOCTL_R128_CLEAR2 DRM_IOW( 0x51, drm_r128_clear2_t) -#define DRM_IOCTL_R128_GETPARAM DRM_IOW( 0x52, drm_r128_getparam_t) -#define DRM_IOCTL_R128_FLIP DRM_IO( 0x53) +#define DRM_R128_INIT 0x00 +#define DRM_R128_CCE_START 0x01 +#define DRM_R128_CCE_STOP 0x02 +#define DRM_R128_CCE_RESET 0x03 +#define DRM_R128_CCE_IDLE 0x04 +/* 0x05 not used */ +#define DRM_R128_RESET 0x06 +#define DRM_R128_SWAP 0x07 +#define DRM_R128_CLEAR 0x08 +#define DRM_R128_VERTEX 0x09 +#define DRM_R128_INDICES 0x0a +#define DRM_R128_BLIT 0x0b +#define DRM_R128_DEPTH 0x0c +#define DRM_R128_STIPPLE 0x0d +/* 0x0e not used */ +#define DRM_R128_INDIRECT 0x0f +#define DRM_R128_FULLSCREEN 0x10 +#define DRM_R128_CLEAR2 0x11 +#define DRM_R128_GETPARAM 0x12 +#define DRM_R128_FLIP 0x13 + +#define DRM_IOCTL_R128_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_R128_INIT, drm_r128_init_t) +#define DRM_IOCTL_R128_CCE_START DRM_IO( DRM_COMMAND_BASE + DRM_R128_CCE_START) +#define DRM_IOCTL_R128_CCE_STOP DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CCE_STOP, drm_r128_cce_stop_t) +#define DRM_IOCTL_R128_CCE_RESET DRM_IO( DRM_COMMAND_BASE + 
DRM_R128_CCE_RESET) +#define DRM_IOCTL_R128_CCE_IDLE DRM_IO( DRM_COMMAND_BASE + DRM_R128_CCE_IDLE) +/* 0x05 not used */ +#define DRM_IOCTL_R128_RESET DRM_IO( DRM_COMMAND_BASE + DRM_R128_RESET) +#define DRM_IOCTL_R128_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_R128_SWAP) +#define DRM_IOCTL_R128_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CLEAR, drm_r128_clear_t) +#define DRM_IOCTL_R128_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_R128_VERTEX, drm_r128_vertex_t) +#define DRM_IOCTL_R128_INDICES DRM_IOW( DRM_COMMAND_BASE + DRM_R128_INDICES, drm_r128_indices_t) +#define DRM_IOCTL_R128_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_R128_BLIT, drm_r128_blit_t) +#define DRM_IOCTL_R128_DEPTH DRM_IOW( DRM_COMMAND_BASE + DRM_R128_DEPTH, drm_r128_depth_t) +#define DRM_IOCTL_R128_STIPPLE DRM_IOW( DRM_COMMAND_BASE + DRM_R128_STIPPLE, drm_r128_stipple_t) +/* 0x0e not used */ +#define DRM_IOCTL_R128_INDIRECT DRM_IOWR(DRM_COMMAND_BASE + DRM_R128_INDIRECT, drm_r128_indirect_t) +#define DRM_IOCTL_R128_FULLSCREEN DRM_IOW( DRM_COMMAND_BASE + DRM_R128_FULLSCREEN, drm_r128_fullscreen_t) +#define DRM_IOCTL_R128_CLEAR2 DRM_IOW( DRM_COMMAND_BASE + DRM_R128_CLEAR2, drm_r128_clear2_t) +#define DRM_IOCTL_R128_GETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_R128_GETPARAM, drm_r128_getparam_t) +#define DRM_IOCTL_R128_FLIP DRM_IO( DRM_COMMAND_BASE + DRM_R128_FLIP) typedef struct drm_r128_init { enum { @@ -316,7 +339,7 @@ typedef struct drm_r128_fullscreen { typedef struct drm_r128_getparam { int param; - int *value; + void *value; } drm_r128_getparam_t; #endif --- linux-2.6.6-rc1/drivers/char/drm/r128_drv.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/drivers/char/drm/r128_drv.c 2004-04-18 22:25:29.211389672 -0700 @@ -47,6 +47,7 @@ #include "drm_fops.h" #include "drm_init.h" #include "drm_ioctl.h" +#include "drm_irq.h" #include "drm_lock.h" #include "drm_memory.h" #include "drm_proc.h" --- linux-2.6.6-rc1/drivers/char/drm/r128_drv.h 2003-08-08 22:55:11.000000000 -0700 +++ 25/drivers/char/drm/r128_drv.h 2004-04-18 
22:25:29.212389520 -0700 @@ -34,8 +34,7 @@ #ifndef __R128_DRV_H__ #define __R128_DRV_H__ -#define GET_RING_HEAD(ring) DRM_READ32( (ring)->ring_rptr, 0 ) /* (ring)->head */ -#define SET_RING_HEAD(ring,val) DRM_WRITE32( (ring)->ring_rptr, 0, (val) ) /* (ring)->head */ +#define GET_RING_HEAD(dev_priv) R128_READ( R128_PM4_BUFFER_DL_RPTR ) typedef struct drm_r128_freelist { unsigned int age; @@ -50,13 +49,11 @@ typedef struct drm_r128_ring_buffer { int size; int size_l2qw; - volatile u32 *head; u32 tail; u32 tail_mask; int space; int high_mark; - drm_local_map_t *ring_rptr; } drm_r128_ring_buffer_t; typedef struct drm_r128_private { @@ -100,7 +97,6 @@ typedef struct drm_r128_private { u32 span_pitch_offset_c; drm_local_map_t *sarea; - drm_local_map_t *fb; drm_local_map_t *mmio; drm_local_map_t *cce_ring; drm_local_map_t *ring_rptr; @@ -132,14 +128,6 @@ extern drm_buf_t *r128_freelist_get( drm extern int r128_wait_ring( drm_r128_private_t *dev_priv, int n ); -static __inline__ void -r128_update_ring_snapshot( drm_r128_ring_buffer_t *ring ) -{ - ring->space = (GET_RING_HEAD( ring ) - ring->tail) * sizeof(u32); - if ( ring->space <= 0 ) - ring->space += ring->size; -} - extern int r128_do_cce_idle( drm_r128_private_t *dev_priv ); extern int r128_do_cleanup_cce( drm_device_t *dev ); extern int r128_do_cleanup_pageflip( drm_device_t *dev ); @@ -279,6 +267,7 @@ extern int r128_cce_indirect( DRM_IOCTL_ # define R128_PM4_64PIO_64VCBM_64INDBM (7 << 28) # define R128_PM4_64BM_64VCBM_64INDBM (8 << 28) # define R128_PM4_64PIO_64VCPIO_64INDPIO (15 << 28) +# define R128_PM4_BUFFER_CNTL_NOUPDATE (1 << 27) #define R128_PM4_BUFFER_WM_CNTL 0x0708 # define R128_WMA_SHIFT 0 @@ -403,6 +392,15 @@ extern int R128_READ_PLL(drm_device_t *d (pkt) | ((n) << 16)) +static __inline__ void +r128_update_ring_snapshot( drm_r128_private_t *dev_priv ) +{ + drm_r128_ring_buffer_t *ring = &dev_priv->ring; + ring->space = (GET_RING_HEAD( dev_priv ) - ring->tail) * sizeof(u32); + if ( ring->space <= 0 ) + 
ring->space += ring->size; +} + /* ================================================================ * Misc helper macros */ @@ -412,7 +410,7 @@ do { \ drm_r128_ring_buffer_t *ring = &dev_priv->ring; int i; \ if ( ring->space < ring->high_mark ) { \ for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \ - r128_update_ring_snapshot( ring ); \ + r128_update_ring_snapshot( dev_priv ); \ if ( ring->space >= ring->high_mark ) \ goto __ring_space_done; \ DRM_UDELAY(1); \ @@ -445,17 +443,10 @@ do { \ * Ring control */ -#if defined(__powerpc__) -#define r128_flush_write_combine() (void) GET_RING_HEAD( &dev_priv->ring ) -#else -#define r128_flush_write_combine() DRM_WRITEMEMORYBARRIER() -#endif - - #define R128_VERBOSE 0 #define RING_LOCALS \ - int write; unsigned int tail_mask; volatile u32 *ring; + int write, _nr; unsigned int tail_mask; volatile u32 *ring; #define BEGIN_RING( n ) do { \ if ( R128_VERBOSE ) { \ @@ -463,9 +454,10 @@ do { \ (n), __FUNCTION__ ); \ } \ if ( dev_priv->ring.space <= (n) * sizeof(u32) ) { \ + COMMIT_RING(); \ r128_wait_ring( dev_priv, (n) * sizeof(u32) ); \ } \ - dev_priv->ring.space -= (n) * sizeof(u32); \ + _nr = n; dev_priv->ring.space -= (n) * sizeof(u32); \ ring = dev_priv->ring.start; \ write = dev_priv->ring.tail; \ tail_mask = dev_priv->ring.tail_mask; \ @@ -488,9 +480,23 @@ do { \ dev_priv->ring.start, \ write * sizeof(u32) ); \ } \ - r128_flush_write_combine(); \ - dev_priv->ring.tail = write; \ - R128_WRITE( R128_PM4_BUFFER_DL_WPTR, write ); \ + if (((dev_priv->ring.tail + _nr) & tail_mask) != write) { \ + DRM_ERROR( \ + "ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \ + ((dev_priv->ring.tail + _nr) & tail_mask), \ + write, __LINE__); \ + } else \ + dev_priv->ring.tail = write; \ +} while (0) + +#define COMMIT_RING() do { \ + if ( R128_VERBOSE ) { \ + DRM_INFO( "COMMIT_RING() tail=0x%06x\n", \ + dev_priv->ring.tail ); \ + } \ + DRM_MEMORYBARRIER(); \ + R128_WRITE( R128_PM4_BUFFER_DL_WPTR, dev_priv->ring.tail ); \ + R128_READ( 
R128_PM4_BUFFER_DL_WPTR ); \ } while (0) #define OUT_RING( x ) do { \ --- linux-2.6.6-rc1/drivers/char/drm/r128.h 2003-08-22 19:23:40.000000000 -0700 +++ 25/drivers/char/drm/r128.h 2004-04-18 22:25:29.208390128 -0700 @@ -79,6 +79,46 @@ [DRM_IOCTL_NR(DRM_IOCTL_R128_INDIRECT)] = { r128_cce_indirect, 1, 1 }, \ [DRM_IOCTL_NR(DRM_IOCTL_R128_GETPARAM)] = { r128_getparam, 1, 0 }, +#define DRIVER_PCI_IDS \ + {0x1002, 0x4c45, 0}, \ + {0x1002, 0x4c46, 0}, \ + {0x1002, 0x4d46, 0}, \ + {0x1002, 0x4d4c, 0}, \ + {0x1002, 0x5041, 0}, \ + {0x1002, 0x5042, 0}, \ + {0x1002, 0x5043, 0}, \ + {0x1002, 0x5044, 0}, \ + {0x1002, 0x5045, 0}, \ + {0x1002, 0x5046, 0}, \ + {0x1002, 0x5047, 0}, \ + {0x1002, 0x5048, 0}, \ + {0x1002, 0x5049, 0}, \ + {0x1002, 0x504A, 0}, \ + {0x1002, 0x504B, 0}, \ + {0x1002, 0x504C, 0}, \ + {0x1002, 0x504D, 0}, \ + {0x1002, 0x504E, 0}, \ + {0x1002, 0x504F, 0}, \ + {0x1002, 0x5050, 0}, \ + {0x1002, 0x5051, 0}, \ + {0x1002, 0x5052, 0}, \ + {0x1002, 0x5053, 0}, \ + {0x1002, 0x5054, 0}, \ + {0x1002, 0x5055, 0}, \ + {0x1002, 0x5056, 0}, \ + {0x1002, 0x5057, 0}, \ + {0x1002, 0x5058, 0}, \ + {0x1002, 0x5245, 0}, \ + {0x1002, 0x5246, 0}, \ + {0x1002, 0x5247, 0}, \ + {0x1002, 0x524b, 0}, \ + {0x1002, 0x524c, 0}, \ + {0x1002, 0x534d, 0}, \ + {0x1002, 0x5446, 0}, \ + {0x1002, 0x544C, 0}, \ + {0x1002, 0x5452, 0}, \ + {0, 0, 0} + /* Driver customization: */ #define DRIVER_PRERELEASE() do { \ @@ -97,7 +137,7 @@ /* DMA customization: */ #define __HAVE_DMA 1 -#define __HAVE_DMA_IRQ 1 +#define __HAVE_IRQ 1 #define __HAVE_VBL_IRQ 1 #define __HAVE_SHARED_IRQ 1 --- linux-2.6.6-rc1/drivers/char/drm/r128_irq.c 2003-06-14 12:18:52.000000000 -0700 +++ 25/drivers/char/drm/r128_irq.c 2004-04-18 22:25:29.213389368 -0700 @@ -36,7 +36,7 @@ #include "r128_drm.h" #include "r128_drv.h" -irqreturn_t r128_dma_service( DRM_IRQ_ARGS ) +irqreturn_t r128_irq_handler( DRM_IRQ_ARGS ) { drm_device_t *dev = (drm_device_t *) arg; drm_r128_private_t *dev_priv = --- 
linux-2.6.6-rc1/drivers/char/drm/r128_state.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/char/drm/r128_state.c 2004-04-18 22:25:29.215389064 -0700 @@ -45,7 +45,7 @@ static void r128_emit_clip_rects( drm_r1 RING_LOCALS; DRM_DEBUG( " %s\n", __FUNCTION__ ); - BEGIN_RING( 17 ); + BEGIN_RING( (count < 3? count: 3) * 5 + 2 ); if ( count >= 1 ) { OUT_RING( CCE_PACKET0( R128_AUX1_SC_LEFT, 3 ) ); @@ -1011,7 +1011,7 @@ static int r128_cce_dispatch_write_pixel DRM_DEBUG( "\n" ); count = depth->n; - if (count > 4096 || count <= 0) + if (count > 4096 || count <= 0) return -EMSGSIZE; xbuf_size = count * sizeof(*x); @@ -1280,6 +1280,7 @@ int r128_cce_clear( DRM_IOCTL_ARGS ) sarea_priv->nbox = R128_NR_SAREA_CLIPRECTS; r128_cce_dispatch_clear( dev, &clear ); + COMMIT_RING(); /* Make sure we restore the 3D state next time. */ @@ -1315,8 +1316,10 @@ int r128_do_cleanup_pageflip( drm_device R128_WRITE( R128_CRTC_OFFSET, dev_priv->crtc_offset ); R128_WRITE( R128_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl ); - if (dev_priv->current_page != 0) + if (dev_priv->current_page != 0) { r128_cce_dispatch_flip( dev ); + COMMIT_RING(); + } dev_priv->page_flipping = 0; return 0; @@ -1341,6 +1344,7 @@ int r128_cce_flip( DRM_IOCTL_ARGS ) r128_cce_dispatch_flip( dev ); + COMMIT_RING(); return 0; } @@ -1362,6 +1366,7 @@ int r128_cce_swap( DRM_IOCTL_ARGS ) dev_priv->sarea_priv->dirty |= (R128_UPLOAD_CONTEXT | R128_UPLOAD_MASKS); + COMMIT_RING(); return 0; } @@ -1421,6 +1426,7 @@ int r128_cce_vertex( DRM_IOCTL_ARGS ) r128_cce_dispatch_vertex( dev, buf ); + COMMIT_RING(); return 0; } @@ -1492,6 +1498,7 @@ int r128_cce_indices( DRM_IOCTL_ARGS ) r128_cce_dispatch_indices( dev, buf, elts.start, elts.end, count ); + COMMIT_RING(); return 0; } @@ -1501,6 +1508,7 @@ int r128_cce_blit( DRM_IOCTL_ARGS ) drm_device_dma_t *dma = dev->dma; drm_r128_private_t *dev_priv = dev->dev_private; drm_r128_blit_t blit; + int ret; LOCK_TEST_WITH_RETURN( dev, filp ); @@ -1518,7 +1526,10 @@ int r128_cce_blit( 
DRM_IOCTL_ARGS ) RING_SPACE_TEST_WITH_RETURN( dev_priv ); VB_AGE_TEST_WITH_RETURN( dev_priv ); - return r128_cce_dispatch_blit( filp, dev, &blit ); + ret = r128_cce_dispatch_blit( filp, dev, &blit ); + + COMMIT_RING(); + return ret; } int r128_cce_depth( DRM_IOCTL_ARGS ) @@ -1526,6 +1537,7 @@ int r128_cce_depth( DRM_IOCTL_ARGS ) DRM_DEVICE; drm_r128_private_t *dev_priv = dev->dev_private; drm_r128_depth_t depth; + int ret; LOCK_TEST_WITH_RETURN( dev, filp ); @@ -1534,18 +1546,24 @@ int r128_cce_depth( DRM_IOCTL_ARGS ) RING_SPACE_TEST_WITH_RETURN( dev_priv ); + ret = DRM_ERR(EINVAL); switch ( depth.func ) { case R128_WRITE_SPAN: - return r128_cce_dispatch_write_span( dev, &depth ); + ret = r128_cce_dispatch_write_span( dev, &depth ); + break; case R128_WRITE_PIXELS: - return r128_cce_dispatch_write_pixels( dev, &depth ); + ret = r128_cce_dispatch_write_pixels( dev, &depth ); + break; case R128_READ_SPAN: - return r128_cce_dispatch_read_span( dev, &depth ); + ret = r128_cce_dispatch_read_span( dev, &depth ); + break; case R128_READ_PIXELS: - return r128_cce_dispatch_read_pixels( dev, &depth ); + ret = r128_cce_dispatch_read_pixels( dev, &depth ); + break; } - return DRM_ERR(EINVAL); + COMMIT_RING(); + return ret; } int r128_cce_stipple( DRM_IOCTL_ARGS ) @@ -1568,6 +1586,7 @@ int r128_cce_stipple( DRM_IOCTL_ARGS ) r128_cce_dispatch_stipple( dev, mask ); + COMMIT_RING(); return 0; } @@ -1643,6 +1662,7 @@ int r128_cce_indirect( DRM_IOCTL_ARGS ) */ r128_cce_dispatch_indirect( dev, buf, indirect.start, indirect.end ); + COMMIT_RING(); return 0; } --- linux-2.6.6-rc1/drivers/char/drm/radeon_cp.c 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/radeon_cp.c 2004-04-18 22:25:29.218388608 -0700 @@ -855,7 +855,8 @@ static void radeon_cp_init_ring_buffer( /* Initialize the memory controller */ RADEON_WRITE( RADEON_MC_FB_LOCATION, - (dev_priv->gart_vm_start - 1) & 0xffff0000 ); + ( ( dev_priv->gart_vm_start - 1 ) & 0xffff0000 ) + | ( dev_priv->fb_location >> 16 ) ); #if __REALLY_HAVE_AGP if (
!dev_priv->is_pci ) { @@ -1071,13 +1072,6 @@ static int radeon_do_init_cp( drm_device dev_priv->depth_offset = init->depth_offset; dev_priv->depth_pitch = init->depth_pitch; - dev_priv->front_pitch_offset = (((dev_priv->front_pitch/64) << 22) | - (dev_priv->front_offset >> 10)); - dev_priv->back_pitch_offset = (((dev_priv->back_pitch/64) << 22) | - (dev_priv->back_offset >> 10)); - dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch/64) << 22) | - (dev_priv->depth_offset >> 10)); - /* Hardware state for depth clears. Remove this if/when we no * longer clear the depth buffer with a 3D rectangle. Hard-code * all values to prevent unwanted 3D state from slipping through @@ -1124,13 +1118,6 @@ static int radeon_do_init_cp( drm_device return DRM_ERR(EINVAL); } - DRM_FIND_MAP( dev_priv->fb, init->fb_offset ); - if(!dev_priv->fb) { - DRM_ERROR("could not find framebuffer!\n"); - dev->dev_private = (void *)dev_priv; - radeon_do_cleanup_cp(dev); - return DRM_ERR(EINVAL); - } DRM_FIND_MAP( dev_priv->mmio, init->mmio_offset ); if(!dev_priv->mmio) { DRM_ERROR("could not find mmio region!\n"); @@ -1204,9 +1191,26 @@ static int radeon_do_init_cp( drm_device dev_priv->buffers->handle ); } + dev_priv->fb_location = ( RADEON_READ( RADEON_MC_FB_LOCATION ) + & 0xffff ) << 16; + + dev_priv->front_pitch_offset = (((dev_priv->front_pitch/64) << 22) | + ( ( dev_priv->front_offset + + dev_priv->fb_location ) >> 10 ) ); + + dev_priv->back_pitch_offset = (((dev_priv->back_pitch/64) << 22) | + ( ( dev_priv->back_offset + + dev_priv->fb_location ) >> 10 ) ); + + dev_priv->depth_pitch_offset = (((dev_priv->depth_pitch/64) << 22) | + ( ( dev_priv->depth_offset + + dev_priv->fb_location ) >> 10 ) ); + dev_priv->gart_size = init->gart_size; - dev_priv->gart_vm_start = RADEON_READ( RADEON_CONFIG_APER_SIZE ); + dev_priv->gart_vm_start = dev_priv->fb_location + + RADEON_READ( RADEON_CONFIG_APER_SIZE ); + #if __REALLY_HAVE_AGP if ( !dev_priv->is_pci ) dev_priv->gart_buffers_offset = 
(dev_priv->buffers->offset @@ -1271,12 +1275,12 @@ int radeon_do_cleanup_cp( drm_device_t * { DRM_DEBUG( "\n" ); -#if _HAVE_DMA_IRQ +#if __HAVE_IRQ /* Make sure interrupts are disabled here because the uninstall ioctl * may not have been called from userspace and after dev_private * is freed, it's too late. */ - if ( dev->irq ) DRM(irq_uninstall)(dev); + if ( dev->irq_enabled ) DRM(irq_uninstall)(dev); #endif if ( dev->dev_private ) { --- linux-2.6.6-rc1/drivers/char/drm/radeon_drm.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/radeon_drm.h 2004-04-18 22:25:29.219388456 -0700 @@ -226,6 +226,13 @@ typedef union { #define RADEON_MAX_TEXTURE_LEVELS 12 #define RADEON_MAX_TEXTURE_UNITS 3 +/* Blits have strict offset rules. All blit offset must be aligned on + * a 1K-byte boundary. + */ +#define RADEON_OFFSET_SHIFT 10 +#define RADEON_OFFSET_ALIGN (1 << RADEON_OFFSET_SHIFT) +#define RADEON_OFFSET_MASK (RADEON_OFFSET_ALIGN - 1) + #endif /* __RADEON_SAREA_DEFINES__ */ typedef struct { @@ -365,31 +372,58 @@ typedef struct { /* Radeon specific ioctls * The device specific ioctl range is 0x40 to 0x79. 
*/ -#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( 0x40, drm_radeon_init_t) -#define DRM_IOCTL_RADEON_CP_START DRM_IO( 0x41) -#define DRM_IOCTL_RADEON_CP_STOP DRM_IOW( 0x42, drm_radeon_cp_stop_t) -#define DRM_IOCTL_RADEON_CP_RESET DRM_IO( 0x43) -#define DRM_IOCTL_RADEON_CP_IDLE DRM_IO( 0x44) -#define DRM_IOCTL_RADEON_RESET DRM_IO( 0x45) -#define DRM_IOCTL_RADEON_FULLSCREEN DRM_IOW( 0x46, drm_radeon_fullscreen_t) -#define DRM_IOCTL_RADEON_SWAP DRM_IO( 0x47) -#define DRM_IOCTL_RADEON_CLEAR DRM_IOW( 0x48, drm_radeon_clear_t) -#define DRM_IOCTL_RADEON_VERTEX DRM_IOW( 0x49, drm_radeon_vertex_t) -#define DRM_IOCTL_RADEON_INDICES DRM_IOW( 0x4a, drm_radeon_indices_t) -#define DRM_IOCTL_RADEON_STIPPLE DRM_IOW( 0x4c, drm_radeon_stipple_t) -#define DRM_IOCTL_RADEON_INDIRECT DRM_IOWR(0x4d, drm_radeon_indirect_t) -#define DRM_IOCTL_RADEON_TEXTURE DRM_IOWR(0x4e, drm_radeon_texture_t) -#define DRM_IOCTL_RADEON_VERTEX2 DRM_IOW( 0x4f, drm_radeon_vertex2_t) -#define DRM_IOCTL_RADEON_CMDBUF DRM_IOW( 0x50, drm_radeon_cmd_buffer_t) -#define DRM_IOCTL_RADEON_GETPARAM DRM_IOWR(0x51, drm_radeon_getparam_t) -#define DRM_IOCTL_RADEON_FLIP DRM_IO( 0x52) -#define DRM_IOCTL_RADEON_ALLOC DRM_IOWR( 0x53, drm_radeon_mem_alloc_t) -#define DRM_IOCTL_RADEON_FREE DRM_IOW( 0x54, drm_radeon_mem_free_t) -#define DRM_IOCTL_RADEON_INIT_HEAP DRM_IOW( 0x55, drm_radeon_mem_init_heap_t) -#define DRM_IOCTL_RADEON_IRQ_EMIT DRM_IOWR( 0x56, drm_radeon_irq_emit_t) -#define DRM_IOCTL_RADEON_IRQ_WAIT DRM_IOW( 0x57, drm_radeon_irq_wait_t) -/* added by Charl P. 
Botha - see radeon_cp.c for details */ -#define DRM_IOCTL_RADEON_CP_RESUME DRM_IO(0x58) +#define DRM_RADEON_CP_INIT 0x00 +#define DRM_RADEON_CP_START 0x01 +#define DRM_RADEON_CP_STOP 0x02 +#define DRM_RADEON_CP_RESET 0x03 +#define DRM_RADEON_CP_IDLE 0x04 +#define DRM_RADEON_RESET 0x05 +#define DRM_RADEON_FULLSCREEN 0x06 +#define DRM_RADEON_SWAP 0x07 +#define DRM_RADEON_CLEAR 0x08 +#define DRM_RADEON_VERTEX 0x09 +#define DRM_RADEON_INDICES 0x0A +#define DRM_RADEON_NOT_USED +#define DRM_RADEON_STIPPLE 0x0C +#define DRM_RADEON_INDIRECT 0x0D +#define DRM_RADEON_TEXTURE 0x0E +#define DRM_RADEON_VERTEX2 0x0F +#define DRM_RADEON_CMDBUF 0x10 +#define DRM_RADEON_GETPARAM 0x11 +#define DRM_RADEON_FLIP 0x12 +#define DRM_RADEON_ALLOC 0x13 +#define DRM_RADEON_FREE 0x14 +#define DRM_RADEON_INIT_HEAP 0x15 +#define DRM_RADEON_IRQ_EMIT 0x16 +#define DRM_RADEON_IRQ_WAIT 0x17 +#define DRM_RADEON_CP_RESUME 0x18 +#define DRM_RADEON_SETPARAM 0x19 + +#define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) +#define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) +#define DRM_IOCTL_RADEON_CP_STOP DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_STOP, drm_radeon_cp_stop_t) +#define DRM_IOCTL_RADEON_CP_RESET DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_RESET) +#define DRM_IOCTL_RADEON_CP_IDLE DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_IDLE) +#define DRM_IOCTL_RADEON_RESET DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_RESET) +#define DRM_IOCTL_RADEON_FULLSCREEN DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_FULLSCREEN, drm_radeon_fullscreen_t) +#define DRM_IOCTL_RADEON_SWAP DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_SWAP) +#define DRM_IOCTL_RADEON_CLEAR DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CLEAR, drm_radeon_clear_t) +#define DRM_IOCTL_RADEON_VERTEX DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_VERTEX, drm_radeon_vertex_t) +#define DRM_IOCTL_RADEON_INDICES DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_INDICES, drm_radeon_indices_t) +#define DRM_IOCTL_RADEON_STIPPLE 
DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_STIPPLE, drm_radeon_stipple_t) +#define DRM_IOCTL_RADEON_INDIRECT DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INDIRECT, drm_radeon_indirect_t) +#define DRM_IOCTL_RADEON_TEXTURE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_TEXTURE, drm_radeon_texture_t) +#define DRM_IOCTL_RADEON_VERTEX2 DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_VERTEX2, drm_radeon_vertex2_t) +#define DRM_IOCTL_RADEON_CMDBUF DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CMDBUF, drm_radeon_cmd_buffer_t) +#define DRM_IOCTL_RADEON_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GETPARAM, drm_radeon_getparam_t) +#define DRM_IOCTL_RADEON_FLIP DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_FLIP) +#define DRM_IOCTL_RADEON_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_ALLOC, drm_radeon_mem_alloc_t) +#define DRM_IOCTL_RADEON_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_FREE, drm_radeon_mem_free_t) +#define DRM_IOCTL_RADEON_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_INIT_HEAP, drm_radeon_mem_init_heap_t) +#define DRM_IOCTL_RADEON_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_IRQ_EMIT, drm_radeon_irq_emit_t) +#define DRM_IOCTL_RADEON_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_IRQ_WAIT, drm_radeon_irq_wait_t) +#define DRM_IOCTL_RADEON_CP_RESUME DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_RESUME) +#define DRM_IOCTL_RADEON_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t) typedef struct drm_radeon_init { enum { @@ -502,7 +536,7 @@ typedef struct drm_radeon_tex_image { } drm_radeon_tex_image_t; typedef struct drm_radeon_texture { - int offset; + unsigned int offset; int pitch; int format; int width; /* Texture image coordinates */ @@ -537,10 +571,11 @@ typedef struct drm_radeon_indirect { #define RADEON_PARAM_STATUS_HANDLE 8 #define RADEON_PARAM_SAREA_HANDLE 9 #define RADEON_PARAM_GART_TEX_HANDLE 10 +#define RADEON_PARAM_SCRATCH_OFFSET 11 typedef struct drm_radeon_getparam { int param; - int *value; + void *value; } drm_radeon_getparam_t; /* 1.6: Set up a memory 
manager for regions of shared memory: @@ -578,4 +613,16 @@ typedef struct drm_radeon_irq_wait { } drm_radeon_irq_wait_t; +/* 1.10: Clients tell the DRM where they think the framebuffer is located in + * the card's address space, via a new generic ioctl to set parameters + */ + +typedef struct drm_radeon_setparam { + unsigned int param; + int64_t value; +} drm_radeon_setparam_t; + +#define RADEON_SETPARAM_FB_LOCATION 1 /* determined framebuffer location */ + + #endif --- linux-2.6.6-rc1/drivers/char/drm/radeon_drv.c 2003-07-13 21:44:34.000000000 -0700 +++ 25/drivers/char/drm/radeon_drv.c 2004-04-18 22:25:29.220388304 -0700 @@ -48,6 +48,7 @@ #include "drm_fops.h" #include "drm_init.h" #include "drm_ioctl.h" +#include "drm_irq.h" #include "drm_lock.h" #include "drm_memory.h" #include "drm_proc.h" --- linux-2.6.6-rc1/drivers/char/drm/radeon_drv.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/radeon_drv.h 2004-04-18 22:25:29.221388152 -0700 @@ -73,6 +73,8 @@ typedef struct drm_radeon_private { drm_radeon_ring_buffer_t ring; drm_radeon_sarea_t *sarea_priv; + u32 fb_location; + int gart_size; u32 gart_vm_start; unsigned long gart_buffers_offset; @@ -133,7 +135,6 @@ typedef struct drm_radeon_private { unsigned long gart_textures_offset; drm_local_map_t *sarea; - drm_local_map_t *fb; drm_local_map_t *mmio; drm_local_map_t *cp_ring; drm_local_map_t *ring_rptr; @@ -184,6 +185,7 @@ extern int radeon_cp_indirect( DRM_IOCTL extern int radeon_cp_vertex2( DRM_IOCTL_ARGS ); extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ); extern int radeon_cp_getparam( DRM_IOCTL_ARGS ); +extern int radeon_cp_setparam( DRM_IOCTL_ARGS ); extern int radeon_cp_flip( DRM_IOCTL_ARGS ); extern int radeon_mem_alloc( DRM_IOCTL_ARGS ); @@ -239,6 +241,7 @@ extern void radeon_do_release(drm_device #define RADEON_CRTC2_OFFSET 0x0324 #define RADEON_CRTC2_OFFSET_CNTL 0x0328 +#define RADEON_RB3D_COLOROFFSET 0x1c40 #define RADEON_RB3D_COLORPITCH 0x1c48 #define RADEON_DP_GUI_MASTER_CNTL 0x146c @@ -332,6 
+335,7 @@ extern void radeon_do_release(drm_device #define RADEON_PP_MISC 0x1c14 #define RADEON_PP_ROT_MATRIX_0 0x1d58 #define RADEON_PP_TXFILTER_0 0x1c54 +#define RADEON_PP_TXOFFSET_0 0x1c5c #define RADEON_PP_TXFILTER_1 0x1c6c #define RADEON_PP_TXFILTER_2 0x1c84 --- linux-2.6.6-rc1/drivers/char/drm/radeon.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/radeon.h 2004-04-18 22:25:29.216388912 -0700 @@ -51,7 +51,7 @@ #define DRIVER_DATE "20020828" #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 9 +#define DRIVER_MINOR 10 #define DRIVER_PATCHLEVEL 0 /* Interface history: @@ -81,6 +81,9 @@ * Add 'GET' queries for starting additional clients on different VT's. * 1.9 - Add DRM_IOCTL_RADEON_CP_RESUME ioctl. * Add texture rectangle support for r100. + * 1.10- Add SETPARAM ioctl; first parameter to set is FB_LOCATION, which + * clients use to tell the DRM where they think the framebuffer is + * located in the card's address space */ #define DRIVER_IOCTLS \ [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \ @@ -106,10 +109,82 @@ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_ALLOC)] = { radeon_mem_alloc, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_FREE)] = { radeon_mem_free, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_INIT_HEAP)] = { radeon_mem_init_heap, 1, 1 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_RADEON_IRQ_EMIT)] = { radeon_irq_emit, 1, 0 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_RADEON_IRQ_WAIT)] = { radeon_irq_wait, 1, 0 }, + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_IRQ_EMIT)] = { radeon_irq_emit, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_IRQ_WAIT)] = { radeon_irq_wait, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_SETPARAM)] = { radeon_cp_setparam, 1, 0 }, \ + +#define DRIVER_PCI_IDS \ + {0x1002, 0x4136, 0}, \ + {0x1002, 0x4137, 0}, \ + {0x1002, 0x4237, 0}, \ + {0x1002, 0x4242, 0}, \ + {0x1002, 0x4242, 0}, \ + {0x1002, 0x4336, 0}, \ + {0x1002, 0x4337, 0}, \ + {0x1002, 0x4437, 0}, \ + {0x1002, 0x4964, 0}, \ + {0x1002, 0x4965, 0}, \ + {0x1002, 0x4966, 0}, \ + {0x1002, 0x4967, 0}, \ + {0x1002, 0x4C57, 
0}, \ + {0x1002, 0x4C58, 0}, \ + {0x1002, 0x4C59, 0}, \ + {0x1002, 0x4C5A, 0}, \ + {0x1002, 0x4C64, 0}, \ + {0x1002, 0x4C65, 0}, \ + {0x1002, 0x4C66, 0}, \ + {0x1002, 0x4C67, 0}, \ + {0x1002, 0x5144, 0}, \ + {0x1002, 0x5145, 0}, \ + {0x1002, 0x5146, 0}, \ + {0x1002, 0x5147, 0}, \ + {0x1002, 0x5148, 0}, \ + {0x1002, 0x5149, 0}, \ + {0x1002, 0x514A, 0}, \ + {0x1002, 0x514B, 0}, \ + {0x1002, 0x514C, 0}, \ + {0x1002, 0x514D, 0}, \ + {0x1002, 0x514E, 0}, \ + {0x1002, 0x514F, 0}, \ + {0x1002, 0x5157, 0}, \ + {0x1002, 0x5158, 0}, \ + {0x1002, 0x5159, 0}, \ + {0x1002, 0x515A, 0}, \ + {0x1002, 0x5168, 0}, \ + {0x1002, 0x5169, 0}, \ + {0x1002, 0x516A, 0}, \ + {0x1002, 0x516B, 0}, \ + {0x1002, 0x516C, 0}, \ + {0x1002, 0x5834, 0}, \ + {0x1002, 0x5835, 0}, \ + {0x1002, 0x5836, 0}, \ + {0x1002, 0x5837, 0}, \ + {0x1002, 0x5960, 0}, \ + {0x1002, 0x5961, 0}, \ + {0x1002, 0x5962, 0}, \ + {0x1002, 0x5963, 0}, \ + {0x1002, 0x5964, 0}, \ + {0x1002, 0x5968, 0}, \ + {0x1002, 0x5969, 0}, \ + {0x1002, 0x596A, 0}, \ + {0x1002, 0x596B, 0}, \ + {0x1002, 0x5c61, 0}, \ + {0x1002, 0x5c62, 0}, \ + {0x1002, 0x5c63, 0}, \ + {0x1002, 0x5c64, 0}, \ + {0, 0, 0} +#define DRIVER_FILE_FIELDS \ + int64_t radeon_fb_delta; \ +#define DRIVER_OPEN_HELPER( filp_priv, dev ) \ +do { \ + drm_radeon_private_t *dev_priv = dev->dev_private; \ + if ( dev_priv ) \ + filp_priv->radeon_fb_delta = dev_priv->fb_location; \ + else \ + filp_priv->radeon_fb_delta = 0; \ +} while( 0 ) /* When a client dies: * - Check for and clean up flipped page state @@ -142,7 +217,7 @@ do { \ /* DMA customization: */ #define __HAVE_DMA 1 -#define __HAVE_DMA_IRQ 1 +#define __HAVE_IRQ 1 #define __HAVE_VBL_IRQ 1 #define __HAVE_SHARED_IRQ 1 --- linux-2.6.6-rc1/drivers/char/drm/radeon_irq.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/drivers/char/drm/radeon_irq.c 2004-04-18 22:25:29.221388152 -0700 @@ -54,7 +54,7 @@ * tied to dma at all, this is just a hangover from dri prehistory. 
*/ -irqreturn_t DRM(dma_service)( DRM_IRQ_ARGS ) +irqreturn_t DRM(irq_handler)( DRM_IRQ_ARGS ) { drm_device_t *dev = (drm_device_t *) arg; drm_radeon_private_t *dev_priv = --- linux-2.6.6-rc1/drivers/char/drm/radeon_state.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/drivers/char/drm/radeon_state.c 2004-04-18 22:25:29.226387392 -0700 @@ -36,6 +36,240 @@ /* ================================================================ + * Helper functions for client state checking and fixup + */ + +static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + u32 *offset ) { + u32 off = *offset; + + if ( off >= dev_priv->fb_location && + off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) ) + return 0; + + off += filp_priv->radeon_fb_delta; + + DRM_DEBUG( "offset fixed up to 0x%x\n", off ); + + if ( off < dev_priv->fb_location || + off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) ) + return DRM_ERR( EINVAL ); + + *offset = off; + + return 0; +} + +static __inline__ int radeon_check_and_fixup_offset_user( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + u32 *offset ) { + u32 off; + + DRM_GET_USER_UNCHECKED( off, offset ); + + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &off ) ) + return DRM_ERR( EINVAL ); + + DRM_PUT_USER_UNCHECKED( offset, off ); + + return 0; +} + +static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + int id, + u32 *data ) { + switch ( id ) { + + case RADEON_EMIT_PP_MISC: + if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, + &data[( RADEON_RB3D_DEPTHOFFSET + - RADEON_PP_MISC ) / 4] ) ) { + DRM_ERROR( "Invalid depth buffer offset\n" ); + return DRM_ERR( EINVAL ); + } + break; + + case RADEON_EMIT_PP_CNTL: + if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, + &data[( RADEON_RB3D_COLOROFFSET + - RADEON_PP_CNTL ) / 4] ) ) { + DRM_ERROR( "Invalid colour buffer offset\n" ); + return DRM_ERR( EINVAL ); + 
} + break; + + case R200_EMIT_PP_TXOFFSET_0: + case R200_EMIT_PP_TXOFFSET_1: + case R200_EMIT_PP_TXOFFSET_2: + case R200_EMIT_PP_TXOFFSET_3: + case R200_EMIT_PP_TXOFFSET_4: + case R200_EMIT_PP_TXOFFSET_5: + if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, + &data[0] ) ) { + DRM_ERROR( "Invalid R200 texture offset\n" ); + return DRM_ERR( EINVAL ); + } + break; + + case RADEON_EMIT_PP_TXFILTER_0: + case RADEON_EMIT_PP_TXFILTER_1: + case RADEON_EMIT_PP_TXFILTER_2: + if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, + &data[( RADEON_PP_TXOFFSET_0 + - RADEON_PP_TXFILTER_0 ) / 4] ) ) { + DRM_ERROR( "Invalid R100 texture offset\n" ); + return DRM_ERR( EINVAL ); + } + break; + + case R200_EMIT_PP_CUBIC_OFFSETS_0: + case R200_EMIT_PP_CUBIC_OFFSETS_1: + case R200_EMIT_PP_CUBIC_OFFSETS_2: + case R200_EMIT_PP_CUBIC_OFFSETS_3: + case R200_EMIT_PP_CUBIC_OFFSETS_4: + case R200_EMIT_PP_CUBIC_OFFSETS_5: { + int i; + for ( i = 0; i < 5; i++ ) { + if ( radeon_check_and_fixup_offset_user( dev_priv, + filp_priv, + &data[i] ) ) { + DRM_ERROR( "Invalid R200 cubic texture offset\n" ); + return DRM_ERR( EINVAL ); + } + } + break; + } + + case RADEON_EMIT_RB3D_COLORPITCH: + case RADEON_EMIT_RE_LINE_PATTERN: + case RADEON_EMIT_SE_LINE_WIDTH: + case RADEON_EMIT_PP_LUM_MATRIX: + case RADEON_EMIT_PP_ROT_MATRIX_0: + case RADEON_EMIT_RB3D_STENCILREFMASK: + case RADEON_EMIT_SE_VPORT_XSCALE: + case RADEON_EMIT_SE_CNTL: + case RADEON_EMIT_SE_CNTL_STATUS: + case RADEON_EMIT_RE_MISC: + case RADEON_EMIT_PP_BORDER_COLOR_0: + case RADEON_EMIT_PP_BORDER_COLOR_1: + case RADEON_EMIT_PP_BORDER_COLOR_2: + case RADEON_EMIT_SE_ZBIAS_FACTOR: + case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT: + case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED: + case R200_EMIT_PP_TXCBLEND_0: + case R200_EMIT_PP_TXCBLEND_1: + case R200_EMIT_PP_TXCBLEND_2: + case R200_EMIT_PP_TXCBLEND_3: + case R200_EMIT_PP_TXCBLEND_4: + case R200_EMIT_PP_TXCBLEND_5: + case R200_EMIT_PP_TXCBLEND_6: + case R200_EMIT_PP_TXCBLEND_7: + 
case R200_EMIT_TCL_LIGHT_MODEL_CTL_0: + case R200_EMIT_TFACTOR_0: + case R200_EMIT_VTX_FMT_0: + case R200_EMIT_VAP_CTL: + case R200_EMIT_MATRIX_SELECT_0: + case R200_EMIT_TEX_PROC_CTL_2: + case R200_EMIT_TCL_UCP_VERT_BLEND_CTL: + case R200_EMIT_PP_TXFILTER_0: + case R200_EMIT_PP_TXFILTER_1: + case R200_EMIT_PP_TXFILTER_2: + case R200_EMIT_PP_TXFILTER_3: + case R200_EMIT_PP_TXFILTER_4: + case R200_EMIT_PP_TXFILTER_5: + case R200_EMIT_VTE_CNTL: + case R200_EMIT_OUTPUT_VTX_COMP_SEL: + case R200_EMIT_PP_TAM_DEBUG3: + case R200_EMIT_PP_CNTL_X: + case R200_EMIT_RB3D_DEPTHXY_OFFSET: + case R200_EMIT_RE_AUX_SCISSOR_CNTL: + case R200_EMIT_RE_SCISSOR_TL_0: + case R200_EMIT_RE_SCISSOR_TL_1: + case R200_EMIT_RE_SCISSOR_TL_2: + case R200_EMIT_SE_VAP_CNTL_STATUS: + case R200_EMIT_SE_VTX_STATE_CNTL: + case R200_EMIT_RE_POINTSIZE: + case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0: + case R200_EMIT_PP_CUBIC_FACES_0: + case R200_EMIT_PP_CUBIC_FACES_1: + case R200_EMIT_PP_CUBIC_FACES_2: + case R200_EMIT_PP_CUBIC_FACES_3: + case R200_EMIT_PP_CUBIC_FACES_4: + case R200_EMIT_PP_CUBIC_FACES_5: + case RADEON_EMIT_PP_TEX_SIZE_0: + case RADEON_EMIT_PP_TEX_SIZE_1: + case RADEON_EMIT_PP_TEX_SIZE_2: + /* These packets don't contain memory offsets */ + break; + + default: + DRM_ERROR( "Unknown state packet ID %d\n", id ); + return DRM_ERR( EINVAL ); + } + + return 0; +} + +static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + drm_radeon_cmd_buffer_t *cmdbuf, + unsigned int *cmdsz ) { + u32 tmp[4], *cmd = ( u32* )cmdbuf->buf; + + if ( DRM_COPY_FROM_USER_UNCHECKED( tmp, cmd, sizeof( tmp ) ) ) { + DRM_ERROR( "Failed to copy data from user space\n" ); + return DRM_ERR( EFAULT ); + } + + *cmdsz = 2 + ( ( tmp[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 ); + + if ( ( tmp[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) { + DRM_ERROR( "Not a type 3 packet\n" ); + return DRM_ERR( EINVAL ); + } + + if ( 4 * *cmdsz > cmdbuf->bufsz ) { + DRM_ERROR( "Packet size 
larger than size of data provided\n" ); + return DRM_ERR( EINVAL ); + } + + /* Check client state and fix it up if necessary */ + if ( tmp[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */ + u32 offset; + + if ( tmp[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL + | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) { + offset = tmp[2] << 10; + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) { + DRM_ERROR( "Invalid first packet offset\n" ); + return DRM_ERR( EINVAL ); + } + tmp[2] = ( tmp[2] & 0xffc00000 ) | offset >> 10; + } + + if ( ( tmp[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) && + ( tmp[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) { + offset = tmp[3] << 10; + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) { + DRM_ERROR( "Invalid second packet offset\n" ); + return DRM_ERR( EINVAL ); + } + tmp[3] = ( tmp[3] & 0xffc00000 ) | offset >> 10; + } + + if ( DRM_COPY_TO_USER_UNCHECKED( cmd, tmp, sizeof( tmp ) ) ) { + DRM_ERROR( "Failed to copy data to user space\n" ); + return DRM_ERR( EFAULT ); + } + } + + return 0; +} + + +/* ================================================================ * CP hardware state programming functions */ @@ -57,15 +291,28 @@ static __inline__ void radeon_emit_clip_ /* Emit 1.1 state */ -static void radeon_emit_state( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx, - drm_radeon_texture_regs_t *tex, - unsigned int dirty ) +static int radeon_emit_state( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + drm_radeon_context_regs_t *ctx, + drm_radeon_texture_regs_t *tex, + unsigned int dirty ) { RING_LOCALS; DRM_DEBUG( "dirty=0x%08x\n", dirty ); if ( dirty & RADEON_UPLOAD_CONTEXT ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &ctx->rb3d_depthoffset ) ) { + DRM_ERROR( "Invalid depth buffer offset\n" ); + return DRM_ERR( EINVAL ); + } + + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &ctx->rb3d_coloroffset ) ) { + DRM_ERROR( "Invalid colour buffer offset\n" ); + 
return DRM_ERR( EINVAL ); + } + BEGIN_RING( 14 ); OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) ); OUT_RING( ctx->pp_misc ); @@ -149,6 +396,12 @@ static void radeon_emit_state( drm_radeo } if ( dirty & RADEON_UPLOAD_TEX0 ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &tex[0].pp_txoffset ) ) { + DRM_ERROR( "Invalid texture offset for unit 0\n" ); + return DRM_ERR( EINVAL ); + } + BEGIN_RING( 9 ); OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) ); OUT_RING( tex[0].pp_txfilter ); @@ -163,6 +416,12 @@ static void radeon_emit_state( drm_radeo } if ( dirty & RADEON_UPLOAD_TEX1 ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &tex[1].pp_txoffset ) ) { + DRM_ERROR( "Invalid texture offset for unit 1\n" ); + return DRM_ERR( EINVAL ); + } + BEGIN_RING( 9 ); OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) ); OUT_RING( tex[1].pp_txfilter ); @@ -177,6 +436,12 @@ static void radeon_emit_state( drm_radeo } if ( dirty & RADEON_UPLOAD_TEX2 ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &tex[2].pp_txoffset ) ) { + DRM_ERROR( "Invalid texture offset for unit 2\n" ); + return DRM_ERR( EINVAL ); + } + BEGIN_RING( 9 ); OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) ); OUT_RING( tex[2].pp_txfilter ); @@ -189,12 +454,15 @@ static void radeon_emit_state( drm_radeo OUT_RING( tex[2].pp_border_color ); ADVANCE_RING(); } + + return 0; } /* Emit 1.2 state */ -static void radeon_emit_state2( drm_radeon_private_t *dev_priv, - drm_radeon_state_t *state ) +static int radeon_emit_state2( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, + drm_radeon_state_t *state ) { RING_LOCALS; @@ -206,7 +474,7 @@ static void radeon_emit_state2( drm_rade ADVANCE_RING(); } - radeon_emit_state( dev_priv, &state->context, + return radeon_emit_state( dev_priv, filp_priv, &state->context, state->tex, state->dirty ); } @@ -1065,6 +1333,7 @@ static int radeon_cp_dispatch_texture( D drm_radeon_tex_image_t *image ) { drm_radeon_private_t *dev_priv = dev->dev_private; 
+ drm_file_t *filp_priv; drm_buf_t *buf; u32 format; u32 *buffer; @@ -1074,6 +1343,13 @@ static int radeon_cp_dispatch_texture( D int i; RING_LOCALS; + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) { + DRM_ERROR( "Invalid destination offset\n" ); + return DRM_ERR( EINVAL ); + } + dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; /* Flush the pixel cache. This ensures no pixel data gets mixed @@ -1377,6 +1653,7 @@ int radeon_cp_vertex( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_radeon_private_t *dev_priv = dev->dev_private; + drm_file_t *filp_priv; drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; @@ -1390,6 +1667,8 @@ int radeon_cp_vertex( DRM_IOCTL_ARGS ) return DRM_ERR(EINVAL); } + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t *)data, sizeof(vertex) ); @@ -1429,11 +1708,14 @@ int radeon_cp_vertex( DRM_IOCTL_ARGS ) buf->used = vertex.count; /* not used? 
*/ if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) { - radeon_emit_state( dev_priv, - &sarea_priv->context_state, - sarea_priv->tex_state, - sarea_priv->dirty ); - + if ( radeon_emit_state( dev_priv, filp_priv, + &sarea_priv->context_state, + sarea_priv->tex_state, + sarea_priv->dirty ) ) { + DRM_ERROR( "radeon_emit_state failed\n" ); + return DRM_ERR( EINVAL ); + } + sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES | RADEON_UPLOAD_TEX1IMAGES | RADEON_UPLOAD_TEX2IMAGES | @@ -1461,6 +1743,7 @@ int radeon_cp_indices( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_radeon_private_t *dev_priv = dev->dev_private; + drm_file_t *filp_priv; drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; @@ -1475,6 +1758,8 @@ int radeon_cp_indices( DRM_IOCTL_ARGS ) return DRM_ERR(EINVAL); } + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t *)data, sizeof(elts) ); @@ -1523,10 +1808,13 @@ int radeon_cp_indices( DRM_IOCTL_ARGS ) buf->used = elts.end; if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) { - radeon_emit_state( dev_priv, - &sarea_priv->context_state, - sarea_priv->tex_state, - sarea_priv->dirty ); + if ( radeon_emit_state( dev_priv, filp_priv, + &sarea_priv->context_state, + sarea_priv->tex_state, + sarea_priv->dirty ) ) { + DRM_ERROR( "radeon_emit_state failed\n" ); + return DRM_ERR( EINVAL ); + } sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES | RADEON_UPLOAD_TEX1IMAGES | @@ -1686,6 +1974,7 @@ int radeon_cp_vertex2( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_radeon_private_t *dev_priv = dev->dev_private; + drm_file_t *filp_priv; drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; @@ -1700,6 +1989,8 @@ int radeon_cp_vertex2( DRM_IOCTL_ARGS ) return DRM_ERR(EINVAL); } + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t *)data, sizeof(vertex) ); @@ -1747,7 +2038,10 @@ int 
radeon_cp_vertex2( DRM_IOCTL_ARGS ) sizeof(state) ) ) return DRM_ERR(EFAULT); - radeon_emit_state2( dev_priv, &state ); + if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) { + DRM_ERROR( "radeon_emit_state2 failed\n" ); + return DRM_ERR( EINVAL ); + } laststate = prim.stateidx; } @@ -1784,6 +2078,7 @@ int radeon_cp_vertex2( DRM_IOCTL_ARGS ) static int radeon_emit_packets( drm_radeon_private_t *dev_priv, + drm_file_t *filp_priv, drm_radeon_cmd_header_t header, drm_radeon_cmd_buffer_t *cmdbuf ) { @@ -1798,8 +2093,15 @@ static int radeon_emit_packets( sz = packet[id].len; reg = packet[id].start; - if (sz * sizeof(int) > cmdbuf->bufsz) + if (sz * sizeof(int) > cmdbuf->bufsz) { + DRM_ERROR( "Packet size provided larger than data provided\n" ); return DRM_ERR(EINVAL); + } + + if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) { + DRM_ERROR( "Packet verification failed\n" ); + return DRM_ERR( EINVAL ); + } BEGIN_RING(sz+1); OUT_RING( CP_PACKET0( reg, (sz-1) ) ); @@ -1882,24 +2184,21 @@ static __inline__ int radeon_emit_vector static int radeon_emit_packet3( drm_device_t *dev, + drm_file_t *filp_priv, drm_radeon_cmd_buffer_t *cmdbuf ) { drm_radeon_private_t *dev_priv = dev->dev_private; - int cmdsz, tmp; - int *cmd = (int *)cmdbuf->buf; + unsigned int cmdsz; + int *cmd = (int *)cmdbuf->buf, ret; RING_LOCALS; - DRM_DEBUG("\n"); - if (DRM_GET_USER_UNCHECKED( tmp, &cmd[0])) - return DRM_ERR(EFAULT); - - cmdsz = 2 + ((tmp & RADEON_CP_PACKET_COUNT_MASK) >> 16); - - if ((tmp & 0xc0000000) != RADEON_CP_PACKET3 || - cmdsz * 4 > cmdbuf->bufsz) - return DRM_ERR(EINVAL); + if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv, + cmdbuf, &cmdsz ) ) ) { + DRM_ERROR( "Packet verification failed\n" ); + return ret; + } BEGIN_RING( cmdsz ); OUT_RING_USER_TABLE( cmd, cmdsz ); @@ -1912,27 +2211,25 @@ static int radeon_emit_packet3( drm_devi static int radeon_emit_packet3_cliprect( drm_device_t *dev, + drm_file_t *filp_priv, drm_radeon_cmd_buffer_t 
*cmdbuf, int orig_nbox ) { drm_radeon_private_t *dev_priv = dev->dev_private; drm_clip_rect_t box; - int cmdsz, tmp; - int *cmd = (int *)cmdbuf->buf; + unsigned int cmdsz; + int *cmd = (int *)cmdbuf->buf, ret; drm_clip_rect_t *boxes = cmdbuf->boxes; int i = 0; RING_LOCALS; DRM_DEBUG("\n"); - if (DRM_GET_USER_UNCHECKED( tmp, &cmd[0])) - return DRM_ERR(EFAULT); - - cmdsz = 2 + ((tmp & RADEON_CP_PACKET_COUNT_MASK) >> 16); - - if ((tmp & 0xc0000000) != RADEON_CP_PACKET3 || - cmdsz * 4 > cmdbuf->bufsz) - return DRM_ERR(EINVAL); + if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv, + cmdbuf, &cmdsz ) ) ) { + DRM_ERROR( "Packet verification failed\n" ); + return ret; + } if (!orig_nbox) goto out; @@ -2009,6 +2306,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) { DRM_DEVICE; drm_radeon_private_t *dev_priv = dev->dev_private; + drm_file_t *filp_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf = 0; int idx; @@ -2023,6 +2321,8 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) return DRM_ERR(EINVAL); } + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t *)data, sizeof(cmdbuf) ); @@ -2053,7 +2353,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) switch (header.header.cmd_type) { case RADEON_CMD_PACKET: DRM_DEBUG("RADEON_CMD_PACKET\n"); - if (radeon_emit_packets( dev_priv, header, &cmdbuf )) { + if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_packets failed\n"); return DRM_ERR(EINVAL); } @@ -2096,7 +2396,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) case RADEON_CMD_PACKET3: DRM_DEBUG("RADEON_CMD_PACKET3\n"); - if (radeon_emit_packet3( dev, &cmdbuf )) { + if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) { DRM_ERROR("radeon_emit_packet3 failed\n"); return DRM_ERR(EINVAL); } @@ -2104,7 +2404,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) case RADEON_CMD_PACKET3_CLIP: DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n"); - if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) { + if 
(radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) { DRM_ERROR("radeon_emit_packet3_clip failed\n"); return DRM_ERR(EINVAL); } @@ -2214,3 +2514,31 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS ) return 0; } + +int radeon_cp_setparam( DRM_IOCTL_ARGS ) { + DRM_DEVICE; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_file_t *filp_priv; + drm_radeon_setparam_t sp; + + if ( !dev_priv ) { + DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ ); + return DRM_ERR( EINVAL ); + } + + DRM_GET_PRIV_WITH_RETURN( filp_priv, filp ); + + DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t* )data, + sizeof( sp ) ); + + switch( sp.param ) { + case RADEON_SETPARAM_FB_LOCATION: + filp_priv->radeon_fb_delta = dev_priv->fb_location - sp.value; + break; + default: + DRM_DEBUG( "Invalid parameter %d\n", sp.param ); + return DRM_ERR( EINVAL ); + } + + return 0; +} --- linux-2.6.6-rc1/drivers/char/drm/sis.h 2003-09-27 18:57:44.000000000 -0700 +++ 25/drivers/char/drm/sis.h 2004-04-18 22:25:29.226387392 -0700 @@ -62,6 +62,13 @@ [DRM_IOCTL_NR(DRM_IOCTL_SIS_AGP_FREE)] = { sis_ioctl_agp_free, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_SIS_FB_INIT)] = { sis_fb_init, 1, 1 } +#define DRIVER_PCI_IDS \ + {0x1039, 0x0300, 0}, \ + {0x1039, 0x5300, 0}, \ + {0x1039, 0x6300, 0}, \ + {0x1039, 0x7300, 0}, \ + {0, 0, 0} + #define __HAVE_COUNTERS 5 /* Buffer customization: --- linux-2.6.6-rc1/drivers/char/drm/tdfx_drv.c 2003-06-14 12:18:21.000000000 -0700 +++ 25/drivers/char/drm/tdfx_drv.c 2004-04-18 22:25:29.227387240 -0700 @@ -34,47 +34,6 @@ #include "tdfx.h" #include "drmP.h" -#define DRIVER_AUTHOR "VA Linux Systems Inc." 
- -#define DRIVER_NAME "tdfx" -#define DRIVER_DESC "3dfx Banshee/Voodoo3+" -#define DRIVER_DATE "20010216" - -#define DRIVER_MAJOR 1 -#define DRIVER_MINOR 0 -#define DRIVER_PATCHLEVEL 0 - -#ifndef PCI_VENDOR_ID_3DFX -#define PCI_VENDOR_ID_3DFX 0x121A -#endif -#ifndef PCI_DEVICE_ID_3DFX_VOODOO5 -#define PCI_DEVICE_ID_3DFX_VOODOO5 0x0009 -#endif -#ifndef PCI_DEVICE_ID_3DFX_VOODOO4 -#define PCI_DEVICE_ID_3DFX_VOODOO4 0x0007 -#endif -#ifndef PCI_DEVICE_ID_3DFX_VOODOO3_3000 /* Voodoo3 3000 */ -#define PCI_DEVICE_ID_3DFX_VOODOO3_3000 0x0005 -#endif -#ifndef PCI_DEVICE_ID_3DFX_VOODOO3_2000 /* Voodoo3 3000 */ -#define PCI_DEVICE_ID_3DFX_VOODOO3_2000 0x0004 -#endif -#ifndef PCI_DEVICE_ID_3DFX_BANSHEE -#define PCI_DEVICE_ID_3DFX_BANSHEE 0x0003 -#endif - -static drm_pci_list_t DRM(idlist)[] = { - { PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_BANSHEE }, - { PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_VOODOO3_2000 }, - { PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_VOODOO3_3000 }, - { PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_VOODOO4 }, - { PCI_VENDOR_ID_3DFX, PCI_DEVICE_ID_3DFX_VOODOO5 }, - { 0, 0 } -}; - -#define DRIVER_CARD_LIST DRM(idlist) - - #include "drm_auth.h" #include "drm_bufs.h" #include "drm_context.h" --- linux-2.6.6-rc1/drivers/char/drm/tdfx.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/drivers/char/drm/tdfx.h 2004-04-18 22:25:29.226387392 -0700 @@ -39,4 +39,22 @@ #define __HAVE_MTRR 1 #define __HAVE_CTX_BITMAP 1 +#define DRIVER_AUTHOR "VA Linux Systems Inc." 
+ +#define DRIVER_NAME "tdfx" +#define DRIVER_DESC "3dfx Banshee/Voodoo3+" +#define DRIVER_DATE "20010216" + +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#define DRIVER_PCI_IDS \ + {0x121a, 0x0003, 0}, \ + {0x121a, 0x0004, 0}, \ + {0x121a, 0x0005, 0}, \ + {0x121a, 0x0007, 0}, \ + {0x121a, 0x0009, 0}, \ + {0, 0, 0} + #endif --- linux-2.6.6-rc1/drivers/char/dsp56k.c 2003-10-08 15:07:08.000000000 -0700 +++ 25/drivers/char/dsp56k.c 2004-04-18 22:25:24.691076864 -0700 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -149,6 +150,8 @@ static struct dsp56k_device { int tx_wsize, rx_wsize; } dsp56k; +static struct class_simple *dsp56k_class; + static int dsp56k_reset(void) { u_char status; @@ -502,6 +505,8 @@ static char banner[] __initdata = KERN_I static int __init dsp56k_init_driver(void) { + int err = 0; + if(!MACH_IS_ATARI || !ATARIHW_PRESENT(DSP56K)) { printk("DSP56k driver: Hardware not present\n"); return -ENODEV; @@ -511,17 +516,35 @@ static int __init dsp56k_init_driver(voi printk("DSP56k driver: Unable to register driver\n"); return -ENODEV; } + dsp56k_class = class_simple_create(THIS_MODULE, "dsp56k"); + if (IS_ERR(dsp56k_class)) { + err = PTR_ERR(dsp56k_class); + goto out_chrdev; + } + class_simple_device_add(dsp56k_class, MKDEV(DSP56K_MAJOR, 0), NULL, "dsp56k"); - devfs_mk_cdev(MKDEV(DSP56K_MAJOR, 0), + err = devfs_mk_cdev(MKDEV(DSP56K_MAJOR, 0), S_IFCHR | S_IRUSR | S_IWUSR, "dsp56k"); + if(err) + goto out_class; printk(banner); - return 0; + goto out; + +out_class: + class_simple_device_remove(MKDEV(DSP56K_MAJOR, 0)); + class_simple_destroy(dsp56k_class); +out_chrdev: + unregister_chrdev(DSP56K_MAJOR, "dsp56k"); +out: + return err; } module_init(dsp56k_init_driver); static void __exit dsp56k_cleanup_driver(void) { + class_simple_device_remove(MKDEV(DSP56K_MAJOR, 0)); + class_simple_destroy(dsp56k_class); unregister_chrdev(DSP56K_MAJOR, "dsp56k"); devfs_remove("dsp56k"); } --- 
linux-2.6.6-rc1/drivers/char/ftape/zftape/zftape-init.c 2003-09-08 13:58:57.000000000 -0700 +++ 25/drivers/char/ftape/zftape/zftape-init.c 2004-04-18 22:25:24.692076712 -0700 @@ -38,6 +38,7 @@ #include #include +#include #include "../zftape/zftape-init.h" #include "../zftape/zftape-read.h" @@ -103,6 +104,8 @@ static struct file_operations zft_cdev = .release = zft_close, }; +static struct class_simple *zft_class; + /* Open floppy tape device */ static int zft_open(struct inode *ino, struct file *filep) @@ -341,22 +344,29 @@ KERN_INFO "installing zftape VFS interface for ftape driver ..."); TRACE_CATCH(register_chrdev(QIC117_TAPE_MAJOR, "zft", &zft_cdev),); + zft_class = class_simple_create(THIS_MODULE, "zft"); for (i = 0; i < 4; i++) { + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i), NULL, "qft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i), S_IFCHR | S_IRUSR | S_IWUSR, "qft%i", i); + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i + 4), NULL, "nqft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i + 4), S_IFCHR | S_IRUSR | S_IWUSR, "nqft%i", i); + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i + 16), NULL, "zqft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i + 16), S_IFCHR | S_IRUSR | S_IWUSR, "zqft%i", i); + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i + 20), NULL, "nzqft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i + 20), S_IFCHR | S_IRUSR | S_IWUSR, "nzqft%i", i); + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i + 32), NULL, "rawqft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i + 32), S_IFCHR | S_IRUSR | S_IWUSR, "rawqft%i", i); + class_simple_device_add(zft_class, MKDEV(QIC117_TAPE_MAJOR, i + 36), NULL, "nrawqft%i", i); devfs_mk_cdev(MKDEV(QIC117_TAPE_MAJOR, i + 36), S_IFCHR | S_IRUSR | S_IWUSR, "nrawqft%i", i); @@ -386,12 +396,19 @@ static void zft_exit(void) } for (i = 0; i < 4; i++) { devfs_remove("qft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, 
i)); devfs_remove("nqft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, i + 4)); devfs_remove("zqft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, i + 16)); devfs_remove("nzqft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, i + 20)); devfs_remove("rawqft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, i + 32)); devfs_remove("nrawqft%i", i); + class_simple_device_remove(MKDEV(QIC117_TAPE_MAJOR, i + 36)); } + class_simple_destroy(zft_class); zft_uninit_mem(); /* release remaining memory, if any */ printk(KERN_INFO "zftape successfully unloaded.\n"); TRACE_EXIT; --- linux-2.6.6-rc1/drivers/char/ipmi/ipmi_si_intf.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/ipmi/ipmi_si_intf.c 2004-04-18 22:25:24.693076560 -0700 @@ -51,6 +51,7 @@ #include #include #include +#include #ifdef CONFIG_HIGH_RES_TIMERS #include # if defined(schedule_next_int) --- linux-2.6.6-rc1/drivers/char/istallion.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/istallion.c 2004-04-18 22:25:24.697075952 -0700 @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -795,6 +796,8 @@ static int stli_timeron; /*****************************************************************************/ +static struct class_simple *istallion_class; + #ifdef MODULE /* @@ -853,9 +856,12 @@ static void __exit istallion_module_exit return; } put_tty_driver(stli_serial); - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { devfs_remove("staliomem/%d", i); + class_simple_device_remove(MKDEV(STL_SIOMEMMAJOR, i)); + } devfs_remove("staliomem"); + class_simple_destroy(istallion_class); if ((i = unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"))) printk("STALLION: failed to un-register serial memory device, " "errno=%d\n", -i); @@ -5310,10 +5316,13 @@ int __init stli_init(void) "device\n"); devfs_mk_dir("staliomem"); + istallion_class = class_simple_create(THIS_MODULE, "staliomem"); for (i = 0; i < 4; i++) { 
devfs_mk_cdev(MKDEV(STL_SIOMEMMAJOR, i), S_IFCHR | S_IRUSR | S_IWUSR, "staliomem/%d", i); + class_simple_device_add(istallion_class, MKDEV(STL_SIOMEMMAJOR, i), + NULL, "staliomem%d", i); } /* --- linux-2.6.6-rc1/drivers/char/keyboard.c 2004-02-17 20:48:42.000000000 -0800 +++ 25/drivers/char/keyboard.c 2004-04-18 22:25:30.335218824 -0700 @@ -1066,6 +1066,9 @@ void kbd_keycode(unsigned int keycode, i } if (sysrq_down && down && !rep) { handle_sysrq(kbd_sysrq_xlate[keycode], regs, tty); +#ifdef CONFIG_KGDB_SYSRQ + sysrq_down = 0; /* in case we miss the "up" event */ +#endif return; } #endif --- linux-2.6.6-rc1/drivers/char/mem.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/char/mem.c 2004-04-18 22:26:02.378347528 -0700 @@ -26,7 +26,6 @@ #include #include -#include #ifdef CONFIG_IA64 # include @@ -39,6 +38,7 @@ extern void fbmem_init(void); extern void tapechar_init(void); #endif +#ifdef pgprot_noncached /* * Architectures vary in how they handle caching for addresses * outside of main memory. @@ -64,7 +64,8 @@ static inline int uncached_access(struct && addr >= __pa(high_memory); #elif defined(CONFIG_IA64) /* - * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. + * On ia64, we ignore O_SYNC because we cannot tolerate memory + * attribute aliases. */ return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); #elif defined(CONFIG_PPC64) @@ -77,14 +78,15 @@ static inline int uncached_access(struct return !page_is_ram(addr); #else /* - * Accessing memory above the top the kernel knows about or through a file pointer - * that was marked O_SYNC will be done non-cached. + * Accessing memory above the top the kernel knows about or through a + * file pointer that was marked O_SYNC will be done non-cached. 
*/ if (file->f_flags & O_SYNC) return 1; return addr >= __pa(high_memory); #endif } +#endif #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE static inline int valid_phys_addr_range(unsigned long addr, size_t *count) @@ -181,28 +183,24 @@ static ssize_t write_mem(struct file * f return do_write_mem(__va(p), p, buf, count, ppos); } -static int mmap_mem(struct file * file, struct vm_area_struct * vma) +static int mmap_mem(struct file *file, struct vm_area_struct *vma) { unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; - int uncached; - uncached = uncached_access(file, offset); #ifdef pgprot_noncached - if (uncached) + if (uncached_access(file, offset)) vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); #endif - /* Don't try to swap out physical pages.. */ - vma->vm_flags |= VM_RESERVED; - /* - * Don't dump addresses that are not real memory to a core file. + * Don't try to swap out physical pages.. + * And treat /dev/mem mappings as "IO" regions: they may not + * describe valid pageframes. */ - if (uncached) - vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_RESERVED|VM_IO; - if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start, - vma->vm_page_prot)) + if (remap_page_range(vma, vma->vm_start, offset, + vma->vm_end-vma->vm_start, vma->vm_page_prot)) return -EAGAIN; return 0; } @@ -410,7 +408,7 @@ static inline size_t read_zero_pagealign if (count > size) count = size; - zap_page_range(vma, addr, count); + zap_page_range(vma, addr, count, NULL); zeromap_page_range(vma, addr, count, PAGE_COPY); size -= count; --- linux-2.6.6-rc1/drivers/char/stallion.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/char/stallion.c 2004-04-18 22:25:24.701075344 -0700 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -732,6 +733,8 @@ static struct file_operations stl_fsiome /*****************************************************************************/ +static struct class_simple *stallion_class; + #ifdef MODULE /* @@ -788,12 +791,15 @@ 
static void __exit stallion_module_exit( restore_flags(flags); return; } - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { devfs_remove("staliomem/%d", i); + class_simple_device_remove(MKDEV(STL_SIOMEMMAJOR, i)); + } devfs_remove("staliomem"); if ((i = unregister_chrdev(STL_SIOMEMMAJOR, "staliomem"))) printk("STALLION: failed to un-register serial memory device, " "errno=%d\n", -i); + class_simple_destroy(stallion_class); if (stl_tmpwritebuf != (char *) NULL) kfree(stl_tmpwritebuf); @@ -3181,10 +3187,12 @@ int __init stl_init(void) printk("STALLION: failed to register serial board device\n"); devfs_mk_dir("staliomem"); + stallion_class = class_simple_create(THIS_MODULE, "staliomem"); for (i = 0; i < 4; i++) { devfs_mk_cdev(MKDEV(STL_SIOMEMMAJOR, i), S_IFCHR|S_IRUSR|S_IWUSR, "staliomem/%d", i); + class_simple_device_add(stallion_class, MKDEV(STL_SIOMEMMAJOR, i), NULL, "staliomem%d", i); } stl_serial->owner = THIS_MODULE; --- linux-2.6.6-rc1/drivers/char/sysrq.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/drivers/char/sysrq.c 2004-04-18 22:25:30.335218824 -0700 @@ -35,6 +35,25 @@ #include #include +#ifdef CONFIG_KGDB_SYSRQ + +#define GDB_OP &kgdb_op +static void kgdb_sysrq(int key, struct pt_regs *pt_regs, struct tty_struct *tty) +{ + printk("kgdb sysrq\n"); + breakpoint(); +} + +static struct sysrq_key_op kgdb_op = { + .handler = kgdb_sysrq, + .help_msg = "kGdb|Fgdb", + .action_msg = "Debug breakpoint\n", +}; + +#else +#define GDB_OP NULL +#endif + extern void reset_vc(unsigned int); @@ -238,8 +257,8 @@ static struct sysrq_key_op *sysrq_key_ta /* c */ NULL, /* d */ NULL, /* e */ &sysrq_term_op, -/* f */ NULL, -/* g */ NULL, +/* f */ GDB_OP, +/* g */ GDB_OP, /* h */ NULL, /* i */ &sysrq_kill_op, /* j */ NULL, --- linux-2.6.6-rc1/drivers/char/tipar.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/char/tipar.c 2004-04-18 22:25:24.702075192 -0700 @@ -67,7 +67,7 @@ /* * Version Information */ -#define DRIVER_VERSION "1.17" +#define DRIVER_VERSION "1.19" #define 
DRIVER_AUTHOR "Romain Lievin " #define DRIVER_DESC "Device driver for TI/PC parallel link cables" #define DRIVER_LICENSE "GPL" @@ -361,10 +361,13 @@ tipar_ioctl(struct inode *inode, struct switch (cmd) { case IOCTL_TIPAR_DELAY: - delay = (int)arg; //get_user(delay, &arg); - break; + delay = (int)arg; //get_user(delay, &arg); + break; case IOCTL_TIPAR_TIMEOUT: - timeout = (int)arg; //get_user(timeout, &arg); + if (arg != 0) + timeout = (int)arg; + else + retval = -EINVAL; break; default: retval = -ENOTTY; @@ -399,7 +402,10 @@ tipar_setup(char *str) str = get_options(str, ARRAY_SIZE(ints), ints); if (ints[0] > 0) { - timeout = ints[1]; + if (ints[1] != 0) + timeout = ints[1]; + else + printk("tipar: wrong timeout value (0), using default value instead."); if (ints[0] > 1) { delay = ints[2]; } --- linux-2.6.6-rc1/drivers/char/tpqic02.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/tpqic02.c 2004-04-18 22:25:24.705074736 -0700 @@ -94,6 +94,7 @@ #include #include #include +#include #include #include @@ -229,6 +230,8 @@ static const char *format_names[] = { "600" /* untested. */ }; +static struct class_simple *tpqic02_class; + /* `exception_list' is needed for exception status reporting. * Exceptions 1..14 are defined by QIC-02 rev F. 
@@ -2696,23 +2699,32 @@ int __init qic02_tape_init(void) return -ENODEV; } + tpqic02_class = class_simple_create(THIS_MODULE, TPQIC02_NAME); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 2), NULL, "ntpqic11"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 2), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "ntpqic11"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 3), NULL, "tpqic11"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 3), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "tpqic11"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 4), NULL, "ntpqic24"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 4), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "ntpqic24"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 5), NULL, "tpqic24"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 5), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "tpqic24"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 6), NULL, "ntpqic20"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 6), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "ntpqic120"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 7), NULL, "tpqic20"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 7), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "tpqic120"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 8), NULL, "ntpqic50"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 8), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "ntpqic150"); + class_simple_device_add(tpqic02_class, MKDEV(QIC02_TAPE_MAJOR, 9), NULL, "tpqic50"); devfs_mk_cdev(MKDEV(QIC02_TAPE_MAJOR, 9), S_IFCHR|S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP, "tpqic150"); @@ -2757,13 +2769,23 @@ static void qic02_module_exit(void) qic02_release_resources(); devfs_remove("ntpqic11"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 2)); devfs_remove("tpqic11"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 3)); devfs_remove("ntpqic24"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 4)); devfs_remove("tpqic24"); + 
class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 5)); devfs_remove("ntpqic120"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 6)); devfs_remove("tpqic120"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 7)); devfs_remove("ntpqic150"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 8)); devfs_remove("tpqic150"); + class_simple_device_remove(MKDEV(QIC02_TAPE_MAJOR, 9)); + + class_simple_destroy(tpqic02_class); } static int qic02_module_init(void) --- linux-2.6.6-rc1/drivers/char/vc_screen.c 2003-09-08 13:58:57.000000000 -0700 +++ 25/drivers/char/vc_screen.c 2004-04-18 22:25:24.706074584 -0700 @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -469,6 +470,8 @@ static struct file_operations vcs_fops = .open = vcs_open, }; +static struct class_simple *vc_class; + void vcs_make_devfs(struct tty_struct *tty) { devfs_mk_cdev(MKDEV(VCS_MAJOR, tty->index + 1), @@ -477,19 +480,26 @@ void vcs_make_devfs(struct tty_struct *t devfs_mk_cdev(MKDEV(VCS_MAJOR, tty->index + 129), S_IFCHR|S_IRUSR|S_IWUSR, "vcc/a%u", tty->index + 1); + class_simple_device_add(vc_class, MKDEV(VCS_MAJOR, tty->index + 1), NULL, "vcs%u", tty->index + 1); + class_simple_device_add(vc_class, MKDEV(VCS_MAJOR, tty->index + 129), NULL, "vcsa%u", tty->index + 1); } void vcs_remove_devfs(struct tty_struct *tty) { devfs_remove("vcc/%u", tty->index + 1); devfs_remove("vcc/a%u", tty->index + 1); + class_simple_device_remove(MKDEV(VCS_MAJOR, tty->index + 1)); + class_simple_device_remove(MKDEV(VCS_MAJOR, tty->index + 129)); } int __init vcs_init(void) { if (register_chrdev(VCS_MAJOR, "vcs", &vcs_fops)) panic("unable to get major %d for vcs device", VCS_MAJOR); + vc_class = class_simple_create(THIS_MODULE, "vc"); devfs_mk_cdev(MKDEV(VCS_MAJOR, 0), S_IFCHR|S_IRUSR|S_IWUSR, "vcc/0"); devfs_mk_cdev(MKDEV(VCS_MAJOR, 128), S_IFCHR|S_IRUSR|S_IWUSR, "vcc/a0"); + class_simple_device_add(vc_class, MKDEV(VCS_MAJOR, 0), NULL, "vcs"); + class_simple_device_add(vc_class, 
MKDEV(VCS_MAJOR, 128), NULL, "vcsa"); return 0; } --- linux-2.6.6-rc1/drivers/char/vt.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/char/vt.c 2004-04-18 22:25:24.708074280 -0700 @@ -2617,6 +2617,8 @@ static struct tty_operations con_ops = { int __init vty_init(void) { + vcs_init(); + console_driver = alloc_tty_driver(MAX_NR_CONSOLES); if (!console_driver) panic("Couldn't allocate console driver\n"); @@ -2644,7 +2646,6 @@ int __init vty_init(void) #ifdef CONFIG_FRAMEBUFFER_CONSOLE fb_console_init(); #endif - vcs_init(); return 0; } --- linux-2.6.6-rc1/drivers/char/watchdog/amd7xx_tco.c 2004-04-03 20:39:12.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,374 +0,0 @@ -/* - * AMD 766/768 TCO Timer Driver - * (c) Copyright 2002 Zwane Mwaikambo - * All Rights Reserved. - * - * Parts from; - * Hardware driver for the AMD 768 Random Number Generator (RNG) - * (c) Copyright 2001 Red Hat Inc - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * The author(s) of this software shall not be held liable for damages - * of any nature resulting due to the use of this software. This - * software is provided AS-IS with no warranties. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define AMDTCO_MODULE_VER "build 20021116" -#define AMDTCO_MODULE_NAME "amd7xx_tco" -#define PFX AMDTCO_MODULE_NAME ": " - -#define MAX_TIMEOUT 38 /* max of 38 seconds, although the system will only - * reset itself after the second timeout */ - -/* pmbase registers */ -#define TCO_RELOAD_REG 0x40 /* bits 0-5 are current count, 6-7 are reserved */ -#define TCO_INITVAL_REG 0x41 /* bits 0-5 are value to load, 6-7 are reserved */ -#define TCO_TIMEOUT_MASK 0x3f -#define TCO_STATUS1_REG 0x44 -#define TCO_STATUS2_REG 0x46 -#define NDTO_STS2 (1 << 1) /* we're interested in the second timeout */ -#define BOOT_STS (1 << 2) /* will be set if NDTO_STS2 was set before reboot */ -#define TCO_CTRL1_REG 0x48 -#define TCO_HALT (1 << 11) -#define NO_REBOOT (1 << 10) /* in DevB:3x48 */ - -static char banner[] __initdata = KERN_INFO PFX AMDTCO_MODULE_VER "\n"; -static int timeout = MAX_TIMEOUT; -static u32 pmbase; /* PMxx I/O base */ -static struct pci_dev *dev; -static struct semaphore open_sem; -static spinlock_t amdtco_lock; /* only for device access */ -static char expect_close; - -module_param(timeout, int, 0); -MODULE_PARM_DESC(timeout, "range is 0-38 seconds, default is 38"); - -#ifdef CONFIG_WATCHDOG_NOWAYOUT -static int nowayout = 1; -#else -static int nowayout = 0; -#endif - -module_param(nowayout, int, 0); -MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CONFIG_WATCHDOG_NOWAYOUT)"); - -static inline u8 seconds_to_ticks(int seconds) -{ - /* the internal timer is stored as ticks which decrement - * every 0.6 seconds */ - return (seconds * 10) / 6; -} - -static inline int ticks_to_seconds(u8 ticks) -{ - return (ticks * 6) / 10; -} - -static inline int amdtco_status(void) -{ - u16 reg; - int status = 0; - - reg = inb(pmbase+TCO_CTRL1_REG); - if ((reg & TCO_HALT) == 0) - status |= 
WDIOF_KEEPALIVEPING; - - reg = inb(pmbase+TCO_STATUS2_REG); - if (reg & BOOT_STS) - status |= WDIOF_CARDRESET; - - return status; -} - -static inline void amdtco_ping(void) -{ - outb(1, pmbase+TCO_RELOAD_REG); -} - -static inline int amdtco_gettimeout(void) -{ - u8 reg = inb(pmbase+TCO_RELOAD_REG) & TCO_TIMEOUT_MASK; - return ticks_to_seconds(reg); -} - -static inline void amdtco_settimeout(unsigned int timeout) -{ - u8 reg = seconds_to_ticks(timeout) & TCO_TIMEOUT_MASK; - outb(reg, pmbase+TCO_INITVAL_REG); -} - -static inline void amdtco_global_enable(void) -{ - u16 reg; - - spin_lock(&amdtco_lock); - - /* clear NO_REBOOT on DevB:3x48 p97 */ - pci_read_config_word(dev, 0x48, &reg); - reg &= ~NO_REBOOT; - pci_write_config_word(dev, 0x48, reg); - - spin_unlock(&amdtco_lock); -} - -static inline void amdtco_enable(void) -{ - u16 reg; - - spin_lock(&amdtco_lock); - reg = inw(pmbase+TCO_CTRL1_REG); - reg &= ~TCO_HALT; - outw(reg, pmbase+TCO_CTRL1_REG); - spin_unlock(&amdtco_lock); -} - -static inline void amdtco_disable(void) -{ - u16 reg; - - spin_lock(&amdtco_lock); - reg = inw(pmbase+TCO_CTRL1_REG); - reg |= TCO_HALT; - outw(reg, pmbase+TCO_CTRL1_REG); - spin_unlock(&amdtco_lock); -} - -static int amdtco_fop_open(struct inode *inode, struct file *file) -{ - if (down_trylock(&open_sem)) - return -EBUSY; - - if (timeout > MAX_TIMEOUT) - timeout = MAX_TIMEOUT; - - amdtco_disable(); - amdtco_settimeout(timeout); - amdtco_global_enable(); - amdtco_enable(); - amdtco_ping(); - printk(KERN_INFO PFX "Watchdog enabled, timeout = %ds of %ds\n", - amdtco_gettimeout(), timeout); - - return 0; -} - - -static int amdtco_fop_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) -{ - int new_timeout; - int tmp; - - static struct watchdog_info ident = { - .options = WDIOF_SETTIMEOUT | WDIOF_CARDRESET, - .identity = "AMD 766/768", - }; - - switch (cmd) { - default: - return -ENOIOCTLCMD; - - case WDIOC_GETSUPPORT: - if (copy_to_user((struct watchdog_info
*)arg, &ident, sizeof ident)) - return -EFAULT; - return 0; - - case WDIOC_GETSTATUS: - return put_user(amdtco_status(), (int *)arg); - - case WDIOC_KEEPALIVE: - amdtco_ping(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_timeout, (int *)arg)) - return -EFAULT; - - if (new_timeout < 0) - return -EINVAL; - - if (new_timeout > MAX_TIMEOUT) - new_timeout = MAX_TIMEOUT; - - timeout = new_timeout; - amdtco_settimeout(timeout); - /* fall through and return the new timeout */ - - case WDIOC_GETTIMEOUT: - return put_user(amdtco_gettimeout(), (int *)arg); - - case WDIOC_SETOPTIONS: - if (copy_from_user(&tmp, (int *)arg, sizeof tmp)) - return -EFAULT; - - if (tmp & WDIOS_DISABLECARD) - amdtco_disable(); - - if (tmp & WDIOS_ENABLECARD) - amdtco_enable(); - - return 0; - } -} - - -static int amdtco_fop_release(struct inode *inode, struct file *file) -{ - if (expect_close == 42) { - amdtco_disable(); - printk(KERN_INFO PFX "Watchdog disabled\n"); - } else { - amdtco_ping(); - printk(KERN_CRIT PFX "Unexpected close!, timeout in %d seconds\n", timeout); - } - - expect_close = 0; - up(&open_sem); - return 0; -} - - -static ssize_t amdtco_fop_write(struct file *file, const char *data, size_t len, loff_t *ppos) -{ - if (ppos != &file->f_pos) - return -ESPIPE; - - if (len) { - if (!nowayout) { - size_t i; - char c; - expect_close = 0; - - for (i = 0; i != len; i++) { - if (get_user(c, data + i)) - return -EFAULT; - - if (c == 'V') - expect_close = 42; - } - } - amdtco_ping(); - } - - return len; -} - - -static int amdtco_notify_sys(struct notifier_block *this, unsigned long code, void *unused) -{ - if (code == SYS_DOWN || code == SYS_HALT) - amdtco_disable(); - - return NOTIFY_DONE; -} - - -static struct notifier_block amdtco_notifier = -{ - .notifier_call = amdtco_notify_sys, -}; - -static struct file_operations amdtco_fops = -{ - .owner = THIS_MODULE, - .write = amdtco_fop_write, - .ioctl = amdtco_fop_ioctl, - .open = amdtco_fop_open, - .release = amdtco_fop_release, 
-}; - -static struct miscdevice amdtco_miscdev = -{ - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &amdtco_fops, -}; - -static struct pci_device_id amdtco_pci_tbl[] = { - /* AMD 766 PCI_IDs here */ - { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_OPUS_7443, PCI_ANY_ID, PCI_ANY_ID, }, - { 0, }, -}; - -MODULE_DEVICE_TABLE (pci, amdtco_pci_tbl); - -static int __init amdtco_init(void) -{ - int ret; - - sema_init(&open_sem, 1); - spin_lock_init(&amdtco_lock); - - dev = NULL; - while ((dev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (pci_match_device (amdtco_pci_tbl, dev) != NULL) - goto found_one; - } - - return -ENODEV; - -found_one: - - if ((ret = register_reboot_notifier(&amdtco_notifier))) { - printk(KERN_ERR PFX "Unable to register reboot notifier err = %d\n", ret); - goto out_clean; - } - - if ((ret = misc_register(&amdtco_miscdev))) { - printk(KERN_ERR PFX "Unable to register miscdev on minor %d\n", WATCHDOG_MINOR); - goto out_unreg_reboot; - } - - pci_read_config_dword(dev, 0x58, &pmbase); - pmbase &= 0x0000FF00; - - if (pmbase == 0) { - printk (KERN_ERR PFX "power management base not set\n"); - ret = -EIO; - goto out_unreg_misc; - } - - /* ret = 0; */ - printk(banner); - goto out_clean; - -out_unreg_misc: - misc_deregister(&amdtco_miscdev); -out_unreg_reboot: - unregister_reboot_notifier(&amdtco_notifier); -out_clean: - return ret; -} - -static void __exit amdtco_exit(void) -{ - misc_deregister(&amdtco_miscdev); - unregister_reboot_notifier(&amdtco_notifier); -} - -module_init(amdtco_init); -module_exit(amdtco_exit); - -MODULE_AUTHOR("Zwane Mwaikambo "); -MODULE_DESCRIPTION("AMD 766/768 TCO Timer Driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); --- linux-2.6.6-rc1/drivers/char/watchdog/Kconfig 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/char/watchdog/Kconfig 2004-04-18 22:25:57.800043536 -0700 @@ -143,19 +143,6 @@ config ALIM7101_WDT Most people will say N. 
-config AMD7XX_TCO - tristate "AMD 766/768 TCO Timer/Watchdog" - depends on WATCHDOG && X86 && PCI - help - This is the driver for the hardware watchdog built in to the - AMD 766/768 chipsets. - This watchdog simply watches your kernel to make sure it doesn't - freeze, and if it does, it reboots your computer after a certain - amount of time. - - You can compile this driver directly into the kernel, or use - it as a module. The module will be called amd7xx_tco. - config SC520_WDT tristate "AMD Elan SC520 processor Watchdog" depends on WATCHDOG && X86 --- linux-2.6.6-rc1/drivers/char/watchdog/Makefile 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/char/watchdog/Makefile 2004-04-18 22:25:57.800043536 -0700 @@ -32,7 +32,6 @@ obj-$(CONFIG_ALIM1535_WDT) += alim1535_w obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o obj-$(CONFIG_CPU5_WDT) += cpu5wdt.o -obj-$(CONFIG_AMD7XX_TCO) += amd7xx_tco.o obj-$(CONFIG_INDYDOG) += indydog.o obj-$(CONFIG_PCIPCWATCHDOG) += pcwd_pci.o obj-$(CONFIG_USBPCWATCHDOG) += pcwd_usb.o --- linux-2.6.6-rc1/drivers/cpufreq/cpufreq_userspace.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/cpufreq/cpufreq_userspace.c 2004-04-18 22:25:51.594986848 -0700 @@ -167,7 +167,7 @@ cpufreq_procctl(ctl_table *ctl, int writ void __user *buffer, size_t *lenp) { char buf[16], *p; - int cpu = (int) ctl->extra1; + int cpu = (long)ctl->extra1; int len, left = *lenp; if (!left || (filp->f_pos && !write) || !cpu_online(cpu)) { @@ -205,7 +205,7 @@ cpufreq_sysctl(ctl_table *table, int __u void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context) { - int cpu = (int) table->extra1; + int cpu = (long)table->extra1; if (!cpu_online(cpu)) return -EINVAL; --- linux-2.6.6-rc1/drivers/firmware/edd.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/firmware/edd.c 2004-04-18 22:25:24.710073976 -0700 @@ -125,13 +125,15 @@ static struct sysfs_ops edd_attr_ops = { static ssize_t edd_show_host_bus(struct 
edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; int i; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } for (i = 0; i < 4; i++) { if (isprint(info->params.host_bus_type[i])) { @@ -169,13 +171,15 @@ edd_show_host_bus(struct edd_device *ede static ssize_t edd_show_interface(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; int i; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } for (i = 0; i < 8; i++) { if (isprint(info->params.interface_type[i])) { @@ -231,11 +235,13 @@ edd_show_interface(struct edd_device *ed static ssize_t edd_show_raw_data(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; ssize_t len = sizeof (info->params); - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } if (!(info->params.key == 0xBEDD || info->params.key == 0xDDBE)) len = info->params.length; @@ -251,11 +257,13 @@ edd_show_raw_data(struct edd_device *ede static ssize_t edd_show_version(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += scnprintf(p, left, "0x%02x\n", info->version); return (p - buf); @@ -272,11 +280,13 @@ edd_show_disk80_sig(struct edd_device *e static ssize_t edd_show_extensions(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = 
edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } if (info->interface_support & EDD_EXT_FIXED_DISK_ACCESS) { p += scnprintf(p, left, "Fixed disk access\n"); @@ -296,11 +306,13 @@ edd_show_extensions(struct edd_device *e static ssize_t edd_show_info_flags(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } if (info->params.info_flags & EDD_INFO_DMA_BOUNDARY_ERROR_TRANSPARENT) p += scnprintf(p, left, "DMA boundary error transparent\n"); @@ -324,11 +336,13 @@ edd_show_info_flags(struct edd_device *e static ssize_t edd_show_legacy_cylinders(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += snprintf(p, left, "0x%x\n", info->legacy_cylinders); return (p - buf); @@ -337,11 +351,13 @@ edd_show_legacy_cylinders(struct edd_dev static ssize_t edd_show_legacy_heads(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += snprintf(p, left, "0x%x\n", info->legacy_heads); return (p - buf); @@ -350,11 +366,13 @@ edd_show_legacy_heads(struct edd_device static ssize_t edd_show_legacy_sectors(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += snprintf(p, left, "0x%x\n", info->legacy_sectors); return (p - buf); @@ -363,11 
+381,13 @@ edd_show_legacy_sectors(struct edd_devic static ssize_t edd_show_default_cylinders(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += scnprintf(p, left, "0x%x\n", info->params.num_default_cylinders); return (p - buf); @@ -376,11 +396,13 @@ edd_show_default_cylinders(struct edd_de static ssize_t edd_show_default_heads(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += scnprintf(p, left, "0x%x\n", info->params.num_default_heads); return (p - buf); @@ -389,11 +411,13 @@ edd_show_default_heads(struct edd_device static ssize_t edd_show_default_sectors_per_track(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += scnprintf(p, left, "0x%x\n", info->params.sectors_per_track); return (p - buf); @@ -402,11 +426,13 @@ edd_show_default_sectors_per_track(struc static ssize_t edd_show_sectors(struct edd_device *edev, char *buf) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; char *p = buf; - if (!edev || !info || !buf) { + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info || !buf) return -EINVAL; - } p += scnprintf(p, left, "0x%llx\n", info->params.number_of_sectors); return (p - buf); @@ -426,8 +452,11 @@ edd_show_sectors(struct edd_device *edev static int edd_has_legacy_cylinders(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || 
!info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->legacy_cylinders > 0; } @@ -435,8 +464,11 @@ edd_has_legacy_cylinders(struct edd_devi static int edd_has_legacy_heads(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->legacy_heads > 0; } @@ -444,8 +476,11 @@ edd_has_legacy_heads(struct edd_device * static int edd_has_legacy_sectors(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->legacy_sectors > 0; } @@ -453,8 +488,11 @@ edd_has_legacy_sectors(struct edd_device static int edd_has_default_cylinders(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->params.num_default_cylinders > 0; } @@ -462,8 +500,11 @@ edd_has_default_cylinders(struct edd_dev static int edd_has_default_heads(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->params.num_default_heads > 0; } @@ -471,8 +512,11 @@ edd_has_default_heads(struct edd_device static int edd_has_default_sectors_per_track(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return -EINVAL; + info = edd_dev_get_info(edev); + if (!info) return -EINVAL; return info->params.sectors_per_track > 0; } @@ -480,11 +524,14 @@ 
edd_has_default_sectors_per_track(struct static int edd_has_edd30(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; int i, nonzero_path = 0; char c; - if (!edev || !info) + if (!edev) + return 0; + info = edd_dev_get_info(edev); + if (!info) return 0; if (!(info->params.key == 0xBEDD || info->params.key == 0xDDBE)) { @@ -508,8 +555,11 @@ edd_has_edd30(struct edd_device *edev) static int edd_has_disk80_sig(struct edd_device *edev) { - struct edd_info *info = edd_dev_get_info(edev); - if (!edev || !info) + struct edd_info *info; + if (!edev) + return 0; + info = edd_dev_get_info(edev); + if (!info) return 0; return info->device == 0x80; } @@ -597,9 +647,12 @@ static decl_subsys(edd,&ktype_edd,NULL); static int edd_dev_is_type(struct edd_device *edev, const char *type) { - struct edd_info *info = edd_dev_get_info(edev); + struct edd_info *info; + if (!edev) + return 0; + info = edd_dev_get_info(edev); - if (edev && type && info) { + if (type && info) { if (!strncmp(info->params.host_bus_type, type, strlen(type)) || !strncmp(info->params.interface_type, type, strlen(type))) return 1; --- linux-2.6.6-rc1/drivers/ide/ide.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/ide/ide.c 2004-04-18 22:26:01.548473688 -0700 @@ -203,34 +203,23 @@ static void setup_driver_defaults(ide_dr /* * Do not even *think* about calling this! 
*/ -static void init_hwif_data (unsigned int index) +static void init_hwif_data(ide_hwif_t *hwif, unsigned int index) { unsigned int unit; - hw_regs_t hw; - ide_hwif_t *hwif = &ide_hwifs[index]; /* bulk initialize hwif & drive info with zeros */ memset(hwif, 0, sizeof(ide_hwif_t)); - memset(&hw, 0, sizeof(hw_regs_t)); /* fill in any non-zero initial values */ - hwif->index = index; - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq); - memcpy(&hwif->hw, &hw, sizeof(hw)); - memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports)); - hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET]; -#ifdef CONFIG_BLK_DEV_HD - if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) - hwif->noprobe = 1; /* may be overridden by ide_setup() */ -#endif /* CONFIG_BLK_DEV_HD */ + hwif->index = index; hwif->major = ide_hwif_to_major[index]; + hwif->name[0] = 'i'; hwif->name[1] = 'd'; hwif->name[2] = 'e'; hwif->name[3] = '0' + index; - hwif->bus_state = BUSSTATE_ON; - hwif->reset_poll= NULL; - hwif->pre_reset = NULL; + + hwif->bus_state = BUSSTATE_ON; hwif->atapi_dma = 0; /* disable all atapi dma */ hwif->ultra_mask = 0x80; /* disable all ultra */ @@ -265,6 +254,24 @@ static void init_hwif_data (unsigned int } } +static void init_hwif_default(ide_hwif_t *hwif, unsigned int index) +{ + hw_regs_t hw; + + memset(&hw, 0, sizeof(hw_regs_t)); + + ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, &hwif->irq); + + memcpy(&hwif->hw, &hw, sizeof(hw)); + memcpy(hwif->io_ports, hw.io_ports, sizeof(hw.io_ports)); + + hwif->noprobe = !hwif->io_ports[IDE_DATA_OFFSET]; +#ifdef CONFIG_BLK_DEV_HD + if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA) + hwif->noprobe = 1; /* may be overridden by ide_setup() */ +#endif +} + /* * init_ide_data() sets reasonable default values into all fields * of all instances of the hwifs and drives, but only on the first call. 
@@ -285,6 +292,7 @@ static void init_hwif_data (unsigned int #define MAGIC_COOKIE 0x12345678 static void __init init_ide_data (void) { + ide_hwif_t *hwif; unsigned int index; static unsigned long magic_cookie = MAGIC_COOKIE; @@ -295,13 +303,21 @@ static void __init init_ide_data (void) setup_driver_defaults(&idedefault_driver); /* Initialise all interface structures */ - for (index = 0; index < MAX_HWIFS; ++index) - init_hwif_data(index); + for (index = 0; index < MAX_HWIFS; ++index) { + hwif = &ide_hwifs[index]; + init_hwif_data(hwif, index); + init_hwif_default(hwif, index); + hwif->irq = hwif->hw.irq = + ide_init_default_irq(hwif->io_ports[IDE_DATA_OFFSET]); + } +/* OBSOLETE: still needed on arm26 and arm */ +#ifdef CONFIG_ARM /* Add default hw interfaces */ initializing = 1; ide_init_default_hwifs(); initializing = 0; +#endif } /* @@ -569,8 +585,6 @@ void ide_hwif_release_regions(ide_hwif_t EXPORT_SYMBOL(ide_hwif_release_regions); -extern void init_hwif_data(unsigned int index); - /** * ide_unregister - free an ide interface * @index: index of interface (will change soon to a pointer) @@ -750,7 +764,10 @@ void ide_unregister (unsigned int index) } old_hwif = *hwif; - init_hwif_data(index); /* restore hwif data to pristine status */ + + init_hwif_data(hwif, index); /* restore hwif data to pristine status */ + init_hwif_default(hwif, index); + hwif->hwgroup = old_hwif.hwgroup; hwif->gendev.parent = old_hwif.gendev.parent; @@ -952,8 +969,10 @@ int ide_register_hw (hw_regs_t *hw, ide_ found: if (hwif->present) ide_unregister(index); - else if (!hwif->hold) - init_hwif_data(index); + else if (!hwif->hold) { + init_hwif_data(hwif, index); + init_hwif_default(hwif, index); + } if (hwif->present) return -1; memcpy(&hwif->hw, hw, sizeof(*hw)); --- linux-2.6.6-rc1/drivers/ide/ide-probe.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ide/ide-probe.c 2004-04-18 22:25:43.714184912 -0700 @@ -103,7 +103,8 @@ static inline int drive_is_flashcard (id if (id->config == 
0x848a) return 1; /* CompactFlash */ if (!strncmp(id->model, "KODAK ATA_FLASH", 15) /* Kodak */ || !strncmp(id->model, "Hitachi CV", 10) /* Hitachi */ - || !strncmp(id->model, "SunDisk SDCFB", 13) /* SunDisk */ + || !strncmp(id->model, "SunDisk SDCFB", 13) /* old SanDisk */ + || !strncmp(id->model, "SanDisk SDCFB", 13) /* SanDisk */ || !strncmp(id->model, "HAGIWARA HPC", 12) /* Hagiwara */ || !strncmp(id->model, "LEXAR ATA_FLASH", 15) /* Lexar */ || !strncmp(id->model, "ATA_FLASH", 9)) /* Simple Tech */ @@ -917,8 +918,12 @@ static int ide_init_queue(ide_drive_t *d q->queuedata = HWGROUP(drive); blk_queue_segment_boundary(q, 0xffff); - if (!hwif->rqsize) - hwif->rqsize = hwif->no_lba48 ? 256 : 65536; + if (!hwif->rqsize) { + if (hwif->max_rqsize) + hwif->rqsize = hwif->max_rqsize(drive); + else + hwif->rqsize = hwif->no_lba48 ? 256 : 65536; + } if (hwif->rqsize < max_sectors) max_sectors = hwif->rqsize; blk_queue_max_sectors(q, max_sectors); --- linux-2.6.6-rc1/drivers/ide/pci/siimage.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ide/pci/siimage.c 2004-04-18 22:25:43.715184760 -0700 @@ -203,13 +203,12 @@ static byte siimage_ratemask (ide_drive_ else pci_read_config_byte(hwif->pci_dev, 0x8A, &scsc); - if(is_sata(hwif)) - { - if(strstr(drive->id->model, "Maxtor")) + if (is_sata(hwif)) { + if (strstr(drive->id->model, "Maxtor 4D060H3")) return 3; return 4; } - + if ((scsc & 0x30) == 0x10) /* 133 */ mode = 4; else if ((scsc & 0x30) == 0x20) /* 2xPCI */ @@ -1046,25 +1045,34 @@ static void __init init_mmio_iops_siimag hwif->mmio = 2; } -static int is_dev_seagate_sata(ide_drive_t *drive) +/* TODO firmware versions should be added - eric */ +static const char * sil_blacklist [] = { + "ST320012AS", + "ST330013AS", + "ST340017AS", + "ST360015AS", + "ST380023AS", + "ST3120023AS", + "ST340014ASL", + "ST360014ASL", + "ST380011ASL", + "ST3120022ASL", + "ST3160021ASL", +}; + +static unsigned int siimage_sata_max_rqsize(ide_drive_t *drive) { const char *s = 
&drive->id->model[0]; - unsigned len; + unsigned int n; - if (!drive->present) - return 0; - - len = strnlen(s, sizeof(drive->id->model)); - - if ((len > 4) && (!memcmp(s, "ST", 2))) { - if ((!memcmp(s + len - 2, "AS", 2)) || - (!memcmp(s + len - 3, "ASL", 3))) { - printk(KERN_INFO "%s: applying pessimistic Seagate " - "errata fix\n", drive->name); - return 1; + for (n = 0; n < ARRAY_SIZE(sil_blacklist); n++) + if (!memcmp(sil_blacklist[n], s, strlen(sil_blacklist[n]))) { + printk(KERN_INFO "%s: applying Seagate errata fix\n", + drive->name); + return 15; } - } - return 0; + + return 128; } /** @@ -1087,9 +1095,10 @@ static void __init init_iops_siimage (id hwif->hwif_data = 0; - hwif->rqsize = 128; - if (is_sata(hwif) && is_dev_seagate_sata(&hwif->drives[0])) - hwif->rqsize = 15; + if (is_sata(hwif) && (class_rev <= 0x01)) + hwif->max_rqsize = siimage_sata_max_rqsize; + else + hwif->rqsize = 128; if (pci_get_drvdata(dev) == NULL) return; --- linux-2.6.6-rc1/drivers/ieee1394/amdtp.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/amdtp.c 2004-04-18 22:25:24.713073520 -0700 @@ -319,7 +319,7 @@ void ohci1394_stop_it_ctx(struct ti_ohci control = reg_read(ohci, OHCI1394_IsoXmitContextControlSet + ctx * 16); if ((control & OHCI1394_CONTEXT_ACTIVE) == 0) break; - + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } @@ -408,7 +408,7 @@ static void stream_shift_packet_lists(un /* Now that we know the list is non-empty, we can get the head * of the list without locking, because the process context - * only adds to the tail. + * only adds to the tail. */ pl = list_entry(s->dma_packet_lists.next, struct packet_list, link); last = &pl->packets[PACKET_LIST_SIZE - 1]; @@ -424,7 +424,7 @@ static void stream_shift_packet_lists(un if (last->db->payload_desc.status == 0) { HPSB_INFO("weird interrupt..."); return; - } + } /* If the last descriptor block does not specify a branch * address, we have a sample underflow. 
@@ -469,7 +469,7 @@ static struct packet *stream_current_pac return &s->current_packet_list->packets[s->current_packet]; } - + static void stream_queue_packet(struct stream *s) { s->current_packet++; @@ -543,13 +543,13 @@ void packet_initialize(struct packet *p, DMA_CTL_OUTPUT_MORE | DMA_CTL_IMMEDIATE | 8; if (next) { - p->db->payload_desc.control = + p->db->payload_desc.control = DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH; p->db->payload_desc.branch = next->db_bus | 3; p->db->header_desc.skip = next->db_bus | 3; } else { - p->db->payload_desc.control = + p->db->payload_desc.control = DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH | DMA_CTL_UPDATE | DMA_CTL_IRQ; p->db->payload_desc.branch = 0; @@ -580,7 +580,7 @@ struct packet_list *packet_list_alloc(st for (i = 0; i < PACKET_LIST_SIZE; i++) { if (i < PACKET_LIST_SIZE - 1) next = &pl->packets[i + 1]; - else + else next = NULL; packet_initialize(&pl->packets[i], next); } @@ -695,7 +695,7 @@ static u32 get_header_bits(struct stream case AMDTP_FORMAT_IEC958_PCM: case AMDTP_FORMAT_IEC958_AC3: return get_iec958_header_bits(s, sub_frame, sample); - + case AMDTP_FORMAT_RAW: return 0x40; @@ -739,18 +739,18 @@ static void fill_packet(struct stream *s /* Fill IEEE1394 headers */ packet->db->header_desc.header[0] = - (IEEE1394_SPEED_100 << 16) | (0x01 << 14) | + (IEEE1394_SPEED_100 << 16) | (0x01 << 14) | (s->iso_channel << 8) | (TCODE_ISO_DATA << 4); packet->db->header_desc.header[1] = size << 16; - + /* Calculate synchronization timestamp (syt). First we * determine syt_index, that is, the index in the packet of * the sample for which the timestamp is valid. 
*/ syt_index = (s->syt_interval - s->dbc) & (s->syt_interval - 1); if (syt_index < nevents) { - syt = ((atomic_read(&s->cycle_count) << 12) | + syt = ((atomic_read(&s->cycle_count) << 12) | s->cycle_offset.integer) & 0xffff; - fraction_add(&s->cycle_offset, + fraction_add(&s->cycle_offset, &s->cycle_offset, &s->ticks_per_syt_offset); /* This next addition should be modulo 8000 (0x1f40), @@ -763,7 +763,7 @@ static void fill_packet(struct stream *s syt = 0xffff; atomic_inc(&s->cycle_count2); - + /* Fill cip header */ packet->payload->eoh0 = 0; packet->payload->sid = s->host->host->node_id & 0x3f; @@ -1072,7 +1072,7 @@ void stream_free(struct stream *s) * that sometimes generates an it transmit interrupt if we * later re-enable the context. */ - wait_event_interruptible(s->packet_list_wait, + wait_event_interruptible(s->packet_list_wait, list_empty(&s->dma_packet_lists)); ohci1394_stop_it_ctx(s->host->ohci, s->iso_tasklet.context, 1); @@ -1102,7 +1102,7 @@ static ssize_t amdtp_write(struct file * unsigned char *p; int i; size_t length; - + if (s->packet_pool == NULL) return -EBADFD; @@ -1123,16 +1123,16 @@ static ssize_t amdtp_write(struct file * return -EFAULT; if (s->input->length < s->input->size) continue; - + stream_flush(s); - + if (s->current_packet_list != NULL) continue; if (file->f_flags & O_NONBLOCK) return i + length > 0 ? 
i + length : -EAGAIN; - if (wait_event_interruptible(s->packet_list_wait, + if (wait_event_interruptible(s->packet_list_wait, !list_empty(&s->free_packet_lists))) return -EINTR; } @@ -1152,7 +1152,7 @@ static int amdtp_ioctl(struct inode *ino case AMDTP_IOC_CHANNEL: if (copy_from_user(&cfg, (struct amdtp_ioctl *) arg, sizeof cfg)) return -EFAULT; - else + else return stream_configure(s, cmd, &cfg); default: @@ -1266,6 +1266,7 @@ static int __init amdtp_init_module (voi { cdev_init(&amdtp_cdev, &amdtp_fops); amdtp_cdev.owner = THIS_MODULE; + kobject_set_name(&amdtp_cdev.kobj, "amdtp"); if (cdev_add(&amdtp_cdev, IEEE1394_AMDTP_DEV, 16)) { HPSB_ERR("amdtp: unable to add char device"); return -EIO; --- linux-2.6.6-rc1/drivers/ieee1394/amdtp.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/drivers/ieee1394/amdtp.h 2004-04-18 22:25:24.713073520 -0700 @@ -24,7 +24,7 @@ * * The dimension field specifies the dimension of the signal, that is, * the number of audio channels. Only AMDTP_FORMAT_RAW supports - * settings greater than 2. + * settings greater than 2. * * The mode field specifies which transmission mode to use. 
The AMDTP * specifies two different transmission modes: blocking and --- linux-2.6.6-rc1/drivers/ieee1394/cmp.c 2004-02-03 20:42:35.000000000 -0800 +++ 25/drivers/ieee1394/cmp.c 2004-04-18 22:25:24.714073368 -0700 @@ -187,14 +187,14 @@ static int pcr_read(struct hpsb_host *ho int csraddr = addr - CSR_REGISTER_BASE; int plug; struct cmp_host *ch; - + if (length != 4) return RCODE_TYPE_ERROR; ch = hpsb_get_hostinfo(&cmp_highlevel, host); if (csraddr == 0x900) { *buf = cpu_to_be32(ch->u.ompr_quadlet); - return RCODE_COMPLETE; + return RCODE_COMPLETE; } else if (csraddr < 0x904 + ch->u.ompr.nplugs * 4) { plug = (csraddr - 0x904) / 4; @@ -206,7 +206,7 @@ static int pcr_read(struct hpsb_host *ho } else if (csraddr == 0x980) { *buf = cpu_to_be32(ch->v.impr_quadlet); - return RCODE_COMPLETE; + return RCODE_COMPLETE; } else if (csraddr < 0x984 + ch->v.impr.nplugs * 4) { plug = (csraddr - 0x984) / 4; @@ -225,10 +225,10 @@ static int pcr_lock(struct hpsb_host *ho struct cmp_host *ch; ch = hpsb_get_hostinfo(&cmp_highlevel, host); - - if (extcode != EXTCODE_COMPARE_SWAP) + + if (extcode != EXTCODE_COMPARE_SWAP) return RCODE_TYPE_ERROR; - + if (csraddr == 0x900) { /* FIXME: Ignore writes to bits 30-31 and 0-7 */ *store = cpu_to_be32(ch->u.ompr_quadlet); --- linux-2.6.6-rc1/drivers/ieee1394/csr1212.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/csr1212.c 2004-04-18 22:25:24.717072912 -0700 @@ -1,6 +1,6 @@ /* * csr1212.c -- IEEE 1212 Control and Status Register support for Linux - * + * * Copyright (C) 2003 Francois Retief * Steve Kinneberg * @@ -173,7 +173,7 @@ struct csr1212_csr *csr1212_create_csr(s if (!csr) return NULL; - csr->cache_head = + csr->cache_head = csr1212_rom_cache_malloc(CSR1212_CONFIG_ROM_SPACE_OFFSET, CSR1212_CONFIG_ROM_SPACE_SIZE); if (!csr->cache_head) { @@ -238,7 +238,7 @@ static struct csr1212_keyval *csr1212_ne struct csr1212_keyval *csr1212_new_immediate(u_int8_t key, u_int32_t value) { struct csr1212_keyval *kv = 
csr1212_new_keyval(CSR1212_KV_TYPE_IMMEDIATE, key); - + if (!kv) return NULL; @@ -253,11 +253,10 @@ struct csr1212_keyval *csr1212_new_leaf( if (!kv) return NULL; - + if (data_len > 0) { kv->value.leaf.data = CSR1212_MALLOC(data_len); - if (!kv->value.leaf.data) - { + if (!kv->value.leaf.data) { CSR1212_FREE(kv); return NULL; } @@ -572,7 +571,7 @@ struct csr1212_keyval *csr1212_new_modif CSR1212_MODIFIABLE_DESCRIPTOR_SET_MAX_SIZE(kv, max_size); CSR1212_MODIFIABLE_DESCRIPTOR_SET_ADDRESS_HI(kv, address); CSR1212_MODIFIABLE_DESCRIPTOR_SET_ADDRESS_LO(kv, address); - + return kv; } @@ -621,7 +620,7 @@ struct csr1212_keyval *csr1212_new_keywo /* make sure last quadlet is zeroed out */ *((u_int32_t*)&(buffer[(data_len - 1) & ~0x3])) = 0; - + /* Copy keyword(s) into leaf data buffer */ for (i = 0; i < strc; i++) { int len = strlen(strv[i]) + 1; @@ -643,7 +642,7 @@ void csr1212_detach_keyval_from_director return; dentry = csr1212_find_keyval(dir, kv); - + if (!dentry) return; @@ -788,8 +787,7 @@ static int csr1212_append_new_cache(stru return CSR1212_ENOMEM; } - if (csr1212_attach_keyval_to_directory(csr->root_kv, cache->ext_rom) != CSR1212_SUCCESS) - { + if (csr1212_attach_keyval_to_directory(csr->root_kv, cache->ext_rom) != CSR1212_SUCCESS) { csr1212_release_keyval(cache->ext_rom); csr->ops->release_addr(csr_addr, csr->private); CSR1212_FREE(cache); @@ -1119,12 +1117,11 @@ int csr1212_generate_csr_image(struct cs /* Remove unused, excess cache regions */ while (cache) { struct csr1212_csr_rom_cache *oc = cache; - + cache = cache->next; csr1212_remove_cache(csr, oc); } - /* Go through the list backward so that when done, the correct CRC * will be calculated for the Extended ROM areas. */ for(cache = csr->cache_tail; cache; cache = cache->prev) { @@ -1263,7 +1260,7 @@ static inline int csr1212_parse_dir_entr ret = CSR1212_ENOMEM; goto fail; } - + k->refcnt = 0; /* Don't keep local reference when parsing. 
*/ break; @@ -1450,7 +1447,7 @@ int _csr1212_read_keyval(struct csr1212_ newcr = CSR1212_MALLOC(sizeof(struct csr1212_cache_region)); if (!newcr) return CSR1212_ENOMEM; - + newcr->offset_start = cache_index & ~(csr->max_rom - 1); newcr->offset_end = newcr->offset_start; newcr->next = cr; @@ -1474,7 +1471,7 @@ int _csr1212_read_keyval(struct csr1212_ newcr = CSR1212_MALLOC(sizeof(struct csr1212_cache_region)); if (!newcr) return CSR1212_ENOMEM; - + newcr->offset_start = cache_index & ~(csr->max_rom - 1); newcr->offset_end = newcr->offset_start; newcr->prev = cr; --- linux-2.6.6-rc1/drivers/ieee1394/csr1212.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/csr1212.h 2004-04-18 22:25:24.717072912 -0700 @@ -1,6 +1,6 @@ /* * csr1212.h -- IEEE 1212 Control and Status Register support for Linux - * + * * Copyright (C) 2003 Francois Retief * Steve Kinneberg * @@ -37,6 +37,7 @@ #include #include #include +#include #define CSR1212_MALLOC(size) kmalloc((size), in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) #define CSR1212_FREE(ptr) kfree(ptr) @@ -440,7 +441,7 @@ static inline u_int32_t *CSR1212_ICON_DE static const int pd[4] = { 0, 4, 16, 256 }; static const int cs[16] = { 4, 2 }; int ps = pd[CSR1212_ICON_DESCRIPTOR_LEAF_PALETTE_DEPTH(kv)]; - + return &kv->value.leaf.data[5 + (ps * cs[CSR1212_ICON_DESCRIPTOR_LEAF_COLOR_SPACE(kv)]) / sizeof(u_int32_t)]; @@ -705,7 +706,7 @@ static inline void csr1212_release_keyva * _kv is a struct csr1212_keyval * that'll point to the current keyval (loop index). * _dir is a struct csr1212_keyval * that points to the directory to be looped. * _pos is a struct csr1212_dentry * that is used internally for indexing. - * + * * kv will be NULL upon exit of the loop. 
*/ #define csr1212_for_each_dir_entry(_csr, _kv, _dir, _pos) \ --- linux-2.6.6-rc1/drivers/ieee1394/csr.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/csr.c 2004-04-18 22:25:24.715073216 -0700 @@ -130,23 +130,23 @@ static void host_reset(struct hpsb_host host->csr.state &= ~0x100; } - host->csr.topology_map[1] = + host->csr.topology_map[1] = cpu_to_be32(be32_to_cpu(host->csr.topology_map[1]) + 1); - host->csr.topology_map[2] = cpu_to_be32(host->node_count << 16 + host->csr.topology_map[2] = cpu_to_be32(host->node_count << 16 | host->selfid_count); - host->csr.topology_map[0] = + host->csr.topology_map[0] = cpu_to_be32((host->selfid_count + 2) << 16 | csr_crc16(host->csr.topology_map + 1, host->selfid_count + 2)); - host->csr.speed_map[1] = + host->csr.speed_map[1] = cpu_to_be32(be32_to_cpu(host->csr.speed_map[1]) + 1); - host->csr.speed_map[0] = cpu_to_be32(0x3f1 << 16 + host->csr.speed_map[0] = cpu_to_be32(0x3f1 << 16 | csr_crc16(host->csr.speed_map+1, 0x3f1)); } -/* +/* * HI == seconds (bits 0:2) * LO == fraction units of 1/8000 of a second, as per 1394 (bits 19:31) * @@ -161,7 +161,7 @@ static void host_reset(struct hpsb_host static inline void calculate_expire(struct csr_control *csr) { unsigned long units; - + /* Take the seconds, and convert to units */ units = (unsigned long)(csr->split_timeout_hi & 0x07) << 13; @@ -288,7 +288,7 @@ static void remove_host(struct hpsb_host } -int hpsb_update_config_rom(struct hpsb_host *host, const quadlet_t *new_rom, +int hpsb_update_config_rom(struct hpsb_host *host, const quadlet_t *new_rom, size_t buffersize, unsigned char rom_version) { unsigned long flags; @@ -296,7 +296,7 @@ int hpsb_update_config_rom(struct hpsb_h HPSB_NOTICE("hpsb_update_config_rom() is deprecated"); - spin_lock_irqsave(&host->csr.lock, flags); + spin_lock_irqsave(&host->csr.lock, flags); if (rom_version != host->csr.generation) ret = -1; else if (buffersize > host->csr.rom->cache_head->size) @@ -329,10 +329,10 @@ static int 
read_maps(struct hpsb_host *h int csraddr = addr - CSR_REGISTER_BASE; const char *src; - spin_lock_irqsave(&host->csr.lock, flags); + spin_lock_irqsave(&host->csr.lock, flags); if (csraddr < CSR_SPEED_MAP) { - src = ((char *)host->csr.topology_map) + csraddr + src = ((char *)host->csr.topology_map) + csraddr - CSR_TOPOLOGY_MAP; } else { src = ((char *)host->csr.speed_map) + csraddr - CSR_SPEED_MAP; @@ -352,7 +352,7 @@ static int read_regs(struct hpsb_host *h int csraddr = addr - CSR_REGISTER_BASE; int oldcycle; quadlet_t ret; - + if ((csraddr | length) & 0x3) return RCODE_TYPE_ERROR; @@ -404,7 +404,7 @@ static int read_regs(struct hpsb_host *h /* cycle time wrapped around */ host->csr.bus_time += (1 << 7); } - *(buf++) = cpu_to_be32(host->csr.bus_time + *(buf++) = cpu_to_be32(host->csr.bus_time | (host->csr.cycle_time >> 25)); out; @@ -464,7 +464,7 @@ static int write_regs(struct hpsb_host * quadlet_t *data, u64 addr, size_t length, u16 flags) { int csraddr = addr - CSR_REGISTER_BASE; - + if ((csraddr | length) & 0x3) return RCODE_TYPE_ERROR; @@ -494,12 +494,12 @@ static int write_regs(struct hpsb_host * return RCODE_ADDRESS_ERROR; case CSR_SPLIT_TIMEOUT_HI: - host->csr.split_timeout_hi = + host->csr.split_timeout_hi = be32_to_cpu(*(data++)) & 0x00000007; calculate_expire(&host->csr); out; case CSR_SPLIT_TIMEOUT_LO: - host->csr.split_timeout_lo = + host->csr.split_timeout_lo = be32_to_cpu(*(data++)) & 0xfff80000; calculate_expire(&host->csr); out; --- linux-2.6.6-rc1/drivers/ieee1394/dma.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/dma.c 2004-04-18 22:25:24.718072760 -0700 @@ -96,7 +96,7 @@ int dma_region_alloc(struct dma_region * /* fill scatter/gather list with pages */ for (i = 0; i < dma->n_pages; i++) { unsigned long va = (unsigned long) dma->kvirt + (i << PAGE_SHIFT); - + dma->sglist[i].page = vmalloc_to_page((void *)va); dma->sglist[i].length = PAGE_SIZE; } @@ -196,6 +196,8 @@ void dma_region_sync_for_device(struct d 
pci_dma_sync_sg_for_device(dma->dev, &dma->sglist[first], last - first + 1, dma->direction); } +#ifdef CONFIG_MMU + /* nopage() handler for mmap access */ static struct page* @@ -251,3 +253,12 @@ int dma_region_mmap(struct dma_region *d return 0; } + +#else /* CONFIG_MMU */ + +int dma_region_mmap(struct dma_region *dma, struct file *file, struct vm_area_struct *vma) +{ + return -EINVAL; +} + +#endif /* CONFIG_MMU */ --- linux-2.6.6-rc1/drivers/ieee1394/dma.h 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/dma.h 2004-04-18 22:25:24.718072760 -0700 @@ -14,7 +14,7 @@ #include /* struct dma_prog_region - + a small, physically-contiguous DMA buffer with random-access, synchronous usage characteristics */ @@ -37,7 +37,7 @@ static inline dma_addr_t dma_prog_region } /* struct dma_region - + a large, non-physically-contiguous DMA buffer with streaming, asynchronous usage characteristics */ --- linux-2.6.6-rc1/drivers/ieee1394/dv1394.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/dv1394.c 2004-04-18 22:25:24.728071240 -0700 @@ -47,11 +47,11 @@ TODO: - tunable frame-drop behavior: either loop last frame, or halt transmission - + - use a scatter/gather buffer for DMA programs (f->descriptor_pool) so that we don't rely on allocating 64KB of contiguous kernel memory via pci_alloc_consistent() - + DONE: - during reception, better handling of dropped frames and continuity errors - during reception, prevent DMA from bypassing the irq tasklets @@ -82,7 +82,7 @@ - expose NTSC and PAL as separate devices (can be overridden) */ - + #include #include #include @@ -117,7 +117,7 @@ #include "nodemgr.h" #include "hosts.h" #include "ieee1394_core.h" -#include "highlevel.h" +#include "highlevel.h" #include "dv1394.h" #include "dv1394-private.h" @@ -215,7 +215,7 @@ static struct frame* frame_new(unsigned debug_printk("dv1394: frame_new: allocated CIP header pool at virt 0x%08lx (contig) dma 0x%08lx size %ld\n", (unsigned long) f->header_pool, (unsigned long) 
f->header_pool_dma, PAGE_SIZE); - + f->descriptor_pool_size = MAX_PACKETS * sizeof(struct DMA_descriptor_block); /* make it an even # of pages */ f->descriptor_pool_size += PAGE_SIZE - (f->descriptor_pool_size%PAGE_SIZE); @@ -228,10 +228,10 @@ static struct frame* frame_new(unsigned kfree(f); return NULL; } - + debug_printk("dv1394: frame_new: allocated DMA program memory at virt 0x%08lx (contig) dma 0x%08lx size %ld\n", (unsigned long) f->descriptor_pool, (unsigned long) f->descriptor_pool_dma, f->descriptor_pool_size); - + f->data = 0; frame_reset(f); @@ -248,9 +248,9 @@ static void frame_delete(struct frame *f -/* +/* frame_prepare() - build the DMA program for transmitting - + Frame_prepare() must be called OUTSIDE the video->spinlock. However, frame_prepare() must still be serialized, so it should be called WITH the video->sem taken. @@ -265,7 +265,7 @@ static void frame_prepare(struct video_c dma_addr_t block_dma; struct CIP_header *cip; dma_addr_t cip_dma; - + unsigned int n_descriptors, full_packets, packets_per_frame, payload_size; /* these flags denote packets that need special attention */ @@ -278,7 +278,7 @@ static void frame_prepare(struct video_c unsigned long irq_flags; irq_printk("frame_prepare( %d ) ---------------------\n", this_frame); - + full_packets = 0; @@ -304,7 +304,7 @@ static void frame_prepare(struct video_c return; } - /* the block surely won't cross a page boundary, + /* the block surely won't cross a page boundary, since an even number of descriptor_blocks fit on a page */ block = &(f->descriptor_pool[f->n_packets]); @@ -312,22 +312,22 @@ static void frame_prepare(struct video_c to the kernel base address of the descriptor pool + DMA base address of the descriptor pool */ block_dma = ((unsigned long) block - (unsigned long) f->descriptor_pool) + f->descriptor_pool_dma; - + /* the whole CIP pool fits on one page, so no worries about boundaries */ - if ( ((unsigned long) &(f->header_pool[f->n_packets]) - (unsigned long) f->header_pool) 
+ if ( ((unsigned long) &(f->header_pool[f->n_packets]) - (unsigned long) f->header_pool) > PAGE_SIZE) { printk(KERN_ERR "dv1394: FATAL ERROR: no room to allocate CIP header\n"); return; } cip = &(f->header_pool[f->n_packets]); - + /* DMA address of the CIP header = offset of cip relative to kernel base address of the header pool + DMA base address of the header pool */ cip_dma = (unsigned long) cip % PAGE_SIZE + f->header_pool_dma; - + /* is this an empty packet? */ if (video->cip_accum > (video->cip_d - video->cip_n)) { @@ -362,7 +362,7 @@ static void frame_prepare(struct video_c for this purpose, because that would leave very little time to set the timestamp before DMA starts on the next frame. */ - + if (f->n_packets == 0) { first_packet = 1; } else if ( full_packets == (packets_per_frame-1) ) { @@ -370,7 +370,7 @@ static void frame_prepare(struct video_c } else if (f->n_packets == packets_per_frame) { mid_packet = 1; } - + /********************/ /* setup CIP header */ @@ -396,10 +396,10 @@ static void frame_prepare(struct video_c fill_cip_header(cip, /* the node ID number of the OHCI card */ reg_read(video->ohci, OHCI1394_NodeID) & 0x3F, - video->continuity_counter, + video->continuity_counter, video->pal_or_ntsc, 0xFFFF /* the timestamp is filled in later */); - + /* advance counter, only for full packets */ if ( ! 
empty_packet ) video->continuity_counter++; @@ -423,7 +423,7 @@ static void frame_prepare(struct video_c sizeof(struct CIP_header), /* data size */ cip_dma); - + if (first_packet) f->frame_begin_timestamp = &(block->u.out.u.empty.ol.q[3]); else if (mid_packet) @@ -445,7 +445,7 @@ static void frame_prepare(struct video_c sizeof(struct CIP_header), /* data size */ cip_dma); - + /* third (and possibly fourth) descriptor - for DV data */ /* the 480-byte payload can cross a page boundary; if so, we need to split it into two DMA descriptors */ @@ -464,9 +464,9 @@ static void frame_prepare(struct video_c data_p - (unsigned long) video->dv_buf.kvirt)); fill_output_last( &(block->u.out.u.full.u.cross.ol), - + /* want completion status on all interesting packets */ - (first_packet || mid_packet || last_packet) ? 1 : 0, + (first_packet || mid_packet || last_packet) ? 1 : 0, /* want interrupt on all interesting packets */ (first_packet || mid_packet || last_packet) ? 1 : 0, @@ -492,14 +492,14 @@ static void frame_prepare(struct video_c n_descriptors = 5; if (first_packet) f->first_n_descriptors = n_descriptors; - + full_packets++; } else { /* fits on one page */ fill_output_last( &(block->u.out.u.full.u.nocross.ol), - + /* want completion status on all interesting packets */ (first_packet || mid_packet || last_packet) ? 
1 : 0, @@ -508,11 +508,11 @@ static void frame_prepare(struct video_c 480, /* data size (480 bytes of DV data) */ - + /* DMA address of data_p */ dma_region_offset_to_bus(&video->dv_buf, data_p - (unsigned long) video->dv_buf.kvirt)); - + if (first_packet) f->frame_begin_timestamp = &(block->u.out.u.full.u.nocross.ol.q[3]); else if (mid_packet) @@ -531,8 +531,8 @@ static void frame_prepare(struct video_c full_packets++; } } - - /* link this descriptor block into the DMA program by filling in + + /* link this descriptor block into the DMA program by filling in the branch address of the previous block */ /* note: we are not linked into the active DMA chain yet */ @@ -545,10 +545,10 @@ static void frame_prepare(struct video_c f->n_packets++; - + } - /* when we first assemble a new frame, set the final branch + /* when we first assemble a new frame, set the final branch to loop back up to the top */ *(f->frame_end_branch) = cpu_to_le32(f->descriptor_pool_dma | f->first_n_descriptors); @@ -572,11 +572,11 @@ static void frame_prepare(struct video_c this_frame, video->active_frame, video->n_clear_frames, video->first_clear_frame, last_frame); irq_printk(" begin_ts %08lx mid_ts %08lx end_ts %08lx end_br %08lx\n", - (unsigned long) f->frame_begin_timestamp, - (unsigned long) f->mid_frame_timestamp, - (unsigned long) f->frame_end_timestamp, + (unsigned long) f->frame_begin_timestamp, + (unsigned long) f->mid_frame_timestamp, + (unsigned long) f->frame_end_timestamp, (unsigned long) f->frame_end_branch); - + if (video->active_frame != -1) { /* if DMA is already active, we are almost done */ @@ -589,7 +589,7 @@ static void frame_prepare(struct video_c /* this write MUST precede the next one, or we could silently drop frames */ wmb(); - + /* disable the want_status semaphore on the last packet */ temp = le32_to_cpu(*(video->frames[last_frame]->frame_end_branch - 2)); temp &= 0xF7CFFFFF; @@ -605,7 +605,7 @@ static void frame_prepare(struct video_c dropped frame. 
Hopefully this window is too small to really matter, and the consequence is rather harmless. */ - + irq_printk(" new frame %d linked onto DMA chain\n", this_frame); @@ -614,13 +614,13 @@ static void frame_prepare(struct video_c } } else { - + u32 transmit_sec, transmit_cyc; u32 ts_cyc, ts_off; /* DMA is stopped, so this is the very first frame */ video->active_frame = this_frame; - + /* set CommandPtr to address and size of first descriptor block */ reg_write(video->ohci, video->ohci_IsoXmitCommandPtr, video->frames[video->active_frame]->descriptor_pool_dma | @@ -641,7 +641,7 @@ static void frame_prepare(struct video_c transmit_sec += transmit_cyc/8000; transmit_cyc %= 8000; - + ts_off = ct_off; ts_cyc = transmit_cyc + 3; ts_cyc %= 8000; @@ -657,7 +657,7 @@ static void frame_prepare(struct video_c f->cip_syt2->b[6] = f->assigned_timestamp >> 8; f->cip_syt2->b[7] = f->assigned_timestamp & 0xFF; } - + /* --- start DMA --- */ /* clear all bits in ContextControl register */ @@ -668,8 +668,8 @@ static void frame_prepare(struct video_c /* the OHCI card has the ability to start ISO transmission on a particular cycle (start-on-cycle). This way we can ensure that the first DV frame will have an accurate timestamp. - - However, start-on-cycle only appears to work if the OHCI card + + However, start-on-cycle only appears to work if the OHCI card is cycle master! Since the consequences of messing up the first timestamp are minimal*, just disable start-on-cycle for now. 
@@ -690,7 +690,7 @@ static void frame_prepare(struct video_c /* set the 'run' bit */ reg_write(video->ohci, video->ohci_IsoXmitContextControlSet, 0x8000); flush_pci_write(video->ohci); - + /* --- DMA should be running now --- */ debug_printk(" Cycle = %4u ContextControl = %08x CmdPtr = %08x\n", @@ -715,19 +715,19 @@ static void frame_prepare(struct video_c i++; } - printk("set = %08x, cmdPtr = %08x\n", + printk("set = %08x, cmdPtr = %08x\n", reg_read(video->ohci, video->ohci_IsoXmitContextControlSet), reg_read(video->ohci, video->ohci_IsoXmitCommandPtr) ); - + if ( ! (reg_read(video->ohci, video->ohci_IsoXmitContextControlSet) & (1 << 10)) ) { - printk("DMA did NOT go active after 20ms, event = %x\n", + printk("DMA did NOT go active after 20ms, event = %x\n", reg_read(video->ohci, video->ohci_IsoXmitContextControlSet) & 0x1F); } else printk("DMA is RUNNING!\n"); } #endif - + } @@ -738,11 +738,11 @@ static void frame_prepare(struct video_c /*** RECEIVE FUNCTIONS *****************************************************/ -/* +/* frame method put_packet - map and copy the packet data to its location in the frame - based upon DIF section and sequence + map and copy the packet data to its location in the frame + based upon DIF section and sequence */ static void inline @@ -754,28 +754,28 @@ frame_put_packet (struct frame *f, struc /* sanity check */ if (dif_sequence > 11 || dif_block > 149) return; - + switch (section_type) { case 0: /* 1 Header block */ memcpy( (void *) f->data + dif_sequence * 150 * 80, p->data, 480); break; - + case 1: /* 2 Subcode blocks */ memcpy( (void *) f->data + dif_sequence * 150 * 80 + (1 + dif_block) * 80, p->data, 480); break; - + case 2: /* 3 VAUX blocks */ memcpy( (void *) f->data + dif_sequence * 150 * 80 + (3 + dif_block) * 80, p->data, 480); break; - + case 3: /* 9 Audio blocks interleaved with video */ memcpy( (void *) f->data + dif_sequence * 150 * 80 + (6 + dif_block * 16) * 80, p->data, 480); break; - + case 4: /* 135 Video blocks 
interleaved with audio */ memcpy( (void *) f->data + dif_sequence * 150 * 80 + (7 + (dif_block / 15) + dif_block) * 80, p->data, 480); break; - + default: /* we can not handle any other data */ break; } @@ -786,25 +786,25 @@ static void start_dma_receive(struct vid { if (video->first_run == 1) { video->first_run = 0; - + /* start DMA once all of the frames are READY */ video->n_clear_frames = 0; video->first_clear_frame = -1; video->current_packet = 0; video->active_frame = 0; - + /* reset iso recv control register */ reg_write(video->ohci, video->ohci_IsoRcvContextControlClear, 0xFFFFFFFF); wmb(); - + /* clear bufferFill, set isochHeader and speed (0=100) */ reg_write(video->ohci, video->ohci_IsoRcvContextControlSet, 0x40000000); - + /* match on all tags, listen on channel */ reg_write(video->ohci, video->ohci_IsoRcvContextMatch, 0xf0000000 | video->channel); - + /* address and first descriptor block + Z=1 */ - reg_write(video->ohci, video->ohci_IsoRcvCommandPtr, + reg_write(video->ohci, video->ohci_IsoRcvCommandPtr, video->frames[0]->descriptor_pool_dma | 1); /* Z=1 */ wmb(); @@ -813,13 +813,13 @@ static void start_dma_receive(struct vid /* run */ reg_write(video->ohci, video->ohci_IsoRcvContextControlSet, 0x8000); flush_pci_write(video->ohci); - + debug_printk("dv1394: DMA started\n"); - + #if DV1394_DEBUG_LEVEL >= 2 { int i; - + for (i = 0; i < 1000; ++i) { mdelay(1); if (reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 10)) { @@ -828,15 +828,14 @@ static void start_dma_receive(struct vid } } if ( reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 11) ) { - printk("DEAD, event = %x\n", + printk("DEAD, event = %x\n", reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & 0x1F); } else printk("RUNNING!\n"); } #endif - } - else if ( reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 11) ) { - debug_printk("DEAD, event = %x\n", + } else if ( reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 
11) ) { + debug_printk("DEAD, event = %x\n", reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & 0x1F); /* wake */ @@ -845,7 +844,7 @@ static void start_dma_receive(struct vid } -/* +/* receive_packets() - build the DMA program for receiving */ @@ -875,24 +874,24 @@ static void receive_packets(struct video /* locate a descriptor block and packet from the buffer */ block = &(f->descriptor_pool[i]); block_dma = ((unsigned long) block - (unsigned long) f->descriptor_pool) + f->descriptor_pool_dma; - + data = ((struct packet*)video->packet_buf.kvirt) + f->frame_num * MAX_PACKETS + i; - data_dma = dma_region_offset_to_bus( &video->packet_buf, + data_dma = dma_region_offset_to_bus( &video->packet_buf, ((unsigned long) data - (unsigned long) video->packet_buf.kvirt) ); - + /* setup DMA descriptor block */ want_interrupt = ((i % (MAX_PACKETS/2)) == 0 || i == (MAX_PACKETS-1)); fill_input_last( &(block->u.in.il), want_interrupt, 512, data_dma); - + /* link descriptors */ last_branch_address = f->frame_end_branch; - + if (last_branch_address != NULL) *(last_branch_address) = cpu_to_le32(block_dma | 1); /* set Z=1 */ - + f->frame_end_branch = &(block->u.in.il.q[2]); } - + } /* next j */ spin_unlock_irqrestore(&video->spinlock, irq_flags); @@ -913,7 +912,7 @@ static int do_dv1394_init(struct video_c debug_printk("dv1394: initialising %d\n", video->id); if (init->api_version != DV1394_API_VERSION) return -EINVAL; - + /* first sanitize all the parameters */ if ( (init->n_frames < 2) || (init->n_frames > DV1394_MAX_FRAMES) ) return -EINVAL; @@ -949,7 +948,7 @@ static int do_dv1394_init(struct video_c /* (the card should not be reset if the parameters are screwy) */ do_dv1394_shutdown(video, 0); - + /* try to claim the ISO channel */ spin_lock_irqsave(&video->ohci->IR_channel_lock, flags); if (video->ohci->ISO_channel_usage & chan_mask) { @@ -991,19 +990,19 @@ static int do_dv1394_init(struct video_c } video->syt_offset = init->syt_offset; - + /* find and claim DMA 
contexts on the OHCI card */ if (video->ohci_it_ctx == -1) { ohci1394_init_iso_tasklet(&video->it_tasklet, OHCI_ISO_TRANSMIT, it_tasklet_func, (unsigned long) video); - if (ohci1394_register_iso_tasklet(video->ohci, &video->it_tasklet) < 0) { + if (ohci1394_register_iso_tasklet(video->ohci, &video->it_tasklet) < 0) { printk(KERN_ERR "dv1394: could not find an available IT DMA context\n"); retval = -EBUSY; goto err; } - + video->ohci_it_ctx = video->it_tasklet.context; debug_printk("dv1394: claimed IT DMA context %d\n", video->ohci_it_ctx); } @@ -1020,7 +1019,7 @@ static int do_dv1394_init(struct video_c video->ohci_ir_ctx = video->ir_tasklet.context; debug_printk("dv1394: claimed IR DMA context %d\n", video->ohci_ir_ctx); } - + /* allocate struct frames */ for (i = 0; i < init->n_frames; i++) { video->frames[i] = frame_new(i, video); @@ -1037,14 +1036,14 @@ static int do_dv1394_init(struct video_c retval = dma_region_alloc(&video->dv_buf, new_buf_size, video->ohci->dev, PCI_DMA_TODEVICE); if (retval) goto err; - + video->dv_buf_size = new_buf_size; debug_printk("dv1394: Allocated %d frame buffers, total %u pages (%u DMA pages), %lu bytes\n", video->n_frames, video->dv_buf.n_pages, video->dv_buf.n_dma_pages, video->dv_buf_size); } - + /* set up the frame->data pointers */ for (i = 0; i < video->n_frames; i++) video->frames[i]->data = (unsigned long) video->dv_buf.kvirt + i * video->frame_size; @@ -1054,17 +1053,17 @@ static int do_dv1394_init(struct video_c video->packet_buf_size = sizeof(struct packet) * video->n_frames * MAX_PACKETS; if (video->packet_buf_size % PAGE_SIZE) video->packet_buf_size += PAGE_SIZE - (video->packet_buf_size % PAGE_SIZE); - + retval = dma_region_alloc(&video->packet_buf, video->packet_buf_size, video->ohci->dev, PCI_DMA_FROMDEVICE); if (retval) goto err; - - debug_printk("dv1394: Allocated %d packets in buffer, total %u pages (%u DMA pages), %lu bytes\n", + + debug_printk("dv1394: Allocated %d packets in buffer, total %u pages (%u DMA 
pages), %lu bytes\n", video->n_frames*MAX_PACKETS, video->packet_buf.n_pages, video->packet_buf.n_dma_pages, video->packet_buf_size); } - + /* set up register offsets for IT context */ /* IT DMA context registers are spaced 16 bytes apart */ video->ohci_IsoXmitContextControlSet = OHCI1394_IsoXmitContextControlSet+16*video->ohci_it_ctx; @@ -1085,7 +1084,7 @@ static int do_dv1394_init(struct video_c /* enable interrupts for IR context */ reg_write(video->ohci, OHCI1394_IsoRecvIntMaskSet, (1 << video->ohci_ir_ctx) ); debug_printk("dv1394: interrupts enabled for IR context %d\n", video->ohci_ir_ctx); - + return 0; err: @@ -1105,7 +1104,7 @@ static int do_dv1394_init_default(struct /* the following are now set via devfs */ init.channel = video->channel; init.format = video->pal_or_ntsc; - init.cip_n = video->cip_n; + init.cip_n = video->cip_n; init.cip_d = video->cip_d; init.syt_offset = video->syt_offset; @@ -1135,17 +1134,17 @@ static void stop_dma(struct video_card * reg_write(video->ohci, video->ohci_IsoXmitContextControlClear, (1 << 15)); reg_write(video->ohci, video->ohci_IsoRcvContextControlClear, (1 << 15)); flush_pci_write(video->ohci); - + video->active_frame = -1; video->first_run = 1; - + /* wait until DMA really stops */ i = 0; while (i < 1000) { - + /* wait 0.1 millisecond */ - udelay(100); - + udelay(100); + if ( (reg_read(video->ohci, video->ohci_IsoXmitContextControlClear) & (1 << 10)) || (reg_read(video->ohci, video->ohci_IsoRcvContextControlClear) & (1 << 10)) ) { /* still active */ @@ -1155,10 +1154,10 @@ static void stop_dma(struct video_card * debug_printk("dv1394: stop_dma: DMA stopped safely after %d ms\n", i/10); break; } - + i++; } - + if (i == 1000) { printk(KERN_ERR "dv1394: stop_dma: DMA still going after %d ms!\n", i/10); } @@ -1175,12 +1174,12 @@ out: static void do_dv1394_shutdown(struct video_card *video, int free_dv_buf) { int i; - + debug_printk("dv1394: shutdown...\n"); /* stop DMA if in progress */ stop_dma(video); - + /* release the 
DMA contexts */ if (video->ohci_it_ctx != -1) { video->ohci_IsoXmitContextControlSet = 0; @@ -1189,7 +1188,7 @@ static void do_dv1394_shutdown(struct vi /* disable interrupts for IT context */ reg_write(video->ohci, OHCI1394_IsoXmitIntMaskClear, (1 << video->ohci_it_ctx)); - + /* remove tasklet */ ohci1394_unregister_iso_tasklet(video->ohci, &video->it_tasklet); debug_printk("dv1394: IT context %d released\n", video->ohci_it_ctx); @@ -1215,16 +1214,16 @@ static void do_dv1394_shutdown(struct vi if (video->channel != -1) { u64 chan_mask; unsigned long flags; - + chan_mask = (u64)1 << video->channel; - + spin_lock_irqsave(&video->ohci->IR_channel_lock, flags); video->ohci->ISO_channel_usage &= ~(chan_mask); spin_unlock_irqrestore(&video->ohci->IR_channel_lock, flags); - + video->channel = -1; } - + /* free the frame structs */ for (i = 0; i < DV1394_MAX_FRAMES; i++) { if (video->frames[i]) @@ -1233,10 +1232,10 @@ static void do_dv1394_shutdown(struct vi } video->n_frames = 0; - + /* we can't free the DMA buffer unless it is guaranteed that no more user-space mappings exist */ - + if (free_dv_buf) { dma_region_free(&video->dv_buf); video->dv_buf_size = 0; @@ -1324,11 +1323,11 @@ static int dv1394_fasync(int fd, struct { /* I just copied this code verbatim from Alan Cox's mouse driver example (linux/Documentation/DocBook/) */ - + struct video_card *video = file_to_video_card(file); - + int retval = fasync_helper(fd, file, on, &video->fasync); - + if (retval < 0) return retval; return 0; @@ -1362,19 +1361,19 @@ static ssize_t dv1394_write(struct file ret = 0; add_wait_queue(&video->waitq, &wait); - + while (count > 0) { /* must set TASK_INTERRUPTIBLE *before* checking for free buffers; otherwise we could miss a wakeup if the interrupt fires between the check and the schedule() */ - + set_current_state(TASK_INTERRUPTIBLE); - + spin_lock_irqsave(&video->spinlock, flags); - + target_frame = video->first_clear_frame; - + spin_unlock_irqrestore(&video->spinlock, flags); if 
(video->frames[target_frame]->state == FRAME_CLEAR) { @@ -1390,7 +1389,7 @@ static ssize_t dv1394_write(struct file if (cnt > count) cnt = count; - if (cnt <= 0) { + if (cnt <= 0) { /* no room left, gotta wait */ if (file->f_flags & O_NONBLOCK) { if (!ret) @@ -1404,7 +1403,7 @@ static ssize_t dv1394_write(struct file } schedule(); - + continue; /* start over from 'while(count > 0)...' */ } @@ -1423,7 +1422,7 @@ static ssize_t dv1394_write(struct file if (video->write_off == video->frame_size * ((target_frame + 1) % video->n_frames)) frame_prepare(video, target_frame); } - + remove_wait_queue(&video->waitq, &wait); set_current_state(TASK_RUNNING); up(&video->sem); @@ -1456,9 +1455,9 @@ static ssize_t dv1394_read(struct file * return ret; } video->continuity_counter = -1; - + receive_packets(video); - + start_dma_receive(video); } @@ -1470,7 +1469,7 @@ static ssize_t dv1394_read(struct file * /* must set TASK_INTERRUPTIBLE *before* checking for free buffers; otherwise we could miss a wakeup if the interrupt fires between the check and the schedule() */ - + set_current_state(TASK_INTERRUPTIBLE); spin_lock_irqsave(&video->spinlock, flags); @@ -1494,7 +1493,7 @@ static ssize_t dv1394_read(struct file * if (cnt > count) cnt = count; - if (cnt <= 0) { + if (cnt <= 0) { /* no room left, gotta wait */ if (file->f_flags & O_NONBLOCK) { if (!ret) @@ -1508,7 +1507,7 @@ static ssize_t dv1394_read(struct file * } schedule(); - + continue; /* start over from 'while(count > 0)...' 
*/ } @@ -1531,7 +1530,7 @@ static ssize_t dv1394_read(struct file * spin_unlock_irqrestore(&video->spinlock, flags); } } - + remove_wait_queue(&video->waitq, &wait); set_current_state(TASK_RUNNING); up(&video->sem); @@ -1579,19 +1578,19 @@ static int dv1394_ioctl(struct inode *in ret = -EINVAL; goto out; } - + while (n_submit > 0) { add_wait_queue(&video->waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); - + spin_lock_irqsave(&video->spinlock, flags); /* wait until video->first_clear_frame is really CLEAR */ while (video->frames[video->first_clear_frame]->state != FRAME_CLEAR) { spin_unlock_irqrestore(&video->spinlock, flags); - + if (signal_pending(current)) { remove_wait_queue(&video->waitq, &wait); set_current_state(TASK_RUNNING); @@ -1601,14 +1600,14 @@ static int dv1394_ioctl(struct inode *in schedule(); set_current_state(TASK_INTERRUPTIBLE); - + spin_lock_irqsave(&video->spinlock, flags); } spin_unlock_irqrestore(&video->spinlock, flags); remove_wait_queue(&video->waitq, &wait); set_current_state(TASK_RUNNING); - + frame_prepare(video, video->first_clear_frame); n_submit--; @@ -1625,7 +1624,7 @@ static int dv1394_ioctl(struct inode *in ret = -EINVAL; goto out; } - + n_wait = (unsigned int) arg; /* since we re-run the last frame on underflow, we will @@ -1636,16 +1635,16 @@ static int dv1394_ioctl(struct inode *in ret = -EINVAL; goto out; } - + add_wait_queue(&video->waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); - + spin_lock_irqsave(&video->spinlock, flags); while (video->n_clear_frames < n_wait) { - + spin_unlock_irqrestore(&video->spinlock, flags); - + if (signal_pending(current)) { remove_wait_queue(&video->waitq, &wait); set_current_state(TASK_RUNNING); @@ -1655,7 +1654,7 @@ static int dv1394_ioctl(struct inode *in schedule(); set_current_state(TASK_INTERRUPTIBLE); - + spin_lock_irqsave(&video->spinlock, flags); } @@ -1674,7 +1673,7 @@ static int dv1394_ioctl(struct inode *in ret = -EINVAL; goto out; } - + n_recv = (unsigned int) arg; /* at 
least one frame must be active */ @@ -1682,7 +1681,7 @@ static int dv1394_ioctl(struct inode *in ret = -EINVAL; goto out; } - + spin_lock_irqsave(&video->spinlock, flags); /* release the clear frames */ @@ -1693,7 +1692,7 @@ static int dv1394_ioctl(struct inode *in /* reset dropped_frames */ video->dropped_frames = 0; - + spin_unlock_irqrestore(&video->spinlock, flags); ret = 0; @@ -1706,11 +1705,11 @@ static int dv1394_ioctl(struct inode *in if (ret) goto out; } - + video->continuity_counter = -1; - + receive_packets(video); - + start_dma_receive(video); ret = 0; @@ -1765,7 +1764,7 @@ static int dv1394_ioctl(struct inode *in /* reset dropped_frames */ video->dropped_frames = 0; - + spin_unlock_irqrestore(&video->spinlock, flags); if (copy_to_user((void*)arg, &status, sizeof(status))) { @@ -1798,11 +1797,11 @@ static int dv1394_open(struct inode *ino has already been set to video by devfs */ if (file->private_data) { video = (struct video_card*) file->private_data; - + } else { /* look up the card by ID */ unsigned long flags; - + spin_lock_irqsave(&dv1394_cards_lock, flags); if (!list_empty(&dv1394_cards)) { struct video_card *p; @@ -1819,10 +1818,10 @@ static int dv1394_open(struct inode *ino debug_printk("dv1394: OHCI card %d not found", ieee1394_file_to_instance(file)); return -ENODEV; } - + file->private_data = (void*) video; } - + #ifndef DV1394_ALLOW_MORE_THAN_ONE_OPEN if ( test_and_set_bit(0, &video->open) ) { @@ -1845,7 +1844,7 @@ static int dv1394_release(struct inode * /* clean up async I/O users */ dv1394_fasync(-1, file, 0); - + /* give someone else a turn */ clear_bit(0, &video->open); @@ -1865,19 +1864,19 @@ static void it_tasklet_func(unsigned lon if (!video->dma_running) goto out; - irq_printk("ContextControl = %08x, CommandPtr = %08x\n", + irq_printk("ContextControl = %08x, CommandPtr = %08x\n", reg_read(video->ohci, video->ohci_IsoXmitContextControlSet), reg_read(video->ohci, video->ohci_IsoXmitCommandPtr) ); - + if ( (video->ohci_it_ctx != -1) 
&& (reg_read(video->ohci, video->ohci_IsoXmitContextControlSet) & (1 << 10)) ) { struct frame *f; unsigned int frame, i; - + if (video->active_frame == -1) frame = 0; else @@ -1901,7 +1900,7 @@ static void it_tasklet_func(unsigned lon int prev_frame; struct frame *prev_f; - + /* don't reset, need this later *(f->frame_begin_timestamp) = 0; */ irq_printk(" BEGIN\n"); @@ -1910,11 +1909,11 @@ static void it_tasklet_func(unsigned lon if (prev_frame == -1) prev_frame += video->n_frames; prev_f = video->frames[prev_frame]; - + /* make sure we can actually garbage collect this frame */ if ( (prev_f->state == FRAME_READY) && - prev_f->done && (!f->done) ) + prev_f->done && (!f->done) ) { frame_reset(prev_f); video->n_clear_frames++; @@ -1929,7 +1928,7 @@ static void it_tasklet_func(unsigned lon f->done = 1; } - + /* see if we need to set the timestamp for the next frame */ if ( *(f->mid_frame_timestamp) ) { struct frame *next_frame; @@ -1957,9 +1956,9 @@ static void it_tasklet_func(unsigned lon plus the length of the last frame sent, plus the syt latency */ ts_cyc = begin_ts & 0xF; /* advance one frame, plus syt latency (typically 2-3) */ - ts_cyc += f->n_packets + video->syt_offset ; + ts_cyc += f->n_packets + video->syt_offset ; - ts_off = 0; + ts_off = 0; ts_cyc += ts_off/3072; ts_off %= 3072; @@ -1986,14 +1985,12 @@ static void it_tasklet_func(unsigned lon video->dropped_frames++; } - - } /* for (each frame) */ } if (wake) { kill_fasync(&video->fasync, SIGIO, POLL_OUT); - + /* wake readers/writers/ioctl'ers */ wake_up_interruptible(&video->waitq); } @@ -2011,10 +2008,9 @@ static void ir_tasklet_func(unsigned lon if (!video->dma_running) goto out; - + if ( (video->ohci_ir_ctx != -1) && - (reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 10)) ) - { + (reg_read(video->ohci, video->ohci_IsoRcvContextControlSet) & (1 << 10)) ) { int sof=0; /* start-of-frame flag */ struct frame *f; @@ -2036,14 +2032,14 @@ static void ir_tasklet_func(unsigned lon 
dma_region_sync_for_cpu(&video->packet_buf, (unsigned long) p - (unsigned long) video->packet_buf.kvirt, sizeof(struct packet)); - + packet_length = le16_to_cpu(p->data_length); packet_time = le16_to_cpu(p->timestamp); - + irq_printk("received packet %02d, timestamp=%04x, length=%04x, sof=%02x%02x\n", video->current_packet, - packet_time, packet_length, + packet_time, packet_length, p->data[0], p->data[1]); - + /* get the descriptor based on packet_buffer cursor */ f = video->frames[video->current_packet / MAX_PACKETS]; block = &(f->descriptor_pool[video->current_packet % MAX_PACKETS]); @@ -2053,14 +2049,14 @@ static void ir_tasklet_func(unsigned lon /* get the current frame */ f = video->frames[video->active_frame]; - + /* exclude empty packet */ if (packet_length > 8 && xferstatus == 0x11) { /* check for start of frame */ - /* DRD> Changed to check section type ([0]>>5==0) + /* DRD> Changed to check section type ([0]>>5==0) and dif sequence ([1]>>4==0) */ sof = ( (p->data[0] >> 5) == 0 && (p->data[1] >> 4) == 0); - + dbc = (int) (p->cip_h1 >> 24); if ( video->continuity_counter != -1 && dbc > ((video->continuity_counter + 1) % 256) ) { @@ -2071,12 +2067,12 @@ static void ir_tasklet_func(unsigned lon video->first_clear_frame = -1; } video->continuity_counter = dbc; - + if (!video->first_frame) { if (sof) { video->first_frame = 1; } - + } else if (sof) { /* close current frame */ frame_reset(f); /* f->state = STATE_CLEAR */ @@ -2089,7 +2085,7 @@ static void ir_tasklet_func(unsigned lon } if (video->first_clear_frame == -1) video->first_clear_frame = video->active_frame; - + /* get the next frame */ video->active_frame = (video->active_frame + 1) % video->n_frames; f = video->frames[video->active_frame]; @@ -2101,22 +2097,22 @@ static void ir_tasklet_func(unsigned lon /* open next frame */ f->state = FRAME_READY; } - + /* copy to buffer */ if (f->n_packets > (video->frame_size / 480)) { printk(KERN_ERR "frame buffer overflow during receive\n"); } - + 
frame_put_packet(f, p); - + } /* first_frame */ } - + /* stop, end of ready packets */ else if (xferstatus == 0) { break; } - + /* reset xferStatus & resCount */ block->u.in.il.q[3] = cpu_to_le32(512); @@ -2127,7 +2123,7 @@ static void ir_tasklet_func(unsigned lon next_dma = ((unsigned long) block - (unsigned long) f->descriptor_pool) + f->descriptor_pool_dma; next->u.in.il.q[0] |= 3 << 20; /* enable interrupt */ next->u.in.il.q[2] = 0; /* disable branch */ - + /* link previous to next */ prev_i = (next_i == 0) ? (MAX_PACKETS * video->n_frames - 1) : (next_i - 1); f = video->frames[prev_i / MAX_PACKETS]; @@ -2145,20 +2141,20 @@ static void ir_tasklet_func(unsigned lon /* advance packet_buffer cursor */ video->current_packet = (video->current_packet + 1) % (MAX_PACKETS * video->n_frames); - + } /* for all packets */ - + wake = 1; /* why the hell not? */ - + } /* receive interrupt */ - + if (wake) { kill_fasync(&video->fasync, SIGIO, POLL_IN); /* wake readers/writers/ioctl'ers */ wake_up_interruptible(&video->waitq); } - + out: spin_unlock(&video->spinlock); } @@ -2216,13 +2212,13 @@ static int dv1394_init(struct ti_ohci *o printk(KERN_ERR "dv1394: cannot allocate video_card\n"); goto err; } - + memset(video, 0, sizeof(struct video_card)); - + video->ohci = ohci; /* lower 2 bits of id indicate which of four "plugs" per host */ - video->id = ohci->host->id << 2; + video->id = ohci->host->id << 2; if (format == DV1394_NTSC) video->id |= mode; else @@ -2234,16 +2230,16 @@ static int dv1394_init(struct ti_ohci *o video->ohci_IsoXmitContextControlSet = 0; video->ohci_IsoXmitContextControlClear = 0; video->ohci_IsoXmitCommandPtr = 0; - + video->ohci_IsoRcvContextControlSet = 0; video->ohci_IsoRcvContextControlClear = 0; video->ohci_IsoRcvCommandPtr = 0; video->ohci_IsoRcvContextMatch = 0; - + video->n_frames = 0; /* flag that video is not initialized */ video->channel = 63; /* default to broadcast channel */ video->active_frame = -1; - + /* initialize the following */ 
video->pal_or_ntsc = format; video->cip_n = 0; /* 0 = use builtin default */ @@ -2270,7 +2266,7 @@ static int dv1394_init(struct ti_ohci *o INIT_LIST_HEAD(&video->list); list_add_tail(&video->list, &dv1394_cards); spin_unlock_irqrestore(&dv1394_cards_lock, flags); - + if (devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394*16 + video->id), S_IFCHR|S_IRUGO|S_IWUGO, @@ -2281,7 +2277,7 @@ static int dv1394_init(struct ti_ohci *o goto err_free; debug_printk("dv1394: dv1394_init() OK on ID %d\n", video->id); - + return 0; err_free: @@ -2293,7 +2289,7 @@ static int dv1394_init(struct ti_ohci *o static void dv1394_un_init(struct video_card *video) { char buf[32]; - + /* obviously nobody has the driver open at this point */ do_dv1394_shutdown(video, 1); snprintf(buf, sizeof(buf), "dv/host%d/%s/%s", (video->id >> 2), @@ -2305,13 +2301,13 @@ static void dv1394_un_init(struct video_ kfree(video); } - + static void dv1394_remove_host (struct hpsb_host *host) { struct video_card *video; unsigned long flags; int id = host->id; - + /* We only work with the OHCI-1394 driver */ if (strcmp(host->driver->name, OHCI1394_DRIVER_NAME)) return; @@ -2355,7 +2351,7 @@ static void dv1394_add_host (struct hpsb devfs_mk_dir("ieee1394/dv/host%d", id); devfs_mk_dir("ieee1394/dv/host%d/NTSC", id); devfs_mk_dir("ieee1394/dv/host%d/PAL", id); - + dv1394_init(ohci, DV1394_NTSC, MODE_RECEIVE); dv1394_init(ohci, DV1394_NTSC, MODE_TRANSMIT); dv1394_init(ohci, DV1394_PAL, MODE_RECEIVE); @@ -2373,7 +2369,7 @@ static void dv1394_host_reset(struct hps struct ti_ohci *ohci; struct video_card *video = NULL, *tmp_vid; unsigned long flags; - + /* We only work with the OHCI-1394 driver */ if (strcmp(host->driver->name, OHCI1394_DRIVER_NAME)) return; @@ -2394,7 +2390,7 @@ static void dv1394_host_reset(struct hps if (!video) return; - + spin_lock_irqsave(&video->spinlock, flags); if (!video->dma_running) @@ -2403,7 +2399,7 @@ static void dv1394_host_reset(struct hps /* check IT context */ if 
(video->ohci_it_ctx != -1) { u32 ctx; - + ctx = reg_read(video->ohci, video->ohci_IsoXmitContextControlSet); /* if (RUN but not ACTIVE) */ @@ -2415,17 +2411,17 @@ static void dv1394_host_reset(struct hps /* to be safe, assume a frame has been dropped. User-space programs should handle this condition like an underflow. */ video->dropped_frames++; - + /* for some reason you must clear, then re-set the RUN bit to restart DMA */ - + /* clear RUN */ reg_write(video->ohci, video->ohci_IsoXmitContextControlClear, (1 << 15)); flush_pci_write(video->ohci); - + /* set RUN */ reg_write(video->ohci, video->ohci_IsoXmitContextControlSet, (1 << 15)); flush_pci_write(video->ohci); - + /* set the WAKE bit (just in case; this isn't strictly necessary) */ reg_write(video->ohci, video->ohci_IsoXmitContextControlSet, (1 << 12)); flush_pci_write(video->ohci); @@ -2435,11 +2431,11 @@ static void dv1394_host_reset(struct hps reg_read(video->ohci, video->ohci_IsoXmitCommandPtr)); } } - + /* check IR context */ if (video->ohci_ir_ctx != -1) { u32 ctx; - + ctx = reg_read(video->ohci, video->ohci_IsoRcvContextControlSet); /* if (RUN but not ACTIVE) */ @@ -2454,15 +2450,15 @@ static void dv1394_host_reset(struct hps /* for some reason you must clear, then re-set the RUN bit to restart DMA */ /* XXX this doesn't work for me, I can't get IR DMA to restart :[ */ - + /* clear RUN */ reg_write(video->ohci, video->ohci_IsoRcvContextControlClear, (1 << 15)); flush_pci_write(video->ohci); - + /* set RUN */ reg_write(video->ohci, video->ohci_IsoRcvContextControlSet, (1 << 15)); flush_pci_write(video->ohci); - + /* set the WAKE bit (just in case; this isn't strictly necessary) */ reg_write(video->ohci, video->ohci_IsoRcvContextControlSet, (1 << 12)); flush_pci_write(video->ohci); @@ -2475,7 +2471,7 @@ static void dv1394_host_reset(struct hps out: spin_unlock_irqrestore(&video->spinlock, flags); - + /* wake readers/writers/ioctl'ers */ wake_up_interruptible(&video->waitq); } @@ -2616,6 +2612,7 @@ static 
int __init dv1394_init_module(voi cdev_init(&dv1394_cdev, &dv1394_fops); dv1394_cdev.owner = THIS_MODULE; + kobject_set_name(&dv1394_cdev.kobj, "dv1394"); ret = cdev_add(&dv1394_cdev, IEEE1394_DV1394_DEV, 16); if (ret) { printk(KERN_ERR "dv1394: unable to register character device\n"); --- linux-2.6.6-rc1/drivers/ieee1394/dv1394.h 2003-07-27 12:14:38.000000000 -0700 +++ 25/drivers/ieee1394/dv1394.h 2004-04-18 22:25:24.729071088 -0700 @@ -49,7 +49,7 @@ To set the DV output parameters (e.g. whether you want NTSC or PAL video), use the DV1394_INIT ioctl, passing in the parameters you want in a struct dv1394_init. - + Example 1: To play a raw .DV file: cat foo.DV > /dev/dv1394 (cat will use write() internally) @@ -72,9 +72,9 @@ 2) For more control over buffering, and to avoid unnecessary copies - of the DV data, you can use the more sophisticated the mmap() interface. - First, call the DV1394_INIT ioctl to specify your parameters, - including the number of frames in the ringbuffer. Then, calling mmap() + of the DV data, you can use the more sophisticated the mmap() interface. + First, call the DV1394_INIT ioctl to specify your parameters, + including the number of frames in the ringbuffer. Then, calling mmap() on the dv1394 device will give you direct access to the ringbuffer from which the DV card reads your frame data. @@ -99,7 +99,7 @@ *--------------------------------------* | CLEAR | DV data | DV data | CLEAR | *--------------------------------------* - + transmission goes in this direction --->>> @@ -110,10 +110,10 @@ will continue to transmit frame 2, and will increase the dropped_frames counter each time it repeats the transmission). - + If you called DV1394_GET_STATUS at this instant, you would receive the following values: - + n_frames = 4 active_frame = 1 first_clear_frame = 3 @@ -144,9 +144,9 @@ (checks of system call return values omitted for brevity; always check return values in your code!) 
- + while ( frames left ) { - + struct pollfd *pfd = ...; pfd->fd = dv1394_fd; @@ -154,12 +154,12 @@ pfd->events = POLLOUT | POLLIN; (OUT for transmit, IN for receive) (add other sources of I/O here) - + poll(pfd, 1, -1); (or select(); add a timeout if you want) if (pfd->revents) { struct dv1394_status status; - + ioctl(dv1394_fd, DV1394_GET_STATUS, &status); if (status.dropped_frames > 0) { @@ -183,7 +183,7 @@ should close the dv1394 file descriptor (and munmap() all ringbuffer mappings, if you are using them), then re-open the dv1394 device (and re-map the ringbuffer). - + */ @@ -215,7 +215,7 @@ enum pal_or_ntsc { struct dv1394_init { /* DV1394_API_VERSION */ unsigned int api_version; - + /* isochronous transmission channel to use */ unsigned int channel; @@ -227,7 +227,7 @@ struct dv1394_init { enum pal_or_ntsc format; /* the following are used only for transmission */ - + /* set these to zero unless you want a non-default empty packet rate (see below) */ unsigned long cip_n; @@ -244,7 +244,7 @@ struct dv1394_init { would imply a different size for the ringbuffer). If you need a different buffer size, simply close and re-open the device, then initialize it with your new settings. */ - + /* Q: What are cip_n and cip_d? */ /* @@ -261,13 +261,13 @@ struct dv1394_init { The default empty packet insertion rate seems to work for many people; if your DV output is stable, you can simply ignore this discussion. However, we have exposed the empty packet rate as a parameter to support devices that - do not work with the default rate. + do not work with the default rate. The decision to insert an empty packet is made with a numerator/denominator algorithm. Empty packets are produced at an average rate of CIP_N / CIP_D. You can alter the empty packet rate by passing non-zero values for cip_n and cip_d to the INIT ioctl. 
- + */ --- linux-2.6.6-rc1/drivers/ieee1394/dv1394-private.h 2003-07-27 12:14:38.000000000 -0700 +++ 25/drivers/ieee1394/dv1394-private.h 2004-04-18 22:25:24.720072456 -0700 @@ -34,11 +34,11 @@ /* none of this is exposed to user-space */ -/* +/* the 8-byte CIP (Common Isochronous Packet) header that precedes each packet of DV data. - See the IEC 61883 standard. + See the IEC 61883 standard. */ struct CIP_header { unsigned char b[8]; }; @@ -71,10 +71,10 @@ static inline void fill_cip_header(struc -/* +/* DMA commands used to program the OHCI's DMA engine - See the Texas Instruments OHCI 1394 chipset documentation. + See the Texas Instruments OHCI 1394 chipset documentation. */ struct output_more_immediate { u32 q[8]; }; @@ -95,17 +95,17 @@ static inline void fill_output_more_imme omi->q[1] = 0; omi->q[2] = 0; omi->q[3] = 0; - + /* IT packet header */ omi->q[4] = cpu_to_le32( (0x0 << 16) /* IEEE1394_SPEED_100 */ | (tag << 14) | (channel << 8) - | (TCODE_ISO_DATA << 4) + | (TCODE_ISO_DATA << 4) | (sync_tag) ); /* reserved field; mimic behavior of my Sony DSR-40 */ omi->q[5] = cpu_to_le32((payload_size << 16) | (0x7F << 8) | 0xA0); - + omi->q[6] = 0; omi->q[7] = 0; } @@ -186,11 +186,11 @@ static inline void fill_input_last(struc -/* +/* A "DMA descriptor block" consists of several contiguous DMA commands. - struct DMA_descriptor_block encapsulates all of the commands necessary - to send one packet of DV data. - + struct DMA_descriptor_block encapsulates all of the commands necessary + to send one packet of DV data. 
+ There are three different types of these blocks: 1) command to send an empty packet (CIP header only, no DV data): @@ -225,44 +225,44 @@ struct DMA_descriptor_block { union { struct { /* iso header, common to all output block types */ - struct output_more_immediate omi; - + struct output_more_immediate omi; + union { /* empty packet */ struct { struct output_last ol; /* CIP header */ } empty; - + /* full packet */ struct { struct output_more om; /* CIP header */ - + union { /* payload does not cross page boundary */ struct { struct output_last ol; /* data payload */ } nocross; - + /* payload crosses page boundary */ struct { struct output_more om; /* data payload */ struct output_last ol; /* data payload */ } cross; } u; - + } full; } u; } out; struct { - struct input_last il; + struct input_last il; } in; } u; - /* ensure that PAGE_SIZE % sizeof(struct DMA_descriptor_block) == 0 + /* ensure that PAGE_SIZE % sizeof(struct DMA_descriptor_block) == 0 by padding out to 128 bytes */ - u32 __pad__[12]; + u32 __pad__[12]; }; @@ -281,7 +281,7 @@ struct frame { /* index of this frame in video_card->frames[] */ unsigned int frame_num; - /* FRAME_CLEAR - DMA program not set up, waiting for data + /* FRAME_CLEAR - DMA program not set up, waiting for data FRAME_READY - DMA program written, ready to transmit Changes to these should be locked against the interrupt @@ -290,7 +290,7 @@ struct frame { FRAME_CLEAR = 0, FRAME_READY } state; - + /* whether this frame has been DMA'ed already; used only from the IRQ handler to determine whether the frame can be reset */ int done; @@ -299,7 +299,7 @@ struct frame { /* kernel virtual pointer to the start of this frame's data in the user ringbuffer. 
Use only for CPU access; to get the DMA bus address you must go through the video->user_dma mapping */ - unsigned long data; + unsigned long data; /* Max # of packets per frame */ #define MAX_PACKETS 500 @@ -310,7 +310,7 @@ struct frame { struct CIP_header *header_pool; dma_addr_t header_pool_dma; - + /* a physically contiguous memory pool for allocating DMA descriptor blocks; usually around 64KB in size !descriptor_pool must be aligned to PAGE_SIZE! */ @@ -338,7 +338,7 @@ struct frame { /* pointer to the first packet's CIP header (where the timestamp goes) */ struct CIP_header *cip_syt1; - + /* pointer to the second packet's CIP header (only set if the first packet was empty) */ struct CIP_header *cip_syt2; @@ -384,7 +384,7 @@ static void frame_delete(struct frame *f static void frame_reset(struct frame *f); /* struct video_card contains all data associated with one instance - of the dv1394 driver + of the dv1394 driver */ enum modes { MODE_RECEIVE, @@ -411,7 +411,7 @@ struct video_card { u32 ohci_IsoXmitContextControlSet; u32 ohci_IsoXmitContextControlClear; u32 ohci_IsoXmitCommandPtr; - + /* OHCI card IR DMA context number, -1 if not in use */ struct ohci1394_iso_tasklet ir_tasklet; int ohci_ir_ctx; @@ -421,10 +421,10 @@ struct video_card { u32 ohci_IsoRcvContextControlClear; u32 ohci_IsoRcvCommandPtr; u32 ohci_IsoRcvContextMatch; - - + + /* CONCURRENCY CONTROL */ - + /* there are THREE levels of locking associated with video_card. */ /* @@ -435,7 +435,7 @@ struct video_card { */ unsigned long open; - /* + /* 2) the spinlock - this provides mutual exclusion between the interrupt handler and process-context operations. 
Generally you must take the spinlock under the following conditions: @@ -458,7 +458,7 @@ struct video_card { /* flag to prevent spurious interrupts (which OHCI seems to generate a lot :) from accessing the struct */ int dma_running; - + /* 3) the sleeping semaphore 'sem' - this is used from process context only, to serialize various operations on the video_card. Even though only one @@ -477,24 +477,24 @@ struct video_card { /* support asynchronous I/O signals (SIGIO) */ struct fasync_struct *fasync; - + /* the large, non-contiguous (rvmalloc()) ringbuffer for DV data, exposed to user-space via mmap() */ unsigned long dv_buf_size; struct dma_region dv_buf; - + /* next byte in the ringbuffer that a write() call will fill */ size_t write_off; struct frame *frames[DV1394_MAX_FRAMES]; - + /* n_frames also serves as an indicator that this struct video_card is initialized and ready to run DMA buffers */ int n_frames; /* this is the frame that is currently "owned" by the OHCI DMA controller - (set to -1 iff DMA is not running) + (set to -1 iff DMA is not running) ! must lock against the interrupt handler when accessing it ! @@ -511,7 +511,6 @@ struct video_card { The interrupt handler will NEVER advance active_frame to a frame that is not READY. - */ int active_frame; int first_run; @@ -521,10 +520,10 @@ struct video_card { /* altered ONLY from process context. 
Must check first_clear_frame->state; if it's READY, that means the ringbuffer is full with READY frames; if it's CLEAR, that means one or more ringbuffer frames are CLEAR */ - unsigned int first_clear_frame; + unsigned int first_clear_frame; /* altered both by process and interrupt */ - unsigned int n_clear_frames; + unsigned int n_clear_frames; /* only altered by the interrupt */ unsigned int dropped_frames; @@ -548,17 +547,17 @@ struct video_card { /* the isochronous channel to use, -1 if video card is inactive */ int channel; - + /* physically contiguous packet ringbuffer for receive */ struct dma_region packet_buf; unsigned long packet_buf_size; - + unsigned int current_packet; int first_frame; /* received first start frame marker? */ enum modes mode; }; -/* +/* if the video_card is not initialized, then the ONLY fields that are valid are: ohci open @@ -575,7 +574,7 @@ static int do_dv1394_init_default(struct static void do_dv1394_shutdown(struct video_card *video, int free_user_buf); -/* NTSC empty packet rate accurate to within 0.01%, +/* NTSC empty packet rate accurate to within 0.01%, calibrated against a Sony DSR-40 DVCAM deck */ #define CIP_N_NTSC 68000000 --- linux-2.6.6-rc1/drivers/ieee1394/eth1394.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/eth1394.c 2004-04-18 22:25:24.731070784 -0700 @@ -1,6 +1,6 @@ /* * eth1394.c -- Ethernet driver for Linux IEEE-1394 Subsystem - * + * * Copyright (C) 2001-2003 Ben Collins * 2000 Bonin Franck * 2003 Steve Kinneberg @@ -89,7 +89,7 @@ #define TRACE() printk(KERN_ERR "%s:%s[%d] ---- TRACE\n", driver_name, __FUNCTION__, __LINE__) static char version[] __devinitdata = - "$Rev: 1175 $ Ben Collins "; + "$Rev: 1198 $ Ben Collins "; struct fragment_info { struct list_head list; @@ -216,7 +216,7 @@ static struct hpsb_highlevel eth1394_hig /* This is called after an "ifup" */ static int ether1394_open (struct net_device *dev) { - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct 
eth1394_priv *priv = dev->priv; int ret = 0; /* Something bad happened, don't even try */ @@ -278,7 +278,7 @@ static void ether1394_tx_timeout (struct static int ether1394_change_mtu(struct net_device *dev, int new_mtu) { - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct eth1394_priv *priv = dev->priv; if ((new_mtu < 68) || (new_mtu > min(ETH1394_DATA_LEN, @@ -479,7 +479,7 @@ static void ether1394_reset_priv (struct { unsigned long flags; int i; - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct eth1394_priv *priv = dev->priv; struct hpsb_host *host = priv->host; u64 guid = *((u64*)&(host->csr.rom->bus_info_data[3])); u16 maxpayload = 1 << (host->csr.max_rec + 1); @@ -652,7 +652,7 @@ out: static void ether1394_remove_host (struct hpsb_host *host) { struct eth1394_host_info *hi; - + hi = hpsb_get_hostinfo(&eth1394_highlevel, host); if (hi != NULL) { struct eth1394_priv *priv = (struct eth1394_priv *)hi->dev->priv; @@ -660,7 +660,7 @@ static void ether1394_remove_host (struc hpsb_unregister_addrspace(&eth1394_highlevel, host, priv->local_fifo); - if (priv->iso != NULL) + if (priv->iso != NULL) hpsb_iso_shutdown(priv->iso); if (hi->dev) { @@ -731,18 +731,16 @@ static int ether1394_header(struct sk_bu eth->h_proto = htons(type); - if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) - { + if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) { memset(eth->h_dest, 0, dev->addr_len); return(dev->hard_header_len); } - if (daddr) - { + if (daddr) { memcpy(eth->h_dest,daddr,dev->addr_len); return dev->hard_header_len; } - + return -dev->hard_header_len; } @@ -760,15 +758,15 @@ static int ether1394_rebuild_header(stru struct eth1394hdr *eth = (struct eth1394hdr *)skb->data; struct net_device *dev = skb->dev; - switch (eth->h_proto) - { + switch (eth->h_proto) { + #ifdef CONFIG_INET case __constant_htons(ETH_P_IP): return arp_find((unsigned char*)&eth->h_dest, skb); -#endif +#endif default: ETH1394_PRINT(KERN_DEBUG, dev->name, - "unable to resolve type %04x 
addresses.\n", + "unable to resolve type %04x addresses.\n", eth->h_proto); break; } @@ -797,7 +795,7 @@ static int ether1394_header_cache(struct eth->h_proto = type; memcpy(eth->h_dest, neigh->ha, dev->addr_len); - + hh->hh_len = ETH1394_HLEN; return 0; } @@ -867,7 +865,7 @@ static inline u16 ether1394_parse_encap( nodeid_t srcid, nodeid_t destid, u16 ether_type) { - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct eth1394_priv *priv = dev->priv; u64 dest_hw; unsigned short ret = 0; @@ -1010,7 +1008,7 @@ static inline int new_fragment(struct li } new = kmalloc(sizeof(struct fragment_info), GFP_ATOMIC); - if (!new) + if (!new) return -ENOMEM; new->offset = offset; @@ -1192,7 +1190,7 @@ static int ether1394_data_handler(struct purge_partial_datagram(pdgl->prev); pdg->sz--; } - + retval = new_partial_datagram(dev, pdgl, dgl, dg_size, buf + hdr_len, fg_off, fg_len); @@ -1374,7 +1372,7 @@ static void ether1394_iso(struct hpsb_is * arphdr) is the same format as the ip1394 header, so they overlap. The rest * needs to be munged a bit. The remainder of the arphdr is formatted based * on hwaddr len and ipaddr len. We know what they'll be, so it's easy to - * judge. + * judge. 
* * Now that the EUI is used for the hardware address all we need to do to make * this work for 1394 is to insert 2 quadlets that contain max_rec size, @@ -1452,7 +1450,7 @@ static inline unsigned int ether1394_enc hdr->common.lf = ETH1394_HDR_LF_IF; hdr->sf.fg_off = 0; break; - + default: hdr->sf.fg_off += adj_max_payload; bufhdr = (union eth1394_hdr *)skb_pull(skb, adj_max_payload); @@ -1499,7 +1497,7 @@ static inline int ether1394_prep_write_p ETH1394_PRINT_G(KERN_ERR, "No more tlabels left while sending " "to node " NODE_BUS_FMT "\n", NODE_BUS_ARGS(host, node)); return -1; - } + } p->header[0] = (p->node_id << 16) | (p->tlabel << 10) | (1 << 8) | (TCODE_WRITEB << 4); @@ -1538,7 +1536,6 @@ static inline void ether1394_free_packet { if (packet->tcode != TCODE_STREAM_DATA) hpsb_free_tlabel(packet); - packet->data = NULL; hpsb_free_packet(packet); } @@ -1583,9 +1580,9 @@ static inline void ether1394_dg_complete { struct sk_buff *skb = ptask->skb; struct net_device *dev = skb->dev; - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct eth1394_priv *priv = dev->priv; unsigned long flags; - + /* Statistics */ spin_lock_irqsave(&priv->lock, flags); if (fail) { @@ -1616,8 +1613,7 @@ static void ether1394_complete_cb(void * ether1394_free_packet(packet); ptask->outstanding_pkts--; - if (ptask->outstanding_pkts > 0 && !fail) - { + if (ptask->outstanding_pkts > 0 && !fail) { int tx_len; /* Add the encapsulation header to the fragment */ @@ -1637,7 +1633,7 @@ static int ether1394_tx (struct sk_buff { int kmflags = in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL; struct eth1394hdr *eth; - struct eth1394_priv *priv = (struct eth1394_priv *)dev->priv; + struct eth1394_priv *priv = dev->priv; int proto; unsigned long flags; nodeid_t dest_node; @@ -1797,7 +1793,7 @@ static int ether1394_ethtool_ioctl(struc case ETHTOOL_GDRVINFO: { struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; strcpy (info.driver, driver_name); - strcpy (info.version, "$Rev: 1175 $"); + strcpy (info.version, "$Rev: 1198 $"); /* FIXME XXX provide sane businfo */ strcpy (info.bus_info, "ieee1394"); if (copy_to_user (useraddr, &info, sizeof (info))) --- linux-2.6.6-rc1/drivers/ieee1394/highlevel.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/highlevel.c 2004-04-18 22:25:24.732070632 -0700 @@ -493,7 +493,7 @@ int hpsb_listen_channel(struct hpsb_high return 0; } -void hpsb_unlisten_channel(struct hpsb_highlevel *hl, struct hpsb_host *host, +void hpsb_unlisten_channel(struct hpsb_highlevel *hl, struct hpsb_host *host, unsigned int channel) { if (channel > 63) { --- linux-2.6.6-rc1/drivers/ieee1394/highlevel.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/highlevel.h 2004-04-18 22:25:24.733070480 -0700 @@ -5,7 +5,7 @@ struct hpsb_address_serve { struct list_head host_list; /* per host list */ - + struct list_head hl_list; /* hpsb_highlevel list */ struct hpsb_address_ops *op; @@ -19,7 +19,7 @@ struct hpsb_address_serve { /* * The above structs are internal to highlevel driver handling. Only the - * following structures are of interest to actual highlevel drivers. + * following structures are of interest to actual highlevel drivers. */ struct hpsb_highlevel { @@ -68,8 +68,8 @@ struct hpsb_highlevel { struct hpsb_address_ops { /* - * Null function pointers will make the respective operation complete - * with RCODE_TYPE_ERROR. Makes for easy to implement read-only + * Null function pointers will make the respective operation complete + * with RCODE_TYPE_ERROR. 
Makes for easy to implement read-only * registers (just leave everything but read NULL). * * All functions shall return appropriate IEEE 1394 rcodes. @@ -77,7 +77,7 @@ struct hpsb_address_ops { /* These functions have to implement block reads for themselves. */ /* These functions either return a response code - or a negative number. In the first case a response will be generated; in the + or a negative number. In the first case a response will be generated; in the later case, no response will be sent and the driver, that handled the request will send the response itself */ @@ -104,7 +104,7 @@ void highlevel_host_reset(struct hpsb_ho a packet arrives. The flags argument contains the second word of the first header quadlet of the incoming packet (containing transaction label, retry code, transaction code and priority). These functions either return a response code - or a negative number. In the first case a response will be generated; in the + or a negative number. In the first case a response will be generated; in the later case, no response will be sent and the driver, that handled the request will send the response itself. */ @@ -155,7 +155,7 @@ int hpsb_unregister_addrspace(struct hps * Enable or disable receving a certain isochronous channel through the * iso_receive op. 
*/ -int hpsb_listen_channel(struct hpsb_highlevel *hl, struct hpsb_host *host, +int hpsb_listen_channel(struct hpsb_highlevel *hl, struct hpsb_host *host, unsigned int channel); void hpsb_unlisten_channel(struct hpsb_highlevel *hl, struct hpsb_host *host, unsigned int channel); --- linux-2.6.6-rc1/drivers/ieee1394/hosts.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/hosts.c 2004-04-18 22:25:24.733070480 -0700 @@ -126,9 +126,7 @@ struct hpsb_host *hpsb_alloc_host(struct h->hostdata = h + 1; h->driver = drv; - INIT_LIST_HEAD(&h->pending_packets); - spin_lock_init(&h->pending_pkt_lock); - + skb_queue_head_init(&h->pending_packet_queue); INIT_LIST_HEAD(&h->addr_space); init_timer(&h->delayed_reset); --- linux-2.6.6-rc1/drivers/ieee1394/hosts.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/hosts.h 2004-04-18 22:25:24.734070328 -0700 @@ -5,6 +5,8 @@ #include #include #include +#include + #include #include "ieee1394_types.h" @@ -21,8 +23,8 @@ struct hpsb_host { atomic_t generation; - struct list_head pending_packets; - spinlock_t pending_pkt_lock; + struct sk_buff_head pending_packet_queue; + struct timer_list timeout; unsigned long timeout_interval; @@ -164,7 +166,7 @@ struct hpsb_host_driver { * called. Return 0 on success, negative errno on failure. * NOTE: The function must be callable in interrupt context. 
*/ - int (*transmit_packet) (struct hpsb_host *host, + int (*transmit_packet) (struct hpsb_host *host, struct hpsb_packet *packet); /* This function requests miscellanous services from the driver, see --- linux-2.6.6-rc1/drivers/ieee1394/ieee1394_core.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/ieee1394_core.c 2004-04-18 22:25:24.738069720 -0700 @@ -31,6 +31,8 @@ #include #include #include +#include + #include #include @@ -56,8 +58,6 @@ MODULE_PARM_DESC(disable_nodemgr, "Disab /* We are GPL, so treat us special */ MODULE_LICENSE("GPL"); -static kmem_cache_t *hpsb_packet_cache; - /* Some globals used */ const char *hpsb_speedto_str[] = { "S100", "S200", "S400", "S800", "S1600", "S3200" }; @@ -122,30 +122,27 @@ void hpsb_set_packet_complete_task(struc struct hpsb_packet *hpsb_alloc_packet(size_t data_size) { struct hpsb_packet *packet = NULL; - void *data = NULL; - int gfp_flags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; + struct sk_buff *skb; - packet = kmem_cache_alloc(hpsb_packet_cache, gfp_flags); - if (packet == NULL) + data_size = ((data_size + 3) & ~3); + + skb = alloc_skb(data_size + sizeof(*packet), GFP_ATOMIC); + if (skb == NULL) return NULL; - memset(packet, 0, sizeof(*packet)); + memset(skb->data, 0, data_size + sizeof(*packet)); + + packet = (struct hpsb_packet *)skb->data; + packet->skb = skb; packet->header = packet->embedded_header; - INIT_LIST_HEAD(&packet->list); packet->state = hpsb_unused; packet->generation = -1; + INIT_LIST_HEAD(&packet->driver_list); atomic_set(&packet->refcnt, 1); if (data_size) { - data_size = (data_size + 3) & ~3; - data = kmalloc(data_size + 8, gfp_flags); - if (data == NULL) { - kmem_cache_free(hpsb_packet_cache, packet); - return NULL; - } - - packet->data = data; + packet->data = (quadlet_t *)(skb->data + sizeof(*packet)); packet->data_size = data_size; } @@ -162,8 +159,8 @@ struct hpsb_packet *hpsb_alloc_packet(si void hpsb_free_packet(struct hpsb_packet *packet) { if (packet && 
atomic_dec_and_test(&packet->refcnt)) { - kfree(packet->data); - kmem_cache_free(hpsb_packet_cache, packet); + BUG_ON(!list_empty(&packet->driver_list)); + kfree_skb(packet->skb); } } @@ -219,13 +216,13 @@ static int check_selfids(struct hpsb_hos if (!sid->extended) { nodeid++; esid_seq = 0; - + if (sid->phy_id != nodeid) { HPSB_INFO("SelfIDs failed monotony check with " "%d", sid->phy_id); return 0; } - + if (sid->link_active) { host->nodes_active++; if (sid->contender) @@ -234,7 +231,7 @@ static int check_selfids(struct hpsb_hos } else { esid = (struct ext_selfid *)sid; - if ((esid->phy_id != nodeid) + if ((esid->phy_id != nodeid) || (esid->seq_nr != esid_seq)) { HPSB_INFO("SelfIDs failed monotony check with " "%d/%d", esid->phy_id, esid->seq_nr); @@ -244,24 +241,24 @@ static int check_selfids(struct hpsb_hos } sid++; } - + esid = (struct ext_selfid *)(sid - 1); while (esid->extended) { if ((esid->porta == 0x2) || (esid->portb == 0x2) || (esid->portc == 0x2) || (esid->portd == 0x2) || (esid->porte == 0x2) || (esid->portf == 0x2) || (esid->portg == 0x2) || (esid->porth == 0x2)) { - HPSB_INFO("SelfIDs failed root check on " - "extended SelfID"); - return 0; + HPSB_INFO("SelfIDs failed root check on " + "extended SelfID"); + return 0; } esid--; } sid = (struct selfid *)esid; if ((sid->port0 == 0x2) || (sid->port1 == 0x2) || (sid->port2 == 0x2)) { - HPSB_INFO("SelfIDs failed root check"); - return 0; + HPSB_INFO("SelfIDs failed root check"); + return 0; } host->node_count = nodeid + 1; @@ -400,7 +397,7 @@ void hpsb_selfid_complete(struct hpsb_ho } -void hpsb_packet_sent(struct hpsb_host *host, struct hpsb_packet *packet, +void hpsb_packet_sent(struct hpsb_host *host, struct hpsb_packet *packet, int ackcode) { packet->ack_code = ackcode; @@ -413,7 +410,7 @@ void hpsb_packet_sent(struct hpsb_host * if (ackcode != ACK_PENDING || !packet->expect_response) { atomic_dec(&packet->refcnt); - list_del(&packet->list); + skb_unlink(packet->skb); packet->state = hpsb_complete; 
queue_packet_complete(packet); return; @@ -505,17 +502,17 @@ int hpsb_send_packet(struct hpsb_packet packet->state = hpsb_queued; - if (!packet->no_waiter || packet->expect_response) { - unsigned long flags; + /* This just seems silly to me */ + WARN_ON(packet->no_waiter && packet->expect_response); + if (!packet->no_waiter || packet->expect_response) { atomic_inc(&packet->refcnt); - spin_lock_irqsave(&host->pending_pkt_lock, flags); - list_add_tail(&packet->list, &host->pending_packets); - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); + skb_queue_tail(&host->pending_packet_queue, packet->skb); } - if (packet->node_id == host->node_id) - { /* it is a local request, so handle it locally */ + if (packet->node_id == host->node_id) { + /* it is a local request, so handle it locally */ + quadlet_t *data; size_t size = packet->data_size + packet->header_size; @@ -547,6 +544,7 @@ int hpsb_send_packet(struct hpsb_packet + NODEID_TO_NODE(packet->node_id)]; } +#ifdef CONFIG_IEEE1394_VERBOSEDEBUG switch (packet->speed_code) { case 2: dump_packet("send packet 400:", packet->header, @@ -560,6 +558,7 @@ int hpsb_send_packet(struct hpsb_packet dump_packet("send packet 100:", packet->header, packet->header_size); } +#endif return host->driver->transmit_packet(host, packet); } @@ -595,80 +594,78 @@ static void send_packet_nocare(struct hp } -void handle_packet_response(struct hpsb_host *host, int tcode, quadlet_t *data, - size_t size) +static void handle_packet_response(struct hpsb_host *host, int tcode, + quadlet_t *data, size_t size) { struct hpsb_packet *packet = NULL; - struct list_head *lh; + struct sk_buff *skb; int tcode_match = 0; int tlabel; unsigned long flags; tlabel = (data[0] >> 10) & 0x3f; - spin_lock_irqsave(&host->pending_pkt_lock, flags); + spin_lock_irqsave(&host->pending_packet_queue.lock, flags); - list_for_each(lh, &host->pending_packets) { - packet = list_entry(lh, struct hpsb_packet, list); + skb_queue_walk(&host->pending_packet_queue, skb) { + 
packet = (struct hpsb_packet *)skb->data; if ((packet->tlabel == tlabel) && (packet->node_id == (data[1] >> 16))){ break; } + + packet = NULL; } - if (lh == &host->pending_packets) { + if (packet == NULL) { HPSB_DEBUG("unsolicited response packet received - no tlabel match"); dump_packet("contents:", data, 16); - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); + spin_unlock_irqrestore(&host->pending_packet_queue.lock, flags); return; } switch (packet->tcode) { case TCODE_WRITEQ: case TCODE_WRITEB: - if (tcode == TCODE_WRITE_RESPONSE) tcode_match = 1; + if (tcode != TCODE_WRITE_RESPONSE) + break; + tcode_match = 1; + memcpy(packet->header, data, 12); break; case TCODE_READQ: - if (tcode == TCODE_READQ_RESPONSE) tcode_match = 1; + if (tcode != TCODE_READQ_RESPONSE) + break; + tcode_match = 1; + memcpy(packet->header, data, 16); break; case TCODE_READB: - if (tcode == TCODE_READB_RESPONSE) tcode_match = 1; + if (tcode != TCODE_READB_RESPONSE) + break; + tcode_match = 1; + BUG_ON(packet->skb->len - sizeof(*packet) < size - 16); + memcpy(packet->header, data, 16); + memcpy(packet->data, data + 4, size - 16); break; case TCODE_LOCK_REQUEST: - if (tcode == TCODE_LOCK_RESPONSE) tcode_match = 1; + if (tcode != TCODE_LOCK_RESPONSE) + break; + tcode_match = 1; + size = min((size - 16), (size_t)8); + BUG_ON(packet->skb->len - sizeof(*packet) < size); + memcpy(packet->header, data, 16); + memcpy(packet->data, data + 4, size); break; } - if (!tcode_match || (packet->tlabel != tlabel) - || (packet->node_id != (data[1] >> 16))) { + if (!tcode_match) { HPSB_INFO("unsolicited response packet received - tcode mismatch"); dump_packet("contents:", data, 16); - - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); + spin_unlock_irqrestore(&host->pending_packet_queue.lock, flags); return; } - list_del(&packet->list); - - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); - - /* FIXME - update size fields? 
*/ - switch (tcode) { - case TCODE_WRITE_RESPONSE: - memcpy(packet->header, data, 12); - break; - case TCODE_READQ_RESPONSE: - memcpy(packet->header, data, 16); - break; - case TCODE_READB_RESPONSE: - memcpy(packet->header, data, 16); - memcpy(packet->data, data + 4, size - 16); - break; - case TCODE_LOCK_RESPONSE: - memcpy(packet->header, data, 16); - memcpy(packet->data, data + 4, (size - 16) > 8 ? 8 : size - 16); - break; - } + __skb_unlink(skb, skb->list); + spin_unlock_irqrestore(&host->pending_packet_queue.lock, flags); if (packet->state == hpsb_queued) { packet->sendtime = jiffies; @@ -685,10 +682,8 @@ static struct hpsb_packet *create_reply_ { struct hpsb_packet *p; - dsize += (dsize % 4 ? 4 - (dsize % 4) : 0); - p = hpsb_alloc_packet(dsize); - if (p == NULL) { + if (unlikely(p == NULL)) { /* FIXME - send data_error response */ return NULL; } @@ -702,9 +697,8 @@ static struct hpsb_packet *create_reply_ p->generation = get_hpsb_generation(host); - if (dsize % 4) { - p->data[dsize / 4] = 0; - } + if (dsize % 4) + p->data[dsize / 4] = 0; return p; } @@ -851,11 +845,11 @@ static void handle_incoming_packet(struc fill_async_lock_resp(packet, rcode, extcode, 4); break; case 8: - if ((extcode != EXTCODE_FETCH_ADD) + if ((extcode != EXTCODE_FETCH_ADD) && (extcode != EXTCODE_LITTLE_ADD)) { rcode = highlevel_lock(host, source, packet->data, addr, - data[5], data[4], + data[5], data[4], extcode, flags); fill_async_lock_resp(packet, rcode, extcode, 4); } else { @@ -870,7 +864,7 @@ static void handle_incoming_packet(struc rcode = highlevel_lock64(host, source, (octlet_t *)packet->data, addr, *(octlet_t *)(data + 6), - *(octlet_t *)(data + 4), + *(octlet_t *)(data + 4), extcode, flags); fill_async_lock_resp(packet, rcode, extcode, 8); break; @@ -932,7 +926,7 @@ void hpsb_packet_received(struct hpsb_ho break; default: - HPSB_NOTICE("received packet with bogus transaction code %d", + HPSB_NOTICE("received packet with bogus transaction code %d", tcode); break; } @@ -941,74 
+935,75 @@ void hpsb_packet_received(struct hpsb_ho void abort_requests(struct hpsb_host *host) { - unsigned long flags; - struct hpsb_packet *packet, *packet_next; - LIST_HEAD(llist); + struct hpsb_packet *packet; + struct sk_buff *skb; + + host->driver->devctl(host, CANCEL_REQUESTS, 0); - host->driver->devctl(host, CANCEL_REQUESTS, 0); + while ((skb = skb_dequeue(&host->pending_packet_queue)) != NULL) { + packet = (struct hpsb_packet *)skb->data; - spin_lock_irqsave(&host->pending_pkt_lock, flags); - list_splice(&host->pending_packets, &llist); - INIT_LIST_HEAD(&host->pending_packets); - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); - - list_for_each_entry_safe(packet, packet_next, &llist, list) { - list_del(&packet->list); - packet->state = hpsb_complete; - packet->ack_code = ACKX_ABORTED; + packet->state = hpsb_complete; + packet->ack_code = ACKX_ABORTED; queue_packet_complete(packet); - } + } } void abort_timedouts(unsigned long __opaque) { struct hpsb_host *host = (struct hpsb_host *)__opaque; - unsigned long flags; - struct hpsb_packet *packet, *packet_next; - unsigned long expire; - LIST_HEAD(expiredlist); + unsigned long flags; + struct hpsb_packet *packet; + struct sk_buff *skb; + unsigned long expire; - spin_lock_irqsave(&host->csr.lock, flags); + spin_lock_irqsave(&host->csr.lock, flags); expire = host->csr.expire; - spin_unlock_irqrestore(&host->csr.lock, flags); - - spin_lock_irqsave(&host->pending_pkt_lock, flags); + spin_unlock_irqrestore(&host->csr.lock, flags); - list_for_each_entry_safe(packet, packet_next, &host->pending_packets, list) { - if (time_before(packet->sendtime + expire, jiffies)) { - list_del(&packet->list); - list_add(&packet->list, &expiredlist); - } - } + /* Hold the lock around this, since we aren't dequeuing all + * packets, just ones we need. 
*/ + spin_lock_irqsave(&host->pending_packet_queue.lock, flags); + + while (!skb_queue_empty(&host->pending_packet_queue)) { + skb = skb_peek(&host->pending_packet_queue); + + packet = (struct hpsb_packet *)skb->data; + + if (time_before(packet->sendtime + expire, jiffies)) { + __skb_unlink(skb, skb->list); + packet->state = hpsb_complete; + packet->ack_code = ACKX_TIMEOUT; + queue_packet_complete(packet); + } else { + /* Since packets are added to the tail, the oldest + * ones are first, always. When we get to one that + * isn't timed out, the rest aren't either. */ + break; + } + } - if (!list_empty(&host->pending_packets)) + if (!skb_queue_empty(&host->pending_packet_queue)) mod_timer(&host->timeout, jiffies + host->timeout_interval); - spin_unlock_irqrestore(&host->pending_pkt_lock, flags); - - list_for_each_entry_safe(packet, packet_next, &expiredlist, list) { - list_del(&packet->list); - packet->state = hpsb_complete; - packet->ack_code = ACKX_TIMEOUT; - queue_packet_complete(packet); - } + spin_unlock_irqrestore(&host->pending_packet_queue.lock, flags); } + +/* Kernel thread and vars, which handles packets that are completed. Only + * packets that have a "complete" function are sent here. This way, the + * completion is run out of kernel context, and doesn't block the rest of + * the stack. 
*/ static int khpsbpkt_pid = -1; static DECLARE_COMPLETION(khpsbpkt_complete); -static LIST_HEAD(hpsbpkt_list); +struct sk_buff_head hpsbpkt_queue; static DECLARE_MUTEX_LOCKED(khpsbpkt_sig); -static spinlock_t khpsbpkt_lock = SPIN_LOCK_UNLOCKED; static void queue_packet_complete(struct hpsb_packet *packet) { if (packet->complete_routine != NULL) { - unsigned long flags; - - spin_lock_irqsave(&khpsbpkt_lock, flags); - list_add_tail(&packet->list, &hpsbpkt_list); - spin_unlock_irqrestore(&khpsbpkt_lock, flags); + skb_queue_tail(&hpsbpkt_queue, packet->skb); /* Signal the kernel thread to handle this */ up(&khpsbpkt_sig); @@ -1018,24 +1013,24 @@ static void queue_packet_complete(struct static int hpsbpkt_thread(void *__hi) { - struct hpsb_packet *packet, *next; - unsigned long flags; + struct sk_buff *skb; + struct hpsb_packet *packet; + void (*complete_routine)(void*); + void *complete_data; daemonize("khpsbpkt"); - allow_signal(SIGTERM); while (!down_interruptible(&khpsbpkt_sig)) { - spin_lock_irqsave(&khpsbpkt_lock, flags); - list_for_each_entry_safe(packet, next, &hpsbpkt_list, list) { - void (*complete_routine)(void*) = packet->complete_routine; - void *complete_data = packet->complete_data; + while ((skb = skb_dequeue(&hpsbpkt_queue)) != NULL) { + packet = (struct hpsb_packet *)skb->data; + + complete_routine = packet->complete_routine; + complete_data = packet->complete_data; - list_del(&packet->list); packet->complete_routine = packet->complete_data = NULL; complete_routine(complete_data); } - spin_unlock_irqrestore(&khpsbpkt_lock, flags); } complete_and_exit(&khpsbpkt_complete, 0); @@ -1046,6 +1041,8 @@ static int __init ieee1394_init(void) { int i; + skb_queue_head_init(&hpsbpkt_queue); + if (hpsb_init_config_roms()) { HPSB_ERR("Failed to initialize some config rom entries.\n"); HPSB_ERR("Some features may not be available\n"); @@ -1066,9 +1063,6 @@ static int __init ieee1394_init(void) devfs_mk_dir("ieee1394"); - hpsb_packet_cache = 
kmem_cache_create("hpsb_packet", sizeof(struct hpsb_packet), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - bus_register(&ieee1394_bus_type); for (i = 0; fw_bus_attrs[i]; i++) bus_create_file(&ieee1394_bus_type, fw_bus_attrs[i]); @@ -1104,8 +1098,6 @@ static void __exit ieee1394_cleanup(void wait_for_completion(&khpsbpkt_complete); } - kmem_cache_destroy(hpsb_packet_cache); - hpsb_cleanup_config_roms(); unregister_chrdev_region(IEEE1394_CORE_DEV, 256); --- linux-2.6.6-rc1/drivers/ieee1394/ieee1394_core.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/ieee1394_core.h 2004-04-18 22:25:24.738069720 -0700 @@ -12,9 +12,13 @@ struct hpsb_packet { /* This struct is basically read-only for hosts with the exception of * the data buffer contents and xnext - see below. */ - struct list_head list; - /* This can be used for host driver internal linking. */ + /* This can be used for host driver internal linking. + * + * NOTE: This must be left in init state when the driver is done + * with it (e.g. by using list_del_init()), since the core does + * some sanity checks to make sure the packet is not on a + * driver_list when free'ing it. */ struct list_head driver_list; nodeid_t node_id; @@ -27,10 +31,9 @@ struct hpsb_packet { * queued = queued for sending * pending = sent, waiting for response * complete = processing completed, successful or not - * incoming = incoming packet */ - enum { - hpsb_unused, hpsb_queued, hpsb_pending, hpsb_complete, hpsb_incoming + enum { + hpsb_unused, hpsb_queued, hpsb_pending, hpsb_complete } __attribute__((packed)) state; /* These are core internal. */ @@ -67,6 +70,9 @@ struct hpsb_packet { void (*complete_routine)(void *); void *complete_data; + /* XXX This is just a hack at the moment */ + struct sk_buff *skb; + /* Store jiffies for implementing bus timeouts. 
*/ unsigned long sendtime; @@ -141,7 +147,7 @@ int hpsb_bus_reset(struct hpsb_host *hos */ void hpsb_selfid_received(struct hpsb_host *host, quadlet_t sid); -/* +/* * Notify completion of SelfID stage to the core and report new physical ID * and whether host is root now. */ --- linux-2.6.6-rc1/drivers/ieee1394/ieee1394.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/ieee1394.h 2004-04-18 22:25:24.734070328 -0700 @@ -39,7 +39,7 @@ #define ACK_TARDY 0xb #define ACK_CONFLICT_ERROR 0xc #define ACK_DATA_ERROR 0xd -#define ACK_TYPE_ERROR 0xe +#define ACK_TYPE_ERROR 0xe #define ACK_ADDRESS_ERROR 0xf /* Non-standard "ACK codes" for internal use */ @@ -74,7 +74,7 @@ extern const char *hpsb_speedto_str[]; #define SELFID_PORT_CHILD 0x3 #define SELFID_PORT_PARENT 0x2 #define SELFID_PORT_NCONN 0x1 -#define SELFID_PORT_NONE 0x0 +#define SELFID_PORT_NONE 0x0 #include --- linux-2.6.6-rc1/drivers/ieee1394/ieee1394_transactions.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/ieee1394_transactions.c 2004-04-18 22:25:24.740069416 -0700 @@ -67,7 +67,7 @@ static void fill_async_writeblock(struct packet->data_size = length + (length % 4 ? 
4 - (length % 4) : 0); } -static void fill_async_lock(struct hpsb_packet *packet, u64 addr, int extcode, +static void fill_async_lock(struct hpsb_packet *packet, u64 addr, int extcode, int length) { PREP_ASYNC_HEAD_ADDRESS(TCODE_LOCK_REQUEST); @@ -89,10 +89,10 @@ static void fill_iso_packet(struct hpsb_ packet->tcode = TCODE_ISO_DATA; } -static void fill_phy_packet(struct hpsb_packet *packet, quadlet_t data) -{ +static void fill_phy_packet(struct hpsb_packet *packet, quadlet_t data) +{ packet->header[0] = data; - packet->header[1] = ~data; + packet->header[1] = ~data; packet->header_size = 8; packet->data_size = 0; packet->expect_response = 0; @@ -145,7 +145,7 @@ int hpsb_get_tlabel(struct hpsb_packet * } spin_lock_irqsave(&tp->lock, flags); - + packet->tlabel = find_next_zero_bit(tp->pool, 64, tp->next); if (packet->tlabel > 63) packet->tlabel = find_first_zero_bit(tp->pool, 64); @@ -158,7 +158,7 @@ int hpsb_get_tlabel(struct hpsb_packet * return 0; } -/** +/** * hpsb_free_tlabel - free an allocated transaction label * @packet: packet whos tlabel/tpool needs to be cleared * @@ -173,7 +173,7 @@ void hpsb_free_tlabel(struct hpsb_packet { unsigned long flags; struct hpsb_tlabel_pool *tp; - + tp = &packet->host->tpool[packet->node_id & NODE_MASK]; BUG_ON(packet->tlabel > 63 || packet->tlabel < 0); @@ -204,7 +204,7 @@ int hpsb_packet_success(struct hpsb_pack return -EINVAL; default: HPSB_ERR("received reserved rcode %d from node %d", - (packet->header[1] >> 12) & 0xf, + (packet->header[1] >> 12) & 0xf, packet->node_id); return -EAGAIN; } @@ -268,7 +268,7 @@ struct hpsb_packet *hpsb_make_readpacket if (length == 0) return NULL; - packet = hpsb_alloc_packet((length + 3) & ~3); + packet = hpsb_alloc_packet(length); if (!packet) return NULL; @@ -296,7 +296,7 @@ struct hpsb_packet *hpsb_make_writepacke if (length == 0) return NULL; - packet = hpsb_alloc_packet((length + 3) & ~3); + packet = hpsb_alloc_packet(length); if (!packet) return NULL; @@ -330,7 +330,7 @@ struct 
hpsb_packet *hpsb_make_streampack if (length == 0) return NULL; - packet = hpsb_alloc_packet((length + 3) & ~3); + packet = hpsb_alloc_packet(length); if (!packet) return NULL; @@ -338,7 +338,7 @@ struct hpsb_packet *hpsb_make_streampack packet->data[length >> 2] = 0; } packet->host = host; - + if (hpsb_get_tlabel(packet)) { hpsb_free_packet(packet); return NULL; @@ -430,17 +430,17 @@ struct hpsb_packet *hpsb_make_lock64pack } struct hpsb_packet *hpsb_make_phypacket(struct hpsb_host *host, - quadlet_t data) + quadlet_t data) { - struct hpsb_packet *p; + struct hpsb_packet *p; - p = hpsb_alloc_packet(0); - if (!p) return NULL; + p = hpsb_alloc_packet(0); + if (!p) return NULL; - p->host = host; - fill_phy_packet(p, data); + p->host = host; + fill_phy_packet(p, data); - return p; + return p; } struct hpsb_packet *hpsb_make_isopacket(struct hpsb_host *host, @@ -470,7 +470,7 @@ int hpsb_read(struct hpsb_host *host, no { struct hpsb_packet *packet; int retval = 0; - + if (length == 0) return -EINVAL; @@ -544,7 +544,7 @@ int hpsb_lock(struct hpsb_host *host, no BUG_ON(in_interrupt()); // We can't be called in an interrupt, yet - packet = hpsb_make_lockpacket(host, node, addr, extcode, data, arg); + packet = hpsb_make_lockpacket(host, node, addr, extcode, data, arg); if (!packet) return -ENOMEM; @@ -607,7 +607,7 @@ int hpsb_send_gasp(struct hpsb_host *hos HPSB_VERBOSE("Send GASP: channel = %d, length = %Zd", channel, length); length += 8; - + packet = hpsb_make_streampacket(host, NULL, length, channel, 3, 0); if (!packet) return -ENOMEM; --- linux-2.6.6-rc1/drivers/ieee1394/iso.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/iso.c 2004-04-18 22:25:24.740069416 -0700 @@ -274,7 +274,7 @@ int hpsb_iso_recv_start(struct hpsb_iso cycle %= 8000; isoctl_args[0] = cycle; - + if (tag_mask < 0) /* match all tags */ tag_mask = 0xF; @@ -358,7 +358,7 @@ int hpsb_iso_xmit_queue_packet(struct hp } } -out: +out: spin_unlock_irqrestore(&iso->lock, flags); return rv; } --- 
linux-2.6.6-rc1/drivers/ieee1394/Kconfig 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/Kconfig 2004-04-18 22:25:24.711073824 -0700 @@ -124,7 +124,7 @@ config IEEE1394_SBP2_PHYS_DMA config IEEE1394_ETH1394 tristate "Ethernet over 1394" - depends on IEEE1394 && EXPERIMENTAL + depends on IEEE1394 && EXPERIMENTAL && INET select IEEE1394_CONFIG_ROM_IP1394 select IEEE1394_EXTRA_CONFIG_ROMS help --- linux-2.6.6-rc1/drivers/ieee1394/nodemgr.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/nodemgr.c 2004-04-18 22:25:24.742069112 -0700 @@ -88,7 +88,7 @@ static struct csr1212_bus_ops nodemgr_cs }; -/* +/* * Basically what we do here is start off retrieving the bus_info block. * From there will fill in some info about the node, verify it is of IEEE * 1394 type, and that the crc checks out ok. After that we start off with @@ -102,7 +102,7 @@ static struct csr1212_bus_ops nodemgr_cs * that's easy to parse by the protocol interface. */ -/* +/* * The nodemgr relies heavily on the Driver Model for device callbacks and * driver/device mappings. The old nodemgr used to handle all this itself, * but now we are much simpler because of the LDM. 
@@ -273,7 +273,7 @@ static ssize_t fw_show_ne_bus_options(st ne->busopt.irmc, ne->busopt.cmc, ne->busopt.isc, ne->busopt.bmc, ne->busopt.pmc, ne->busopt.generation, ne->busopt.lnkspd, - ne->busopt.max_rec, + ne->busopt.max_rec, ne->busopt.max_rom, ne->busopt.cyc_clk_acc); } @@ -328,7 +328,7 @@ static ssize_t fw_get_ignore_driver(stru struct unit_directory *ud = container_of(dev, struct unit_directory, device); return sprintf(buf, "%d\n", ud->ignore_driver); -} +} static DEVICE_ATTR(ignore_driver, S_IWUSR | S_IRUGO, fw_get_ignore_driver, fw_set_ignore_driver); @@ -356,7 +356,6 @@ static int nodemgr_rescan_bus_thread(voi { /* No userlevel access needed */ daemonize("kfwrescan"); - allow_signal(SIGTERM); bus_rescan_devices(&ieee1394_bus_type); @@ -726,7 +725,7 @@ static void nodemgr_update_bus_options(s ne->busopt.max_rom = (busoptions >> 8) & 0x3; ne->busopt.generation = (busoptions >> 4) & 0xf; ne->busopt.lnkspd = busoptions & 0x7; - + HPSB_VERBOSE("NodeMgr: raw=0x%08x irmc=%d cmc=%d isc=%d bmc=%d pmc=%d " "cyc_clk_acc=%d max_rec=%d max_rom=%d gen=%d lspd=%d", busoptions, ne->busopt.irmc, ne->busopt.cmc, @@ -1012,7 +1011,7 @@ static void nodemgr_process_root_directo case CSR1212_KV_ID_UNIT: nodemgr_process_unit_directory(hi, ne, kv, &ud_id, NULL); - break; + break; case CSR1212_KV_ID_DESCRIPTOR: if (last_key_id == CSR1212_KV_ID_VENDOR) { @@ -1056,13 +1055,14 @@ static int nodemgr_hotplug(struct class_ #define PUT_ENVP(fmt,val) \ do { \ + int printed; \ envp[i++] = buffer; \ - length += snprintf(buffer, buffer_size - length, \ + printed = snprintf(buffer, buffer_size - length, \ fmt, val); \ - if ((buffer_size - length <= 0) || (i >= num_envp)) \ + if ((buffer_size - (length+printed) <= 0) || (i >= num_envp)) \ return -ENOMEM; \ - ++length; \ - buffer += length; \ + length += printed+1; \ + buffer += printed+1; \ } while (0) PUT_ENVP("VENDOR_ID=%06x", ud->vendor_id); @@ -1084,7 +1084,7 @@ static int nodemgr_hotplug(struct class_ char *buffer, int buffer_size) { 
return -ENODEV; -} +} #endif /* CONFIG_HOTPLUG */ @@ -1150,7 +1150,6 @@ static void nodemgr_update_node(struct n ne->generation = generation; } - static void nodemgr_node_scan_one(struct host_info *hi, @@ -1381,8 +1380,9 @@ static void nodemgr_node_probe(struct ho static int nodemgr_do_irm_duties(struct hpsb_host *host, int cycles) { quadlet_t bc; - - if (!host->is_irm) + + /* if irm_id == -1 then there is no IRM on this bus */ + if (!host->is_irm || host->irm_id == (nodeid_t)-1) return 1; host->csr.broadcast_channel |= 0x40000000; /* set validity bit */ @@ -1467,7 +1467,6 @@ static int nodemgr_host_thread(void *__h /* No userlevel access needed */ daemonize(hi->daemon_name); - allow_signal(SIGTERM); /* Setup our device-model entries */ nodemgr_create_host_dev_files(host); @@ -1611,7 +1610,7 @@ int hpsb_node_read(struct node_entry *ne addr, buffer, length); } -int hpsb_node_write(struct node_entry *ne, u64 addr, +int hpsb_node_write(struct node_entry *ne, u64 addr, quadlet_t *buffer, size_t length) { unsigned int generation = ne->generation; @@ -1621,7 +1620,7 @@ int hpsb_node_write(struct node_entry *n addr, buffer, length); } -int hpsb_node_lock(struct node_entry *ne, u64 addr, +int hpsb_node_lock(struct node_entry *ne, u64 addr, int extcode, quadlet_t *data, quadlet_t arg) { unsigned int generation = ne->generation; --- linux-2.6.6-rc1/drivers/ieee1394/nodemgr.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/nodemgr.h 2004-04-18 22:25:24.742069112 -0700 @@ -169,7 +169,7 @@ struct hpsb_host *hpsb_get_host_by_ne(st /* * This will fill in the given, pre-initialised hpsb_packet with the current * information from the node entry (host, node ID, generation number). It will - * return false if the node owning the GUID is not accessible (and not modify the + * return false if the node owning the GUID is not accessible (and not modify the * hpsb_packet) and return true otherwise. 
* * Note that packet sending may still fail in hpsb_send_packet if a bus reset @@ -181,9 +181,9 @@ void hpsb_node_fill_packet(struct node_e int hpsb_node_read(struct node_entry *ne, u64 addr, quadlet_t *buffer, size_t length); -int hpsb_node_write(struct node_entry *ne, u64 addr, +int hpsb_node_write(struct node_entry *ne, u64 addr, quadlet_t *buffer, size_t length); -int hpsb_node_lock(struct node_entry *ne, u64 addr, +int hpsb_node_lock(struct node_entry *ne, u64 addr, int extcode, quadlet_t *data, quadlet_t arg); --- linux-2.6.6-rc1/drivers/ieee1394/ohci1394.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/ohci1394.c 2004-04-18 22:25:24.750067896 -0700 @@ -32,7 +32,7 @@ * Things implemented, but still in test phase: * . Iso Transmit * . Async Stream Packets Transmit (Receive done via Iso interface) - * + * * Things not implemented: * . DMA error recovery * @@ -41,7 +41,7 @@ * added LONG_RESET_ROOT and SHORT_RESET_ROOT for root holdoff --kk */ -/* +/* * Acknowledgments: * * Adam J Richter @@ -162,7 +162,7 @@ printk(level "%s: " fmt "\n" , OHCI1394_ printk(level "%s: fw-host%d: " fmt "\n" , OHCI1394_DRIVER_NAME, ohci->host->id , ## args) static char version[] __devinitdata = - "$Rev: 1172 $ Ben Collins "; + "$Rev: 1203 $ Ben Collins "; /* Module Parameters */ static int phys_dma = 1; @@ -185,7 +185,7 @@ static int alloc_dma_trm_ctx(struct ti_o static void ohci1394_pci_remove(struct pci_dev *pdev); #ifndef __LITTLE_ENDIAN -static unsigned hdr_sizes[] = +static unsigned hdr_sizes[] = { 3, /* TCODE_WRITEQ */ 4, /* TCODE_WRITEB */ @@ -221,7 +221,7 @@ static inline void packet_swab(quadlet_t * IEEE-1394 functionality section * ***********************************/ -static u8 get_phy_reg(struct ti_ohci *ohci, u8 addr) +static u8 get_phy_reg(struct ti_ohci *ohci, u8 addr) { int i; unsigned long flags; @@ -243,9 +243,9 @@ static u8 get_phy_reg(struct ti_ohci *oh if (i >= OHCI_LOOP_COUNT) PRINT (KERN_ERR, "Get PHY Reg timeout [0x%08x/0x%08x/%d]", r, r & 
0x80000000, i); - + spin_unlock_irqrestore (&ohci->phy_reg_lock, flags); - + return (r & 0x00ff0000) >> 16; } @@ -303,7 +303,7 @@ static void handle_selfid(struct ti_ohci else q0 = q[0]; - if ((self_id_count & 0x80000000) || + if ((self_id_count & 0x80000000) || ((self_id_count & 0x00FF0000) != (q0 & 0x00FF0000))) { PRINT(KERN_ERR, "Error in reception of SelfID packets [0x%08x/0x%08x] (count: %d)", @@ -335,7 +335,7 @@ static void handle_selfid(struct ti_ohci q0 = q[0]; q1 = q[1]; } - + if (q0 == ~q1) { DBGMSG ("SelfID packet 0x%x received", q0); hpsb_selfid_received(host, cpu_to_be32(q0)); @@ -358,7 +358,7 @@ static void ohci_soft_reset(struct ti_oh int i; reg_write(ohci, OHCI1394_HCControlSet, OHCI1394_HCControl_softReset); - + for (i = 0; i < OHCI_LOOP_COUNT; i++) { if (!(reg_read(ohci, OHCI1394_HCControlSet) & OHCI1394_HCControl_softReset)) break; @@ -367,32 +367,6 @@ static void ohci_soft_reset(struct ti_oh DBGMSG ("Soft reset finished"); } -static int run_context(struct ti_ohci *ohci, int reg, char *msg) -{ - u32 nodeId; - - /* check that the node id is valid */ - nodeId = reg_read(ohci, OHCI1394_NodeID); - if (!(nodeId&0x80000000)) { - PRINT(KERN_ERR, - "Running dma failed because Node ID is not valid"); - return -1; - } - - /* check that the node number != 63 */ - if ((nodeId&0x3f)==63) { - PRINT(KERN_ERR, - "Running dma failed because Node ID == 63"); - return -1; - } - - /* Run the dma context */ - reg_write(ohci, reg, 0x8000); - - if (msg) PRINT(KERN_DEBUG, "%s", msg); - - return 0; -} /* Generate the dma receive prgs and start the context */ static void initialize_dma_rcv_ctx(struct dma_rcv_ctx *d, int generate_irq) @@ -404,7 +378,7 @@ static void initialize_dma_rcv_ctx(struc for (i=0; inum_desc; i++) { u32 c; - + c = DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | DMA_CTL_BRANCH; if (generate_irq) c |= DMA_CTL_IRQ; @@ -433,7 +407,7 @@ static void initialize_dma_rcv_ctx(struc /* Set bufferFill, isochHeader, multichannel for IR context */ reg_write(ohci, 
d->ctrlSet, 0xd0000000); - + /* Set the context match register to match on all tags */ reg_write(ohci, d->ctxtMatch, 0xf0000000); @@ -505,7 +479,7 @@ static void ohci_initialize(struct ti_oh spin_lock_init(&ohci->phy_reg_lock); spin_lock_init(&ohci->event_lock); - + /* Put some defaults to these undefined bus options */ buf = reg_read(ohci, OHCI1394_BusOptions); buf |= 0xE0000000; /* Enable IRMC, CMC and ISC */ @@ -521,7 +495,7 @@ static void ohci_initialize(struct ti_oh /* Clear link control register */ reg_write(ohci, OHCI1394_LinkControlClear, 0xffffffff); - + /* Enable cycle timer and cycle master and set the IRM * contender bit in our self ID packets. */ reg_write(ohci, OHCI1394_LinkControlSet, OHCI1394_LinkControl_CycleTimerEnable | @@ -539,10 +513,10 @@ static void ohci_initialize(struct ti_oh reg_write(ohci, OHCI1394_ConfigROMmap, ohci->csr_config_rom_bus); /* Now get our max packet size */ - ohci->max_packet_size = + ohci->max_packet_size = 1<<(((reg_read(ohci, OHCI1394_BusOptions)>>12)&0xf)+1); - /* Don't accept phy packets into AR request context */ + /* Don't accept phy packets into AR request context */ reg_write(ohci, OHCI1394_LinkControlClear, 0x00000400); /* Clear the interrupt mask */ @@ -561,15 +535,15 @@ static void ohci_initialize(struct ti_oh initialize_dma_trm_ctx(&ohci->at_req_context); initialize_dma_trm_ctx(&ohci->at_resp_context); - /* - * Accept AT requests from all nodes. This probably + /* + * Accept AT requests from all nodes. This probably * will have to be controlled from the subsystem * on a per node basis. 
*/ reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000); /* Specify AT retries */ - reg_write(ohci, OHCI1394_ATRetries, + reg_write(ohci, OHCI1394_ATRetries, OHCI1394_MAX_AT_REQ_RETRIES | (OHCI1394_MAX_AT_RESP_RETRIES<<4) | (OHCI1394_MAX_PHYS_RESP_RETRIES<<8)); @@ -580,8 +554,8 @@ static void ohci_initialize(struct ti_oh /* Enable interrupts */ reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_unrecoverableError | - OHCI1394_masterIntEnable | - OHCI1394_busReset | + OHCI1394_masterIntEnable | + OHCI1394_busReset | OHCI1394_selfIDComplete | OHCI1394_RSPkt | OHCI1394_RQPkt | @@ -620,13 +594,46 @@ static void ohci_initialize(struct ti_oh if (status & 0x20) set_phy_reg(ohci, 8, status & ~1); } + + /* Serial EEPROM Sanity check. */ + if ((ohci->max_packet_size < 512) || + (ohci->max_packet_size > 4096)) { + /* Serial EEPROM contents are suspect, set a sane max packet + * size and print the raw contents for bug reports if verbose + * debug is enabled. */ +#ifdef CONFIG_IEEE1394_VERBOSEDEBUG + int i; +#endif + + PRINT(KERN_DEBUG, "Serial EEPROM has suspicious values, " + "attempting to setting max_packet_size to 512 bytes"); + reg_write(ohci, OHCI1394_BusOptions, + (reg_read(ohci, OHCI1394_BusOptions) & 0xf007) | 0x8002); + ohci->max_packet_size = 512; +#ifdef CONFIG_IEEE1394_VERBOSEDEBUG + PRINT(KERN_DEBUG, " EEPROM Present: %d", + (reg_read(ohci, OHCI1394_Version) >> 24) & 0x1); + reg_write(ohci, OHCI1394_GUID_ROM, 0x80000000); + + for (i = 0; + ((i < 1000) && + (reg_read(ohci, OHCI1394_GUID_ROM) & 0x80000000)); i++) + udelay(10); + + for (i = 0; i < 0x20; i++) { + reg_write(ohci, OHCI1394_GUID_ROM, 0x02000000); + PRINT(KERN_DEBUG, " EEPROM %02x: %02x", i, + (reg_read(ohci, OHCI1394_GUID_ROM) >> 16) & 0xff); + } +#endif + } } -/* +/* * Insert a packet in the DMA fifo and generate the DMA prg * FIXME: rewrite the program in order to accept packets crossing * page boundaries. 
- * check also that a single dma descriptor doesn't cross a + * check also that a single dma descriptor doesn't cross a * page boundary. */ static void insert_packet(struct ti_ohci *ohci, @@ -644,13 +651,13 @@ static void insert_packet(struct ti_ohci d->prg_cpu[idx]->begin.branchAddress = 0; if (d->type == DMA_CTX_ASYNC_RESP) { - /* + /* * For response packets, we need to put a timeout value in - * the 16 lower bits of the status... let's try 1 sec timeout - */ + * the 16 lower bits of the status... let's try 1 sec timeout + */ cycleTimer = reg_read(ohci, OHCI1394_IsochronousCycleTimer); d->prg_cpu[idx]->begin.status = cpu_to_le32( - (((((cycleTimer>>25)&0x7)+1)&0x7)<<13) | + (((((cycleTimer>>25)&0x7)+1)&0x7)<<13) | ((cycleTimer&0x01fff000)>>12)); DBGMSG("cycleTimer: %08x timeStamp: %08x", @@ -674,7 +681,7 @@ static void insert_packet(struct ti_ohci } else { /* Sending a normal async request or response */ d->prg_cpu[idx]->data[1] = - (packet->header[1] & 0xFFFF) | + (packet->header[1] & 0xFFFF) | (packet->header[0] & 0xFFFF0000); d->prg_cpu[idx]->data[2] = packet->header[2]; d->prg_cpu[idx]->data[3] = packet->header[3]; @@ -694,10 +701,10 @@ static void insert_packet(struct ti_ohci } d->prg_cpu[idx]->end.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | - DMA_CTL_IRQ | + DMA_CTL_IRQ | DMA_CTL_BRANCH | packet->data_size); - /* + /* * Check that the packet data buffer * does not cross a page boundary. * @@ -706,7 +713,7 @@ static void insert_packet(struct ti_ohci * problem. 
*/ #if 0 - if (cross_bound((unsigned long)packet->data, + if (cross_bound((unsigned long)packet->data, packet->data_size)>0) { /* FIXME: do something about it */ PRINT(KERN_ERR, @@ -723,28 +730,28 @@ static void insert_packet(struct ti_ohci d->prg_cpu[idx]->end.branchAddress = 0; d->prg_cpu[idx]->end.status = 0; - if (d->branchAddrPtr) + if (d->branchAddrPtr) *(d->branchAddrPtr) = cpu_to_le32(d->prg_bus[idx] | 0x3); d->branchAddrPtr = &(d->prg_cpu[idx]->end.branchAddress); } else { /* quadlet transmit */ if (packet->type == hpsb_raw) - d->prg_cpu[idx]->begin.control = + d->prg_cpu[idx]->begin.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | DMA_CTL_IMMEDIATE | - DMA_CTL_IRQ | + DMA_CTL_IRQ | DMA_CTL_BRANCH | (packet->header_size + 4)); else d->prg_cpu[idx]->begin.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | DMA_CTL_IMMEDIATE | - DMA_CTL_IRQ | + DMA_CTL_IRQ | DMA_CTL_BRANCH | packet->header_size); - if (d->branchAddrPtr) + if (d->branchAddrPtr) *(d->branchAddrPtr) = cpu_to_le32(d->prg_bus[idx] | 0x2); d->branchAddrPtr = @@ -756,11 +763,11 @@ static void insert_packet(struct ti_ohci (packet->header[0] & 0xFFFF); d->prg_cpu[idx]->data[1] = packet->header[0] & 0xFFFF0000; packet_swab(d->prg_cpu[idx]->data, packet->tcode); - - d->prg_cpu[idx]->begin.control = - cpu_to_le32(DMA_CTL_OUTPUT_MORE | + + d->prg_cpu[idx]->begin.control = + cpu_to_le32(DMA_CTL_OUTPUT_MORE | DMA_CTL_IMMEDIATE | 0x8); - d->prg_cpu[idx]->end.control = + d->prg_cpu[idx]->end.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | DMA_CTL_UPDATE | DMA_CTL_IRQ | @@ -790,7 +797,7 @@ static void insert_packet(struct ti_ohci d->prg_cpu[idx]->end.address, d->prg_cpu[idx]->end.branchAddress, d->prg_cpu[idx]->end.status); - if (d->branchAddrPtr) + if (d->branchAddrPtr) *(d->branchAddrPtr) = cpu_to_le32(d->prg_bus[idx] | 0x3); d->branchAddrPtr = &(d->prg_cpu[idx]->end.branchAddress); } @@ -798,7 +805,7 @@ static void insert_packet(struct ti_ohci /* queue the packet in the appropriate context queue */ 
list_add_tail(&packet->driver_list, &d->fifo_list); - d->prg_ind = (d->prg_ind+1)%d->num_desc; + d->prg_ind = (d->prg_ind + 1) % d->num_desc; } /* @@ -806,46 +813,54 @@ static void insert_packet(struct ti_ohci * and runs or wakes up the DMA prg if necessary. * * The function MUST be called with the d->lock held. - */ -static int dma_trm_flush(struct ti_ohci *ohci, struct dma_trm_ctx *d) + */ +static void dma_trm_flush(struct ti_ohci *ohci, struct dma_trm_ctx *d) { - struct hpsb_packet *p; - int idx,z; + struct hpsb_packet *packet, *ptmp; + int idx = d->prg_ind; + int z = 0; - if (list_empty(&d->pending_list) || d->free_prgs == 0) - return 0; + /* insert the packets into the dma fifo */ + list_for_each_entry_safe(packet, ptmp, &d->pending_list, driver_list) { + if (!d->free_prgs) + break; - p = driver_packet(d->pending_list.next); - idx = d->prg_ind; - z = (p->data_size) ? 3 : 2; + /* For the first packet only */ + if (!z) + z = (packet->data_size) ? 3 : 2; - /* insert the packets into the dma fifo */ - while (d->free_prgs > 0 && !list_empty(&d->pending_list)) { - struct hpsb_packet *p = driver_packet(d->pending_list.next); - list_del(&p->driver_list); - insert_packet(ohci, d, p); + /* Insert the packet */ + list_del_init(&packet->driver_list); + insert_packet(ohci, d, packet); } - if (d->free_prgs == 0) - DBGMSG("Transmit DMA FIFO ctx=%d is full... waiting", d->ctx); + /* Nothing must have been done, either no free_prgs or no packets */ + if (z == 0) + return; - /* Is the context running ? (should be unless it is + /* Is the context running ? 
(should be unless it is the first packet to be sent in this context) */ if (!(reg_read(ohci, d->ctrlSet) & 0x8000)) { + u32 nodeId = reg_read(ohci, OHCI1394_NodeID); + DBGMSG("Starting transmit DMA ctx=%d",d->ctx); - reg_write(ohci, d->cmdPtr, d->prg_bus[idx]|z); - run_context(ohci, d->ctrlSet, NULL); - } - else { + reg_write(ohci, d->cmdPtr, d->prg_bus[idx] | z); + + /* Check that the node id is valid, and not 63 */ + if (!(nodeId & 0x80000000) || (nodeId & 0x3f) == 63) + PRINT(KERN_ERR, "Running dma failed because Node ID is not valid"); + else + reg_write(ohci, d->ctrlSet, 0x8000); + } else { /* Wake up the dma context if necessary */ - if (!(reg_read(ohci, d->ctrlSet) & 0x400)) { + if (!(reg_read(ohci, d->ctrlSet) & 0x400)) DBGMSG("Waking transmit DMA ctx=%d",d->ctx); - } /* do this always, to avoid race condition */ reg_write(ohci, d->ctrlSet, 0x1000); } - return 1; + + return; } /* Transmission of an async or iso packet */ @@ -871,7 +886,7 @@ static int ohci_transmit(struct hpsb_hos * interrupt context, so we bail out if that is the * case. I don't see anyone sending ISO packets from * interrupt context anyway... 
*/ - + if (ohci->it_legacy_context.ohci == NULL) { if (in_interrupt()) { PRINT(KERN_ERR, @@ -889,11 +904,11 @@ static int ohci_transmit(struct hpsb_hos initialize_dma_trm_ctx(&ohci->it_legacy_context); } - + d = &ohci->it_legacy_context; } else if ((packet->tcode & 0x02) && (packet->tcode != TCODE_ISO_DATA)) d = &ohci->at_resp_context; - else + else d = &ohci->at_req_context; spin_lock_irqsave(&d->lock,flags); @@ -986,7 +1001,7 @@ static int ohci_devctl(struct hpsb_host * enable cycleTimer, cycleMaster */ DBGMSG("Cycle master enabled"); - reg_write(ohci, OHCI1394_LinkControlSet, + reg_write(ohci, OHCI1394_LinkControlSet, OHCI1394_LinkControl_CycleTimerEnable | OHCI1394_LinkControl_CycleMaster); } @@ -1011,7 +1026,7 @@ static int ohci_devctl(struct hpsb_host if (arg<0 || arg>63) { PRINT(KERN_ERR, - "%s: IS0 listen channel %d is out of range", + "%s: IS0 listen channel %d is out of range", __FUNCTION__, arg); return -EFAULT; } @@ -1038,7 +1053,7 @@ static int ohci_devctl(struct hpsb_host if (ohci->ISO_channel_usage & mask) { PRINT(KERN_ERR, - "%s: IS0 listen channel %d is already used", + "%s: IS0 listen channel %d is already used", __FUNCTION__, arg); spin_unlock_irqrestore(&ohci->IR_channel_lock, flags); return -EFAULT; @@ -1047,12 +1062,12 @@ static int ohci_devctl(struct hpsb_host ohci->ISO_channel_usage |= mask; ohci->ir_legacy_channels |= mask; - if (arg>31) - reg_write(ohci, OHCI1394_IRMultiChanMaskHiSet, - 1<<(arg-32)); + if (arg>31) + reg_write(ohci, OHCI1394_IRMultiChanMaskHiSet, + 1<<(arg-32)); else - reg_write(ohci, OHCI1394_IRMultiChanMaskLoSet, - 1<IR_channel_lock, flags); DBGMSG("Listening enabled on channel %d", arg); @@ -1064,32 +1079,32 @@ static int ohci_devctl(struct hpsb_host if (arg<0 || arg>63) { PRINT(KERN_ERR, - "%s: IS0 unlisten channel %d is out of range", + "%s: IS0 unlisten channel %d is out of range", __FUNCTION__, arg); return -EFAULT; } mask = (u64)0x1<IR_channel_lock, flags); if (!(ohci->ISO_channel_usage & mask)) { PRINT(KERN_ERR, - 
"%s: IS0 unlisten channel %d is not used", + "%s: IS0 unlisten channel %d is not used", __FUNCTION__, arg); spin_unlock_irqrestore(&ohci->IR_channel_lock, flags); return -EFAULT; } - + ohci->ISO_channel_usage &= ~mask; ohci->ir_legacy_channels &= ~mask; - if (arg>31) - reg_write(ohci, OHCI1394_IRMultiChanMaskHiClear, - 1<<(arg-32)); + if (arg>31) + reg_write(ohci, OHCI1394_IRMultiChanMaskHiClear, + 1<<(arg-32)); else - reg_write(ohci, OHCI1394_IRMultiChanMaskLoClear, - 1<IR_channel_lock, flags); DBGMSG("Listening disabled on channel %d", arg); @@ -1215,7 +1230,7 @@ static int ohci_iso_recv_init(struct hps /* iso->irq_interval is in packets - translate that to blocks */ if (iso->irq_interval == 1) - recv->block_irq_interval = 1; + recv->block_irq_interval = 1; else recv->block_irq_interval = iso->irq_interval * ((recv->nblocks+1)/iso->buf_packets); @@ -1241,7 +1256,7 @@ static int ohci_iso_recv_init(struct hps for (recv->buf_stride = 8; recv->buf_stride < max_packet_size; recv->buf_stride *= 2); - + if (recv->buf_stride*iso->buf_packets > iso->buf_size || recv->buf_stride > PAGE_SIZE) { /* this shouldn't happen, but anyway... */ @@ -1285,7 +1300,7 @@ static int ohci_iso_recv_init(struct hps reg_write(recv->ohci, OHCI1394_IRMultiChanMaskHiClear, 0xFFFFFFFF); reg_write(recv->ohci, OHCI1394_IRMultiChanMaskLoClear, 0xFFFFFFFF); } - + /* write the DMA program */ ohci_iso_recv_program(iso); @@ -1293,7 +1308,7 @@ static int ohci_iso_recv_init(struct hps " (%u bytes), using %u blocks, buf_stride %u, block_irq_interval %d", recv->dma_mode == BUFFER_FILL_MODE ? 
"buffer-fill" : "packet-per-buffer", - iso->buf_size/PAGE_SIZE, iso->buf_size, + iso->buf_size/PAGE_SIZE, iso->buf_size, recv->nblocks, recv->buf_stride, recv->block_irq_interval); return 0; @@ -1309,7 +1324,7 @@ static void ohci_iso_recv_stop(struct hp /* disable interrupts */ reg_write(recv->ohci, OHCI1394_IsoRecvIntMaskClear, 1 << recv->task.context); - + /* halt DMA */ ohci1394_stop_context(recv->ohci, recv->ContextControlClear, NULL); } @@ -1457,20 +1472,20 @@ static int ohci_iso_recv_start(struct hp if (cycle != -1) { u32 seconds; - + /* enable cycleMatch */ reg_write(recv->ohci, recv->ContextControlSet, (1 << 29)); /* set starting cycle */ cycle &= 0x1FFF; - + /* 'cycle' is only mod 8000, but we also need two 'seconds' bits - just snarf them from the current time */ seconds = reg_read(recv->ohci, OHCI1394_IsochronousCycleTimer) >> 25; /* advance one second to give some extra time for DMA to start */ seconds += 1; - + cycle |= (seconds & 3) << 13; contextMatch |= cycle << 12; @@ -1535,7 +1550,7 @@ static void ohci_iso_recv_release_block( next->control |= cpu_to_le32(3 << 20); next->status = cpu_to_le32(recv->buf_stride); - /* link prev to next */ + /* link prev to next */ prev->branchAddress = cpu_to_le32(dma_prog_region_offset_to_bus(&recv->prog, sizeof(struct dma_cmd) * next_i) | 1); /* Z=1 */ @@ -1593,15 +1608,15 @@ static void ohci_iso_recv_bufferfill_par int wake = 0; int runaway = 0; struct ti_ohci *ohci = recv->ohci; - + while (1) { /* we expect the next parsable packet to begin at recv->dma_offset */ /* note: packet layout is as shown in section 10.6.1.1 of the OHCI spec */ - + unsigned int offset; unsigned short len, cycle; unsigned char channel, tag, sy; - + unsigned char *p = iso->data_buf.kvirt; unsigned int this_block = recv->dma_offset/recv->buf_stride; @@ -1619,26 +1634,26 @@ static void ohci_iso_recv_bufferfill_par break; wake = 1; - + /* parse data length, tag, channel, and sy */ - + /* note: we keep our own local copies of 'len' and 'offset' 
so the user can't mess with them by poking in the mmap area */ - + len = p[recv->dma_offset+2] | (p[recv->dma_offset+3] << 8); if (len > 4096) { PRINT(KERN_ERR, "IR DMA error - bogus 'len' value %u\n", len); } - + channel = p[recv->dma_offset+1] & 0x3F; tag = p[recv->dma_offset+1] >> 6; sy = p[recv->dma_offset+0] & 0xF; /* advance to data payload */ recv->dma_offset += 4; - + /* check for wrap-around */ if (recv->dma_offset >= recv->buf_stride*recv->nblocks) { recv->dma_offset -= recv->buf_stride*recv->nblocks; @@ -1651,7 +1666,7 @@ static void ohci_iso_recv_bufferfill_par recv->dma_offset += len; /* payload is padded to 4 bytes */ - if (len % 4) { + if (len % 4) { recv->dma_offset += 4 - (len%4); } @@ -1700,13 +1715,13 @@ static void ohci_iso_recv_bufferfill_tas /* loop over all blocks */ for (loop = 0; loop < recv->nblocks; loop++) { - + /* check block_dma to see if it's done */ struct dma_cmd *im = &recv->block[recv->block_dma]; - + /* check the DMA descriptor for new writes to xferStatus */ u16 xferstatus = le32_to_cpu(im->status) >> 16; - + /* rescount is the number of bytes *remaining to be written* in the block */ u16 rescount = le32_to_cpu(im->status) & 0xFFFF; @@ -1728,12 +1743,12 @@ static void ohci_iso_recv_bufferfill_tas we can't touch it until it's done */ break; } - + /* OK, the block is finished... 
*/ - + /* sync our view of the block */ dma_region_sync_for_cpu(&iso->data_buf, recv->block_dma*recv->buf_stride, recv->buf_stride); - + /* reset the DMA descriptor */ im->status = recv->buf_stride; @@ -1756,11 +1771,11 @@ static void ohci_iso_recv_packetperbuf_t int count; int wake = 0; struct ti_ohci *ohci = recv->ohci; - + /* loop over the entire buffer */ for (count = 0; count < recv->nblocks; count++) { u32 packet_len = 0; - + /* pointer to the DMA descriptor */ struct dma_cmd *il = ((struct dma_cmd*) recv->prog.kvirt) + iso->pkt_dma; @@ -1774,10 +1789,10 @@ static void ohci_iso_recv_packetperbuf_t /* this packet hasn't come in yet; we are done for now */ goto out; } - + if (event == 0x11) { /* packet received successfully! */ - + /* rescount is the number of bytes *remaining* in the packet buffer, after the packet was written */ packet_len = recv->buf_stride - rescount; @@ -1790,7 +1805,7 @@ static void ohci_iso_recv_packetperbuf_t /* sync our view of the buffer */ dma_region_sync_for_cpu(&iso->data_buf, iso->pkt_dma * recv->buf_stride, recv->buf_stride); - + /* record the per-packet info */ { /* iso header is 8 bytes ahead of the data payload */ @@ -1806,7 +1821,7 @@ static void ohci_iso_recv_packetperbuf_t /* skip iso header */ offset += 8; packet_len -= 8; - + cycle = (hdr[0] | (hdr[1] << 8)) & 0x1FFF; channel = hdr[5] & 0x3F; tag = hdr[5] >> 6; @@ -1814,7 +1829,7 @@ static void ohci_iso_recv_packetperbuf_t hpsb_iso_packet_received(iso, offset, packet_len, cycle, channel, tag, sy); } - + /* reset the DMA descriptor */ il->status = recv->buf_stride; @@ -1958,7 +1973,7 @@ static void ohci_iso_xmit_task(unsigned /* DMA descriptor */ struct iso_xmit_cmd *cmd = dma_region_i(&xmit->prog, struct iso_xmit_cmd, iso->pkt_dma); - + /* check for new writes to xferStatus */ u16 xferstatus = le32_to_cpu(cmd->output_last.status) >> 16; u8 event = xferstatus & 0x1F; @@ -1971,16 +1986,16 @@ static void ohci_iso_xmit_task(unsigned if (event != 0x11) PRINT(KERN_ERR, "IT DMA 
error - OHCI error code 0x%02x\n", event); - + /* at least one packet went out, so wake up the writer */ wake = 1; - + /* parse cycle */ cycle = le32_to_cpu(cmd->output_last.status) & 0x1FFF; /* tell the subsystem the packet has gone out */ hpsb_iso_packet_sent(iso, cycle, event != 0x11); - + /* reset the DMA descriptor for next time */ cmd->output_last.status = 0; } @@ -2101,14 +2116,14 @@ static int ohci_iso_xmit_start(struct hp /* cycle match */ if (cycle != -1) { u32 start = cycle & 0x1FFF; - + /* 'cycle' is only mod 8000, but we also need two 'seconds' bits - just snarf them from the current time */ u32 seconds = reg_read(xmit->ohci, OHCI1394_IsochronousCycleTimer) >> 25; /* advance one second to give some extra time for DMA to start */ seconds += 1; - + start |= (seconds & 3) << 13; reg_write(xmit->ohci, xmit->ContextControlSet, 0x80000000 | (start << 16)); @@ -2201,6 +2216,7 @@ static void dma_trm_reset(struct dma_trm unsigned long flags; LIST_HEAD(packet_list); struct ti_ohci *ohci = d->ohci; + struct hpsb_packet *packet, *ptmp; ohci1394_stop_context(ohci, d->ctrlClear, NULL); @@ -2221,19 +2237,20 @@ static void dma_trm_reset(struct dma_trm spin_unlock_irqrestore(&d->lock, flags); - /* Now process subsystem callbacks for the packets from the - * context. */ + if (list_empty(&packet_list)) + return; - while (!list_empty(&packet_list)) { - struct hpsb_packet *p = driver_packet(packet_list.next); - PRINT(KERN_INFO, - "AT dma reset ctx=%d, aborting transmission", d->ctx); - list_del(&p->driver_list); - hpsb_packet_sent(ohci->host, p, ACKX_ABORTED); + PRINT(KERN_INFO, "AT dma reset ctx=%d, aborting transmission", d->ctx); + + /* Now process subsystem callbacks for the packets from this + * context. 
*/ + list_for_each_entry_safe(packet, ptmp, &packet_list, driver_list) { + list_del_init(&packet->driver_list); + hpsb_packet_sent(ohci->host, packet, ACKX_ABORTED); } } -static void ohci_schedule_iso_tasklets(struct ti_ohci *ohci, +static void ohci_schedule_iso_tasklets(struct ti_ohci *ohci, quadlet_t rx_event, quadlet_t tx_event) { @@ -2393,7 +2410,8 @@ static irqreturn_t ohci_irq_handler(int ohci1394_stop_context(ohci, d->ctrlClear, "reqTxComplete"); else - tasklet_schedule(&d->task); + dma_trm_tasklet((unsigned long)d); + //tasklet_schedule(&d->task); event &= ~OHCI1394_reqTxComplete; } if (event & OHCI1394_respTxComplete) { @@ -2436,7 +2454,7 @@ static irqreturn_t ohci_irq_handler(int event &= ~OHCI1394_isochRx; } if (event & OHCI1394_isochTx) { - quadlet_t tx_event; + quadlet_t tx_event; tx_event = reg_read(ohci, OHCI1394_IsoXmitIntEventSet); reg_write(ohci, OHCI1394_IsoXmitIntEventClear, tx_event); @@ -2459,7 +2477,7 @@ static irqreturn_t ohci_irq_handler(int isroot = (node_id & 0x40000000) != 0; DBGMSG("SelfID interrupt received " - "(phyid %d, %s)", phyid, + "(phyid %d, %s)", phyid, (isroot ? "root" : "not root")); handle_selfid(ohci, host, phyid, isroot); @@ -2535,10 +2553,10 @@ static void insert_dma_buffer(struct dma #define cond_le32_to_cpu(data, noswap) \ (noswap ? data : le32_to_cpu(data)) -static const int TCODE_SIZE[16] = {20, 0, 16, -1, 16, 20, 20, 0, +static const int TCODE_SIZE[16] = {20, 0, 16, -1, 16, 20, 20, 0, -1, 0, -1, 0, -1, -1, 16, -1}; -/* +/* * Determine the length of a packet in the buffer * Optimization suggested by Pascal Drolet */ @@ -2669,7 +2687,7 @@ static void dma_rcv_tasklet (unsigned lo offset=0; } } - + /* We get one phy packet to the async descriptor for each * bus reset. We always ignore it. */ if (tcode != OHCI1394_TCODE_PHY) { @@ -2687,7 +2705,7 @@ static void dma_rcv_tasklet (unsigned lo ack = (((cond_le32_to_cpu(d->spb[length/4-1], ohci->no_swap_incoming)>>16)&0x1f) == 0x11) ? 
1 : 0; - hpsb_packet_received(ohci->host, d->spb, + hpsb_packet_received(ohci->host, d->spb, length-4, ack); } #ifdef OHCI1394_DEBUG @@ -2713,24 +2731,23 @@ static void dma_trm_tasklet (unsigned lo { struct dma_trm_ctx *d = (struct dma_trm_ctx*)data; struct ti_ohci *ohci = (struct ti_ohci*)(d->ohci); - struct hpsb_packet *packet; + struct hpsb_packet *packet, *ptmp; unsigned long flags; u32 status, ack; size_t datasize; spin_lock_irqsave(&d->lock, flags); - while (!list_empty(&d->fifo_list)) { - packet = driver_packet(d->fifo_list.next); + list_for_each_entry_safe(packet, ptmp, &d->fifo_list, driver_list) { datasize = packet->data_size; if (datasize && packet->type != hpsb_raw) status = le32_to_cpu( d->prg_cpu[d->sent_ind]->end.status) >> 16; - else + else status = le32_to_cpu( d->prg_cpu[d->sent_ind]->begin.status) >> 16; - if (status == 0) + if (status == 0) /* this packet hasn't been sent yet*/ break; @@ -2738,34 +2755,34 @@ static void dma_trm_tasklet (unsigned lo if (datasize) if (((le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0])>>4)&0xf) == 0xa) DBGMSG("Stream packet sent to channel %d tcode=0x%X " - "ack=0x%X spd=%d dataLength=%d ctx=%d", + "ack=0x%X spd=%d dataLength=%d ctx=%d", (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0])>>8)&0x3f, (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0])>>4)&0xf, - status&0x1f, (status>>5)&0x3, + status&0x1f, (status>>5)&0x3, le32_to_cpu(d->prg_cpu[d->sent_ind]->data[1])>>16, d->ctx); else DBGMSG("Packet sent to node %d tcode=0x%X tLabel=" - "0x%02X ack=0x%X spd=%d dataLength=%d ctx=%d", + "0x%02X ack=0x%X spd=%d dataLength=%d ctx=%d", (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[1])>>16)&0x3f, (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0])>>4)&0xf, (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0])>>10)&0x3f, status&0x1f, (status>>5)&0x3, le32_to_cpu(d->prg_cpu[d->sent_ind]->data[3])>>16, d->ctx); - else + else DBGMSG("Packet sent to node %d tcode=0x%X tLabel=" - "0x%02X ack=0x%X spd=%d data=0x%08X ctx=%d", + "0x%02X ack=0x%X spd=%d 
data=0x%08X ctx=%d", (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[1]) >>16)&0x3f, (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0]) >>4)&0xf, (le32_to_cpu(d->prg_cpu[d->sent_ind]->data[0]) >>10)&0x3f, - status&0x1f, (status>>5)&0x3, + status&0x1f, (status>>5)&0x3, le32_to_cpu(d->prg_cpu[d->sent_ind]->data[3]), d->ctx); -#endif +#endif if (status & 0x10) { ack = status & 0xf; @@ -2818,11 +2835,11 @@ static void dma_trm_tasklet (unsigned lo } } - list_del(&packet->driver_list); + list_del_init(&packet->driver_list); hpsb_packet_sent(ohci->host, packet, ack); if (datasize) { - pci_unmap_single(ohci->dev, + pci_unmap_single(ohci->dev, cpu_to_le32(d->prg_cpu[d->sent_ind]->end.address), datasize, PCI_DMA_TODEVICE); OHCI_DMA_FREE("single Xmit data packet"); @@ -2867,7 +2884,7 @@ static void free_dma_rcv_ctx(struct dma_ for (i=0; inum_desc; i++) if (d->buf_cpu[i] && d->buf_bus[i]) { pci_free_consistent( - ohci->dev, d->buf_size, + ohci->dev, d->buf_size, d->buf_cpu[i], d->buf_bus[i]); OHCI_DMA_FREE("consistent dma_rcv buf[%d]", i); } @@ -2875,7 +2892,7 @@ static void free_dma_rcv_ctx(struct dma_ kfree(d->buf_bus); } if (d->prg_cpu) { - for (i=0; inum_desc; i++) + for (i=0; inum_desc; i++) if (d->prg_cpu[i] && d->prg_bus[i]) { pci_pool_free(d->prg_pool, d->prg_cpu[i], d->prg_bus[i]); OHCI_DMA_FREE("consistent dma_rcv prg[%d]", i); @@ -2921,7 +2938,7 @@ alloc_dma_rcv_ctx(struct ti_ohci *ohci, memset(d->buf_cpu, 0, d->num_desc * sizeof(quadlet_t*)); memset(d->buf_bus, 0, d->num_desc * sizeof(dma_addr_t)); - d->prg_cpu = kmalloc(d->num_desc * sizeof(struct dma_cmd*), + d->prg_cpu = kmalloc(d->num_desc * sizeof(struct dma_cmd*), GFP_KERNEL); d->prg_bus = kmalloc(d->num_desc * sizeof(dma_addr_t), GFP_KERNEL); @@ -2946,11 +2963,11 @@ alloc_dma_rcv_ctx(struct ti_ohci *ohci, OHCI_DMA_ALLOC("dma_rcv prg pool"); for (i=0; inum_desc; i++) { - d->buf_cpu[i] = pci_alloc_consistent(ohci->dev, + d->buf_cpu[i] = pci_alloc_consistent(ohci->dev, d->buf_size, d->buf_bus+i); 
OHCI_DMA_ALLOC("consistent dma_rcv buf[%d]", i); - + if (d->buf_cpu[i] != NULL) { memset(d->buf_cpu[i], 0, d->buf_size); } else { @@ -3015,7 +3032,7 @@ static void free_dma_trm_ctx(struct dma_ DBGMSG("Freeing dma_trm_ctx %d", d->ctx); if (d->prg_cpu) { - for (i=0; inum_desc; i++) + for (i=0; inum_desc; i++) if (d->prg_cpu[i] && d->prg_bus[i]) { pci_pool_free(d->prg_pool, d->prg_cpu[i], d->prg_bus[i]); OHCI_DMA_FREE("pool dma_trm prg[%d]", i); @@ -3045,7 +3062,7 @@ alloc_dma_trm_ctx(struct ti_ohci *ohci, d->ctrlClear = 0; d->cmdPtr = 0; - d->prg_cpu = kmalloc(d->num_desc * sizeof(struct at_dma_prg*), + d->prg_cpu = kmalloc(d->num_desc * sizeof(struct at_dma_prg*), GFP_KERNEL); d->prg_bus = kmalloc(d->num_desc * sizeof(dma_addr_t), GFP_KERNEL); @@ -3194,7 +3211,7 @@ static int __devinit ohci1394_pci_probe( * noByteSwapData registers to see if they were not cleared to * zero. Should this work? Obviously it's not defined what these * registers will read when they aren't supported. Bleh! */ - if (dev->vendor == PCI_VENDOR_ID_APPLE && + if (dev->vendor == PCI_VENDOR_ID_APPLE && dev->device == PCI_DEVICE_ID_APPLE_UNI_N_FW) { ohci->no_swap_incoming = 1; ohci->selfid_swap = 0; @@ -3217,7 +3234,7 @@ static int __devinit ohci1394_pci_probe( /* We hardwire the MMIO length, since some CardBus adaptors * fail to report the right length. Anyway, the ohci spec - * clearly says it's 2kb, so this shouldn't be a problem. */ + * clearly says it's 2kb, so this shouldn't be a problem. 
*/ ohci_base = pci_resource_start(dev, 0); if (pci_resource_len(dev, 0) != OHCI1394_REGISTER_SIZE) PRINT(KERN_WARNING, "Unexpected PCI resource length of %lx!", @@ -3248,7 +3265,7 @@ static int __devinit ohci1394_pci_probe( ohci->init_state = OHCI_INIT_HAVE_CONFIG_ROM_BUFFER; /* self-id dma buffer allocation */ - ohci->selfid_buf_cpu = + ohci->selfid_buf_cpu = pci_alloc_consistent(ohci->dev, OHCI1394_SI_DMA_BUF_SIZE, &ohci->selfid_buf_bus); OHCI_DMA_ALLOC("consistent selfid_buf"); @@ -3259,8 +3276,8 @@ static int __devinit ohci1394_pci_probe( if ((unsigned long)ohci->selfid_buf_cpu & 0x1fff) PRINT(KERN_INFO, "SelfID buffer %p is not aligned on " - "8Kb boundary... may cause problems on some CXD3222 chip", - ohci->selfid_buf_cpu); + "8Kb boundary... may cause problems on some CXD3222 chip", + ohci->selfid_buf_cpu); /* No self-id errors at startup */ ohci->self_id_errors = 0; @@ -3423,7 +3440,7 @@ static void ohci1394_pci_remove(struct p free_dma_trm_ctx(&ohci->it_legacy_context); case OHCI_INIT_HAVE_SELFID_BUFFER: - pci_free_consistent(ohci->dev, OHCI1394_SI_DMA_BUF_SIZE, + pci_free_consistent(ohci->dev, OHCI1394_SI_DMA_BUF_SIZE, ohci->selfid_buf_cpu, ohci->selfid_buf_bus); OHCI_DMA_FREE("consistent selfid_buf"); @@ -3544,7 +3561,7 @@ int ohci1394_stop_context(struct ti_ohci /* stop the channel program if it's still running */ reg_write(ohci, reg, 0x8000); - + /* Wait until it effectively stops */ while (reg_read(ohci, reg) & 0x400) { i++; --- linux-2.6.6-rc1/drivers/ieee1394/ohci1394.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/ohci1394.h 2004-04-18 22:25:24.751067744 -0700 @@ -110,7 +110,7 @@ struct dma_rcv_ctx { int ctxtMatch; }; -/* DMA transmit context */ +/* DMA transmit context */ struct dma_trm_ctx { struct ti_ohci *ohci; enum context_type type; @@ -151,7 +151,7 @@ struct ohci1394_iso_tasklet { struct ti_ohci { struct pci_dev *dev; - enum { + enum { OHCI_INIT_ALLOC_HOST, OHCI_INIT_HAVE_MEM_REGION, OHCI_INIT_HAVE_IOMAPPING, @@ -161,17 +161,17 
@@ struct ti_ohci { OHCI_INIT_HAVE_IRQ, OHCI_INIT_DONE, } init_state; - + /* remapped memory spaces */ - void *registers; + void *registers; /* dma buffer for self-id packets */ quadlet_t *selfid_buf_cpu; dma_addr_t selfid_buf_bus; /* buffer for csr config rom */ - quadlet_t *csr_config_rom_cpu; - dma_addr_t csr_config_rom_bus; + quadlet_t *csr_config_rom_cpu; + dma_addr_t csr_config_rom_bus; int csr_config_rom_length; unsigned int max_packet_size; @@ -198,7 +198,7 @@ struct ti_ohci { struct dma_rcv_ctx ir_legacy_context; struct ohci1394_iso_tasklet ir_legacy_tasklet; - + /* iso transmit */ int nb_iso_xmit_ctx; unsigned long it_ctx_usage; /* use test_and_set_bit() for atomicity */ @@ -260,7 +260,7 @@ static inline u32 reg_read(const struct /* 2 KiloBytes of register space */ -#define OHCI1394_REGISTER_SIZE 0x800 +#define OHCI1394_REGISTER_SIZE 0x800 /* Offsets relative to context bases defined below */ @@ -440,9 +440,9 @@ static inline u32 reg_read(const struct #define OHCI1394_TCODE_PHY 0xE -void ohci1394_init_iso_tasklet(struct ohci1394_iso_tasklet *tasklet, +void ohci1394_init_iso_tasklet(struct ohci1394_iso_tasklet *tasklet, int type, - void (*func)(unsigned long), + void (*func)(unsigned long), unsigned long data); int ohci1394_register_iso_tasklet(struct ti_ohci *ohci, struct ohci1394_iso_tasklet *tasklet); --- linux-2.6.6-rc1/drivers/ieee1394/pcilynx.c 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/pcilynx.c 2004-04-18 22:25:24.755067136 -0700 @@ -23,7 +23,7 @@ * Contributions: * * Manfred Weihs - * reading bus info block (containing GUID) from serial + * reading bus info block (containing GUID) from serial * eeprom via i2c and storing it in config ROM * Reworked code for initiating bus resets * (long, short, with or without hold-off) @@ -139,7 +139,7 @@ static struct i2c_algo_bit_data bit_data .udelay = 5, .mdelay = 5, .timeout = 100, -}; +}; static struct i2c_adapter bit_ops = { .id = 0xAA, //FIXME: probably we should get an id in i2c-id.h @@ 
-195,19 +195,19 @@ static void free_pcl(struct ti_lynx *lyn if (lynx->pcl_bmap[off] & 1<pcl_bmap[off] &= ~(1<id, + PRINT(KERN_ERR, lynx->id, "attempted to free unallocated PCL %d", pclid); } spin_unlock(&lynx->lock); } -/* functions useful for debugging */ +/* functions useful for debugging */ static void pretty_print_pcl(const struct ti_pcl *pcl) { int i; printk("PCL next %08x, userdata %08x, status %08x, remtrans %08x, nextbuf %08x\n", - pcl->next, pcl->user_data, pcl->pcl_status, + pcl->next, pcl->user_data, pcl->pcl_status, pcl->remaining_transfer_count, pcl->next_data_buffer); printk("PCL"); @@ -218,7 +218,7 @@ static void pretty_print_pcl(const struc } printk("\n"); } - + static void print_pcl(const struct ti_lynx *lynx, pcl_t pclid) { struct ti_pcl pcl; @@ -419,7 +419,7 @@ static void handle_selfid(struct ti_lynx cpu_to_be32s(&q[i]); i--; } - + if (!lynx->phyic.reg_1394a) { lsid = generate_own_selfid(lynx, host); } @@ -437,7 +437,7 @@ static void handle_selfid(struct ti_lynx while (size > 0) { struct selfid *sid = (struct selfid *)q; - if (!lynx->phyic.reg_1394a && !sid->extended + if (!lynx->phyic.reg_1394a && !sid->extended && (sid->phy_id == (phyid + 1))) { hpsb_selfid_received(host, lsid); } @@ -484,8 +484,7 @@ static void send_next(struct ti_lynx *ly } packet = driver_packet(d->queue.next); - list_del(&packet->driver_list); - list_add_tail(&packet->driver_list, &d->pcl_queue); + list_move_tail(&packet->driver_list, &d->pcl_queue); d->header_dma = pci_map_single(lynx->dev, packet->header, packet->header_size, PCI_DMA_TODEVICE); @@ -500,11 +499,9 @@ static void send_next(struct ti_lynx *ly pcl.next = PCL_NEXT_INVALID; pcl.async_error_next = PCL_NEXT_INVALID; pcl.pcl_status = 0; -#ifdef __BIG_ENDIAN pcl.buffer[0].control = packet->speed_code << 14 | packet->header_size; -#else - pcl.buffer[0].control = packet->speed_code << 14 | packet->header_size - | PCL_BIGENDIAN; +#ifdef __BIG_ENDIAN + pcl.buffer[0].control |= PCL_BIGENDIAN; #endif 
pcl.buffer[0].pointer = d->header_dma; pcl.buffer[1].control = PCL_LAST_BUFF | packet->data_size; @@ -520,7 +517,7 @@ static void send_next(struct ti_lynx *ly case hpsb_raw: pcl.buffer[0].control |= PCL_CMD_UNFXMT; break; - } + } put_pcl(lynx, d->pcl, &pcl); run_pcl(lynx, d->pcl_start, d->channel); @@ -727,16 +724,16 @@ static int lynx_devctl(struct hpsb_host case GET_CYCLE_COUNTER: retval = reg_read(lynx, CYCLE_TIMER); break; - + case SET_CYCLE_COUNTER: reg_write(lynx, CYCLE_TIMER, arg); break; case SET_BUS_ID: - reg_write(lynx, LINK_ID, + reg_write(lynx, LINK_ID, (arg << 22) | (reg_read(lynx, LINK_ID) & 0x003f0000)); break; - + case ACT_CYCLE_MASTER: if (arg) { reg_set_bits(lynx, LINK_CONTROL, @@ -767,7 +764,7 @@ static int lynx_devctl(struct hpsb_host get_pcl(lynx, lynx->async.pcl, &pcl); packet = driver_packet(lynx->async.pcl_queue.next); - list_del(&packet->driver_list); + list_del_init(&packet->driver_list); pci_unmap_single(lynx->dev, lynx->async.header_dma, packet->header_size, PCI_DMA_TODEVICE); @@ -795,7 +792,7 @@ static int lynx_devctl(struct hpsb_host while (!list_empty(&packet_list)) { packet = driver_packet(packet_list.next); - list_del(&packet->driver_list); + list_del_init(&packet->driver_list); hpsb_packet_sent(host, packet, ACKX_ABORTED); } @@ -803,7 +800,7 @@ static int lynx_devctl(struct hpsb_host case ISO_LISTEN_CHANNEL: spin_lock_irqsave(&lynx->iso_rcv.lock, flags); - + if (lynx->iso_rcv.chan_count++ == 0) { reg_write(lynx, DMA_WORD1_CMP_ENABLE(CHANNEL_ISO_RCV), DMA_WORD1_CMP_ENABLE_MASTER); @@ -875,7 +872,7 @@ static int mem_open(struct inode *inode, int cid = iminor(inode); enum { t_rom, t_aux, t_ram } type; struct memdata *md; - + if (cid < PCILYNX_MINOR_AUX_START) { /* just for completeness */ return -ENXIO; @@ -976,10 +973,10 @@ loff_t mem_llseek(struct file *file, lof return newoffs; } -/* - * do not DMA if count is too small because this will have a serious impact +/* + * do not DMA if count is too small because this will have a serious 
impact * on performance - the value 2400 was found by experiment and may not work - * everywhere as good as here - use mem_mindma option for modules to change + * everywhere as good as here - use mem_mindma option for modules to change */ static short mem_mindma = 2400; module_param(mem_mindma, short, 0444); @@ -1123,7 +1120,7 @@ static ssize_t mem_read(struct file *fil } -static ssize_t mem_write(struct file *file, const char *buffer, size_t count, +static ssize_t mem_write(struct file *file, const char *buffer, size_t count, loff_t *offset) { struct memdata *md = (struct memdata *)file->private_data; @@ -1292,7 +1289,7 @@ static irqreturn_t lynx_irq_handler(int get_pcl(lynx, lynx->async.pcl, &pcl); packet = driver_packet(lynx->async.pcl_queue.next); - list_del(&packet->driver_list); + list_del_init(&packet->driver_list); pci_unmap_single(lynx->dev, lynx->async.header_dma, packet->header_size, PCI_DMA_TODEVICE); @@ -1338,7 +1335,7 @@ static irqreturn_t lynx_irq_handler(int get_pcl(lynx, lynx->iso_send.pcl, &pcl); packet = driver_packet(lynx->iso_send.pcl_queue.next); - list_del(&packet->driver_list); + list_del_init(&packet->driver_list); pci_unmap_single(lynx->dev, lynx->iso_send.header_dma, packet->header_size, PCI_DMA_TODEVICE); @@ -1375,7 +1372,7 @@ static irqreturn_t lynx_irq_handler(int int stat = reg_read(lynx, DMA_CHAN_STAT(CHANNEL_ASYNC_RCV)); PRINTD(KERN_DEBUG, lynx->id, "received packet size %d", - stat & 0x1fff); + stat & 0x1fff); if (stat & DMA_CHAN_STAT_SELFID) { lynx->selfid_size = stat & 0x1fff; @@ -1417,7 +1414,7 @@ static void iso_rcv_bh(struct ti_lynx *l lynx->iso_rcv.stat[idx]); } - if (lynx->iso_rcv.stat[idx] + if (lynx->iso_rcv.stat[idx] & (DMA_CHAN_STAT_PCIERR | DMA_CHAN_STAT_PKTERR)) { PRINT(KERN_INFO, lynx->id, "iso receive error on %d to 0x%p", idx, data); @@ -1460,7 +1457,7 @@ static void remove_card(struct pci_dev * reg_write(lynx, PCI_INT_ENABLE, 0); free_irq(lynx->dev->irq, lynx); - /* Disable IRM Contender */ + /* Disable IRM 
Contender and LCtrl */ if (lynx->phyic.reg_1394a) set_phy_reg(lynx, 4, ~0xc0 & get_phy_reg(lynx, 4)); @@ -1558,7 +1555,7 @@ static int __devinit add_card(struct pci if (lynx->pcl_mem != NULL) { lynx->state = have_pcl_mem; - PRINT(KERN_INFO, lynx->id, + PRINT(KERN_INFO, lynx->id, "allocated PCL memory %d Bytes @ 0x%p", LOCALRAM_SIZE, lynx->pcl_mem); } else { @@ -1668,7 +1665,7 @@ static int __devinit add_card(struct pci lynx->async.channel = CHANNEL_ASYNC_SEND; lynx->iso_send.queue_lock = SPIN_LOCK_UNLOCKED; lynx->iso_send.channel = CHANNEL_ISO_SEND; - + PRINT(KERN_INFO, lynx->id, "remapped memory spaces reg 0x%p, rom 0x%p, " "ram 0x%p, aux 0x%p", lynx->registers, lynx->local_rom, lynx->local_ram, lynx->aux_port); @@ -1698,17 +1695,17 @@ static int __devinit add_card(struct pci pcl.next = PCL_NEXT_INVALID; pcl.async_error_next = PCL_NEXT_INVALID; -#ifdef __BIG_ENDIAN + pcl.buffer[0].control = PCL_CMD_RCV | 16; - pcl.buffer[1].control = PCL_LAST_BUFF | 4080; -#else - pcl.buffer[0].control = PCL_CMD_RCV | PCL_BIGENDIAN | 16; - pcl.buffer[1].control = PCL_LAST_BUFF | 4080; +#ifdef __BIG_ENDIAN + pcl.buffer[0].control |= PCL_BIGENDIAN; #endif + pcl.buffer[1].control = PCL_LAST_BUFF | 4080; + pcl.buffer[0].pointer = lynx->rcv_page_dma; pcl.buffer[1].pointer = lynx->rcv_page_dma + 16; put_pcl(lynx, lynx->rcv_pcl, &pcl); - + pcl.next = pcl_bus(lynx, lynx->async.pcl); pcl.async_error_next = pcl_bus(lynx, lynx->async.pcl); put_pcl(lynx, lynx->async.pcl_start, &pcl); @@ -1729,7 +1726,7 @@ static int __devinit add_card(struct pci int page = i / ISORCV_PER_PAGE; int sec = i % ISORCV_PER_PAGE; - pcl.buffer[0].pointer = lynx->iso_rcv.page_dma[page] + pcl.buffer[0].pointer = lynx->iso_rcv.page_dma[page] + sec * MAX_ISORCV_SIZE; pcl.buffer[1].pointer = pcl.buffer[0].pointer + 4; put_pcl(lynx, lynx->iso_rcv.pcl[i], &pcl); @@ -1755,11 +1752,11 @@ static int __devinit add_card(struct pci reg_write(lynx, LINK_INT_ENABLE, LINK_INT_PHY_TIMEOUT | LINK_INT_PHY_REG_RCVD | 
LINK_INT_PHY_BUSRESET - | LINK_INT_ISO_STUCK | LINK_INT_ASYNC_STUCK + | LINK_INT_ISO_STUCK | LINK_INT_ASYNC_STUCK | LINK_INT_SENT_REJECT | LINK_INT_TX_INVALID_TC | LINK_INT_GRF_OVERFLOW | LINK_INT_ITF_UNDERFLOW | LINK_INT_ATF_UNDERFLOW); - + reg_write(lynx, DMA_WORD0_CMP_VALUE(CHANNEL_ASYNC_RCV), 0); reg_write(lynx, DMA_WORD0_CMP_ENABLE(CHANNEL_ASYNC_RCV), 0xa<<4); reg_write(lynx, DMA_WORD1_CMP_VALUE(CHANNEL_ASYNC_RCV), 0); @@ -1786,14 +1783,14 @@ static int __devinit add_card(struct pci /* attempt to enable contender bit -FIXME- would this work * elsewhere? */ reg_set_bits(lynx, GPIO_CTRL_A, 0x1); - reg_write(lynx, GPIO_DATA_BASE + 0x3c, 0x1); + reg_write(lynx, GPIO_DATA_BASE + 0x3c, 0x1); } else { - /* set the contender bit in the extended PHY register + /* set the contender and LCtrl bit in the extended PHY register * set. (Should check that bis 0,1,2 (=0xE0) is set * in register 2?) */ i = get_phy_reg(lynx, 4); - if (i != -1) set_phy_reg(lynx, 4, i | 0x40); + if (i != -1) set_phy_reg(lynx, 4, i | 0xc0); } @@ -1820,7 +1817,7 @@ static int __devinit add_card(struct pci { /* do i2c stuff */ unsigned char i2c_cmd = 0x10; - struct i2c_msg msg[2] = { { 0x50, 0, 1, &i2c_cmd }, + struct i2c_msg msg[2] = { { 0x50, 0, 1, &i2c_cmd }, { 0x50, I2C_M_RD, 20, (unsigned char*) lynx->bus_info_block } }; --- linux-2.6.6-rc1/drivers/ieee1394/pcilynx.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/pcilynx.h 2004-04-18 22:25:24.757066832 -0700 @@ -47,7 +47,7 @@ struct ti_lynx { enum { clear, have_intr, have_aux_buf, have_pcl_mem, have_1394_buffers, have_iomappings, is_host } state; - + /* remapped memory spaces */ void *registers; void *local_rom; @@ -66,9 +66,9 @@ struct ti_lynx { #endif /* - * use local RAM of LOCALRAM_SIZE bytes for PCLs, which allows for + * use local RAM of LOCALRAM_SIZE bytes for PCLs, which allows for * LOCALRAM_SIZE * 8 PCLs (each sized 128 bytes); - * the following is an allocation bitmap + * the following is an allocation bitmap */ u8 
pcl_bmap[LOCALRAM_SIZE / 1024]; @@ -167,7 +167,7 @@ static inline void reg_clear_bits(const #define SERIAL_EEPROM_CONTROL 0x44 #define PCI_INT_STATUS 0x48 -#define PCI_INT_ENABLE 0x4c +#define PCI_INT_ENABLE 0x4c /* status and enable have identical bit numbers */ #define PCI_INT_INT_PEND (1<<31) #define PCI_INT_FORCED_INT (1<<30) @@ -199,7 +199,7 @@ static inline void reg_clear_bits(const #define LBUS_ADDR_SEL_RAM (0x0<<16) #define LBUS_ADDR_SEL_ROM (0x1<<16) #define LBUS_ADDR_SEL_AUX (0x2<<16) -#define LBUS_ADDR_SEL_ZV (0x3<<16) +#define LBUS_ADDR_SEL_ZV (0x3<<16) #define GPIO_CTRL_A 0xb8 #define GPIO_CTRL_B 0xbc @@ -208,14 +208,14 @@ static inline void reg_clear_bits(const #define DMA_BREG(base, chan) (base + chan * 0x20) #define DMA_SREG(base, chan) (base + chan * 0x10) -#define DMA0_PREV_PCL 0x100 +#define DMA0_PREV_PCL 0x100 #define DMA1_PREV_PCL 0x120 #define DMA2_PREV_PCL 0x140 #define DMA3_PREV_PCL 0x160 #define DMA4_PREV_PCL 0x180 #define DMA_PREV_PCL(chan) (DMA_BREG(DMA0_PREV_PCL, chan)) -#define DMA0_CURRENT_PCL 0x104 +#define DMA0_CURRENT_PCL 0x104 #define DMA1_CURRENT_PCL 0x124 #define DMA2_CURRENT_PCL 0x144 #define DMA3_CURRENT_PCL 0x164 @@ -237,14 +237,14 @@ static inline void reg_clear_bits(const #define DMA_CHAN_STAT_SPECIALACK (1<<14) -#define DMA0_CHAN_CTRL 0x110 +#define DMA0_CHAN_CTRL 0x110 #define DMA1_CHAN_CTRL 0x130 #define DMA2_CHAN_CTRL 0x150 #define DMA3_CHAN_CTRL 0x170 #define DMA4_CHAN_CTRL 0x190 #define DMA_CHAN_CTRL(chan) (DMA_BREG(DMA0_CHAN_CTRL, chan)) /* CHAN_CTRL registers share bits */ -#define DMA_CHAN_CTRL_ENABLE (1<<31) +#define DMA_CHAN_CTRL_ENABLE (1<<31) #define DMA_CHAN_CTRL_BUSY (1<<30) #define DMA_CHAN_CTRL_LINK (1<<29) @@ -353,7 +353,7 @@ static inline void reg_clear_bits(const #define LINK_INT_GRF_OVERFLOW (1<<5) #define LINK_INT_ITF_UNDERFLOW (1<<4) #define LINK_INT_ATF_UNDERFLOW (1<<3) -#define LINK_INT_ISOARB_FAILED (1<<0) +#define LINK_INT_ISOARB_FAILED (1<<0) /* PHY specifics */ #define PHY_VENDORID_TI 0x800028 
--- linux-2.6.6-rc1/drivers/ieee1394/raw1394.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/raw1394.c 2004-04-18 22:25:24.765065616 -0700 @@ -235,10 +235,10 @@ static void remove_host(struct hpsb_host if (hi != NULL) { list_del(&hi->list); host_count--; - /* - FIXME: address ranges should be removed + /* + FIXME: address ranges should be removed and fileinfo states should be initialized - (including setting generation to + (including setting generation to internal-generation ...) */ } @@ -339,7 +339,7 @@ static void iso_receive(struct hpsb_host req->req.misc = 0; req->req.recvb = ptr2int(fi->iso_buffer); req->req.length = min(length, fi->iso_buffer_length); - + list_add_tail(&req->list, &reqs); } } @@ -399,7 +399,7 @@ static void fcp_request(struct hpsb_host req->req.misc = nodeid | (direction << 16); req->req.recvb = ptr2int(fi->fcp_buffer); req->req.length = length; - + list_add_tail(&req->list, &reqs); } } @@ -502,7 +502,7 @@ static int state_initialized(struct file if (khl != NULL) { req->req.misc = host_count; req->data = (quadlet_t *)khl; - + list_for_each_entry(hi, &host_info_list, list) { khl->nodes = hi->host->node_count; strcpy(khl->name, hi->host->driver->name); @@ -536,7 +536,7 @@ static int state_initialized(struct file req->req.error = RAW1394_ERROR_NONE; req->req.generation = get_hpsb_generation(fi->host); - req->req.misc = (fi->host->node_id << 16) + req->req.misc = (fi->host->node_id << 16) | fi->host->node_count; if (fi->protocol_version > 3) { req->req.misc |= NODEID_TO_NODE(fi->host->irm_id) << 8; @@ -635,7 +635,7 @@ static int handle_async_request(struct f req->data = &packet->header[3]; else req->data = packet->data; - + break; case RAW1394_REQ_ASYNC_WRITE: @@ -655,7 +655,7 @@ static int handle_async_request(struct f req->req.length)) req->req.error = RAW1394_ERROR_MEMFAULT; } - + req->req.length = 0; break; @@ -670,7 +670,7 @@ static int handle_async_request(struct f if (copy_from_user(packet->data, int2ptr(req->req.sendb), 
req->req.length)) req->req.error = RAW1394_ERROR_MEMFAULT; - + req->req.length = 0; break; @@ -807,13 +807,12 @@ static int handle_async_send(struct file int expect_response = req->req.misc >> 16; if ((header_length > req->req.length) || - (header_length < 12)) - { + (header_length < 12)) { req->req.error = RAW1394_ERROR_INVALID_ARG; req->req.length = 0; queue_complete_req(req); return sizeof(struct raw1394_request); - } + } packet = hpsb_alloc_packet(req->req.length-header_length); req->packet = packet; @@ -886,7 +885,7 @@ static int arm_read (struct hpsb_host *h entry = fi->addr_list.next; while (entry != &(fi->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (((arm_addr->start) <= (addr)) && + if (((arm_addr->start) <= (addr)) && ((arm_addr->end) >= (addr+length))) { found = 1; break; @@ -914,7 +913,7 @@ static int arm_read (struct hpsb_host *h if (rcode == -1) { if (arm_addr->access_rights & ARM_READ) { if (!(arm_addr->client_transactions & ARM_READ)) { - memcpy(buffer,(arm_addr->addr_space_buffer)+(addr-(arm_addr->start)), + memcpy(buffer,(arm_addr->addr_space_buffer)+(addr-(arm_addr->start)), length); DBGMSG("arm_read -> (rcode_complete)"); rcode = RCODE_COMPLETE; @@ -930,7 +929,7 @@ static int arm_read (struct hpsb_host *h if (!req) { DBGMSG("arm_read -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. The request may be retried */ } if (rcode == RCODE_COMPLETE) { @@ -946,7 +945,7 @@ static int arm_read (struct hpsb_host *h free_pending_request(req); DBGMSG("arm_read -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. 
The request may be retried */ } req->free_data=1; @@ -958,19 +957,19 @@ static int arm_read (struct hpsb_host *h req->req.recvb = arm_addr->recvb; req->req.length = size; arm_req_resp = (struct arm_request_response *) (req->data); - arm_req = (struct arm_request *) ((byte_t *)(req->data) + + arm_req = (struct arm_request *) ((byte_t *)(req->data) + (sizeof (struct arm_request_response))); - arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + + arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + (sizeof(struct arm_request))); arm_req->buffer = NULL; arm_resp->buffer = NULL; if (rcode == RCODE_COMPLETE) { - arm_resp->buffer = ((byte_t *)(arm_resp) + + arm_resp->buffer = ((byte_t *)(arm_resp) + (sizeof(struct arm_response))); memcpy (arm_resp->buffer, - (arm_addr->addr_space_buffer)+(addr-(arm_addr->start)), + (arm_addr->addr_space_buffer)+(addr-(arm_addr->start)), length); - arm_resp->buffer = int2ptr((arm_addr->recvb) + + arm_resp->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response)); @@ -985,9 +984,9 @@ static int arm_read (struct hpsb_host *h arm_req->destination_nodeid = host->node_id; arm_req->tlabel = (flags >> 10) & 0x3f; arm_req->tcode = (flags >> 4) & 0x0f; - arm_req_resp->request = int2ptr((arm_addr->recvb) + + arm_req_resp->request = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response)); - arm_req_resp->response = int2ptr((arm_addr->recvb) + + arm_req_resp->response = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request)); queue_complete_req(req); @@ -1005,7 +1004,7 @@ static int arm_write (struct hpsb_host * struct list_head *entry; struct arm_addr *arm_addr = NULL; struct arm_request *arm_req = NULL; - struct arm_response *arm_resp = NULL; + struct arm_response *arm_resp = NULL; int found=0, size=0, rcode=-1, length_conflict=0; struct arm_request_response *arm_req_resp = NULL; @@ -1020,7 +1019,7 @@ static int 
arm_write (struct hpsb_host * entry = fi->addr_list.next; while (entry != &(fi->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (((arm_addr->start) <= (addr)) && + if (((arm_addr->start) <= (addr)) && ((arm_addr->end) >= (addr+length))) { found = 1; break; @@ -1065,7 +1064,7 @@ static int arm_write (struct hpsb_host * if (!req) { DBGMSG("arm_write -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. The request my be retried */ } size = sizeof(struct arm_request)+sizeof(struct arm_response) + @@ -1076,7 +1075,7 @@ static int arm_write (struct hpsb_host * free_pending_request(req); DBGMSG("arm_write -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. The request may be retried */ } req->free_data=1; @@ -1088,15 +1087,15 @@ static int arm_write (struct hpsb_host * req->req.recvb = arm_addr->recvb; req->req.length = size; arm_req_resp = (struct arm_request_response *) (req->data); - arm_req = (struct arm_request *) ((byte_t *)(req->data) + + arm_req = (struct arm_request *) ((byte_t *)(req->data) + (sizeof (struct arm_request_response))); - arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + + arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + (sizeof(struct arm_request))); - arm_req->buffer = ((byte_t *)(arm_resp) + + arm_req->buffer = ((byte_t *)(arm_resp) + (sizeof(struct arm_response))); arm_resp->buffer = NULL; memcpy (arm_req->buffer, data, length); - arm_req->buffer = int2ptr((arm_addr->recvb) + + arm_req->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response)); @@ -1110,9 +1109,9 @@ static int arm_write (struct hpsb_host * arm_req->tcode = (flags >> 4) & 0x0f; 
arm_resp->buffer_length = 0; arm_resp->response_code = rcode; - arm_req_resp->request = int2ptr((arm_addr->recvb) + + arm_req_resp->request = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response)); - arm_req_resp->response = int2ptr((arm_addr->recvb) + + arm_req_resp->response = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request)); queue_complete_req(req); @@ -1130,7 +1129,7 @@ static int arm_lock (struct hpsb_host *h struct list_head *entry; struct arm_addr *arm_addr = NULL; struct arm_request *arm_req = NULL; - struct arm_response *arm_resp = NULL; + struct arm_response *arm_resp = NULL; int found=0, size=0, rcode=-1; quadlet_t old, new; struct arm_request_response *arm_req_resp = NULL; @@ -1138,12 +1137,12 @@ static int arm_lock (struct hpsb_host *h if (((ext_tcode & 0xFF) == EXTCODE_FETCH_ADD) || ((ext_tcode & 0xFF) == EXTCODE_LITTLE_ADD)) { DBGMSG("arm_lock called by node: %X " - "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X", + "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X", nodeid, (u16) ((addr >>32) & 0xFFFF), (u32) (addr & 0xFFFFFFFF), ext_tcode & 0xFF , be32_to_cpu(data)); } else { DBGMSG("arm_lock called by node: %X " - "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X arg: %8.8X", + "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X arg: %8.8X", nodeid, (u16) ((addr >>32) & 0xFFFF), (u32) (addr & 0xFFFFFFFF), ext_tcode & 0xFF , be32_to_cpu(data), be32_to_cpu(arg)); } @@ -1154,7 +1153,7 @@ static int arm_lock (struct hpsb_host *h entry = fi->addr_list.next; while (entry != &(fi->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (((arm_addr->start) <= (addr)) && + if (((arm_addr->start) <= (addr)) && ((arm_addr->end) >= (addr+sizeof(*store)))) { found = 1; break; @@ -1199,7 +1198,7 @@ static int arm_lock (struct hpsb_host *h break; case (EXTCODE_BOUNDED_ADD): if (old != arg) { - new = cpu_to_be32(be32_to_cpu(data) + + new = cpu_to_be32(be32_to_cpu(data) + be32_to_cpu(old)); } else { 
new = old; @@ -1207,7 +1206,7 @@ static int arm_lock (struct hpsb_host *h break; case (EXTCODE_WRAP_ADD): if (old != arg) { - new = cpu_to_be32(be32_to_cpu(data) + + new = cpu_to_be32(be32_to_cpu(data) + be32_to_cpu(old)); } else { new = data; @@ -1224,7 +1223,7 @@ static int arm_lock (struct hpsb_host *h rcode = RCODE_COMPLETE; memcpy (store, &old, sizeof(*store)); memcpy ((arm_addr->addr_space_buffer)+ - (addr-(arm_addr->start)), + (addr-(arm_addr->start)), &new, sizeof(*store)); } } @@ -1239,31 +1238,31 @@ static int arm_lock (struct hpsb_host *h if (!req) { DBGMSG("arm_lock -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. The request may be retried */ } size = sizeof(struct arm_request)+sizeof(struct arm_response) + - 3 * sizeof(*store) + + 3 * sizeof(*store) + sizeof (struct arm_request_response); /* maximum */ req->data = kmalloc(size, SLAB_ATOMIC); if (!(req->data)) { free_pending_request(req); DBGMSG("arm_lock -> rcode_conflict_error"); spin_unlock(&host_info_lock); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. 
The request may be retried */ } req->free_data=1; arm_req_resp = (struct arm_request_response *) (req->data); - arm_req = (struct arm_request *) ((byte_t *)(req->data) + + arm_req = (struct arm_request *) ((byte_t *)(req->data) + (sizeof (struct arm_request_response))); - arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + + arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + (sizeof(struct arm_request))); - arm_req->buffer = ((byte_t *)(arm_resp) + + arm_req->buffer = ((byte_t *)(arm_resp) + (sizeof(struct arm_response))); - arm_resp->buffer = ((byte_t *)(arm_req->buffer) + + arm_resp->buffer = ((byte_t *)(arm_req->buffer) + (2* sizeof(*store))); - if ((ext_tcode == EXTCODE_FETCH_ADD) || + if ((ext_tcode == EXTCODE_FETCH_ADD) || (ext_tcode == EXTCODE_LITTLE_ADD)) { arm_req->buffer_length = sizeof(*store); memcpy (arm_req->buffer, &data, sizeof(*store)); @@ -1271,7 +1270,7 @@ static int arm_lock (struct hpsb_host *h } else { arm_req->buffer_length = 2 * sizeof(*store); memcpy (arm_req->buffer, &arg, sizeof(*store)); - memcpy (((arm_req->buffer) + sizeof(*store)), + memcpy (((arm_req->buffer) + sizeof(*store)), &data, sizeof(*store)); } if (rcode == RCODE_COMPLETE) { @@ -1284,7 +1283,7 @@ static int arm_lock (struct hpsb_host *h req->file_info = fi; req->req.type = RAW1394_REQ_ARM; req->req.generation = get_hpsb_generation(host); - req->req.misc = ( (((sizeof(*store)) << 16) & (0xFFFF0000)) | + req->req.misc = ( (((sizeof(*store)) << 16) & (0xFFFF0000)) | (ARM_LOCK & 0xFF)); req->req.tag = arm_addr->arm_tag; req->req.recvb = arm_addr->recvb; @@ -1297,16 +1296,16 @@ static int arm_lock (struct hpsb_host *h arm_req->tlabel = (flags >> 10) & 0x3f; arm_req->tcode = (flags >> 4) & 0x0f; arm_resp->response_code = rcode; - arm_req_resp->request = int2ptr((arm_addr->recvb) + + arm_req_resp->request = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response)); - arm_req_resp->response = int2ptr((arm_addr->recvb) + + arm_req_resp->response = 
int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request)); - arm_req->buffer = int2ptr((arm_addr->recvb) + + arm_req->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response)); - arm_resp->buffer = int2ptr((arm_addr->recvb) + + arm_resp->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response) + @@ -1336,20 +1335,20 @@ static int arm_lock64 (struct hpsb_host DBGMSG("arm_lock64 called by node: %X " "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X %8.8X ", nodeid, (u16) ((addr >>32) & 0xFFFF), - (u32) (addr & 0xFFFFFFFF), - ext_tcode & 0xFF , - (u32) ((be64_to_cpu(data) >> 32) & 0xFFFFFFFF), + (u32) (addr & 0xFFFFFFFF), + ext_tcode & 0xFF , + (u32) ((be64_to_cpu(data) >> 32) & 0xFFFFFFFF), (u32) (be64_to_cpu(data) & 0xFFFFFFFF)); } else { DBGMSG("arm_lock64 called by node: %X " "addr: %4.4x %8.8x extcode: %2.2X data: %8.8X %8.8X arg: " "%8.8X %8.8X ", nodeid, (u16) ((addr >>32) & 0xFFFF), - (u32) (addr & 0xFFFFFFFF), - ext_tcode & 0xFF , - (u32) ((be64_to_cpu(data) >> 32) & 0xFFFFFFFF), + (u32) (addr & 0xFFFFFFFF), + ext_tcode & 0xFF , + (u32) ((be64_to_cpu(data) >> 32) & 0xFFFFFFFF), (u32) (be64_to_cpu(data) & 0xFFFFFFFF), - (u32) ((be64_to_cpu(arg) >> 32) & 0xFFFFFFFF), + (u32) ((be64_to_cpu(arg) >> 32) & 0xFFFFFFFF), (u32) (be64_to_cpu(arg) & 0xFFFFFFFF)); } spin_lock(&host_info_lock); @@ -1359,7 +1358,7 @@ static int arm_lock64 (struct hpsb_host entry = fi->addr_list.next; while (entry != &(fi->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (((arm_addr->start) <= (addr)) && + if (((arm_addr->start) <= (addr)) && ((arm_addr->end) >= (addr+sizeof(*store)))) { found = 1; break; @@ -1404,7 +1403,7 @@ static int arm_lock64 (struct hpsb_host break; case (EXTCODE_BOUNDED_ADD): if (old != arg) { - new = cpu_to_be64(be64_to_cpu(data) + + new = 
cpu_to_be64(be64_to_cpu(data) + be64_to_cpu(old)); } else { new = old; @@ -1412,7 +1411,7 @@ static int arm_lock64 (struct hpsb_host break; case (EXTCODE_WRAP_ADD): if (old != arg) { - new = cpu_to_be64(be64_to_cpu(data) + + new = cpu_to_be64(be64_to_cpu(data) + be64_to_cpu(old)); } else { new = data; @@ -1429,9 +1428,9 @@ static int arm_lock64 (struct hpsb_host rcode = RCODE_COMPLETE; memcpy (store, &old, sizeof(*store)); memcpy ((arm_addr->addr_space_buffer)+ - (addr-(arm_addr->start)), + (addr-(arm_addr->start)), &new, sizeof(*store)); - } + } } } else { rcode = RCODE_TYPE_ERROR; /* function not allowed */ @@ -1444,7 +1443,7 @@ static int arm_lock64 (struct hpsb_host if (!req) { spin_unlock(&host_info_lock); DBGMSG("arm_lock64 -> rcode_conflict_error"); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. The request may be retried */ } size = sizeof(struct arm_request)+sizeof(struct arm_response) + @@ -1455,20 +1454,20 @@ static int arm_lock64 (struct hpsb_host free_pending_request(req); spin_unlock(&host_info_lock); DBGMSG("arm_lock64 -> rcode_conflict_error"); - return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. + return(RCODE_CONFLICT_ERROR); /* A resource conflict was detected. 
The request may be retried */ } req->free_data=1; arm_req_resp = (struct arm_request_response *) (req->data); - arm_req = (struct arm_request *) ((byte_t *)(req->data) + + arm_req = (struct arm_request *) ((byte_t *)(req->data) + (sizeof (struct arm_request_response))); - arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + + arm_resp = (struct arm_response *) ((byte_t *)(arm_req) + (sizeof(struct arm_request))); - arm_req->buffer = ((byte_t *)(arm_resp) + + arm_req->buffer = ((byte_t *)(arm_resp) + (sizeof(struct arm_response))); - arm_resp->buffer = ((byte_t *)(arm_req->buffer) + + arm_resp->buffer = ((byte_t *)(arm_req->buffer) + (2* sizeof(*store))); - if ((ext_tcode == EXTCODE_FETCH_ADD) || + if ((ext_tcode == EXTCODE_FETCH_ADD) || (ext_tcode == EXTCODE_LITTLE_ADD)) { arm_req->buffer_length = sizeof(*store); memcpy (arm_req->buffer, &data, sizeof(*store)); @@ -1476,7 +1475,7 @@ static int arm_lock64 (struct hpsb_host } else { arm_req->buffer_length = 2 * sizeof(*store); memcpy (arm_req->buffer, &arg, sizeof(*store)); - memcpy (((arm_req->buffer) + sizeof(*store)), + memcpy (((arm_req->buffer) + sizeof(*store)), &data, sizeof(*store)); } if (rcode == RCODE_COMPLETE) { @@ -1489,7 +1488,7 @@ static int arm_lock64 (struct hpsb_host req->file_info = fi; req->req.type = RAW1394_REQ_ARM; req->req.generation = get_hpsb_generation(host); - req->req.misc = ( (((sizeof(*store)) << 16) & (0xFFFF0000)) | + req->req.misc = ( (((sizeof(*store)) << 16) & (0xFFFF0000)) | (ARM_LOCK & 0xFF)); req->req.tag = arm_addr->arm_tag; req->req.recvb = arm_addr->recvb; @@ -1502,16 +1501,16 @@ static int arm_lock64 (struct hpsb_host arm_req->tlabel = (flags >> 10) & 0x3f; arm_req->tcode = (flags >> 4) & 0x0f; arm_resp->response_code = rcode; - arm_req_resp->request = int2ptr((arm_addr->recvb) + + arm_req_resp->request = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response)); - arm_req_resp->response = int2ptr((arm_addr->recvb) + + arm_req_resp->response = 
int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request)); - arm_req->buffer = int2ptr((arm_addr->recvb) + + arm_req->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response)); - arm_resp->buffer = int2ptr((arm_addr->recvb) + + arm_resp->buffer = int2ptr((arm_addr->recvb) + sizeof (struct arm_request_response) + sizeof (struct arm_request) + sizeof (struct arm_response) + @@ -1548,11 +1547,11 @@ static int arm_register(struct file_info return (-EINVAL); } /* addr-list-entry for fileinfo */ - addr = (struct arm_addr *)kmalloc(sizeof(struct arm_addr), SLAB_KERNEL); + addr = (struct arm_addr *)kmalloc(sizeof(struct arm_addr), SLAB_KERNEL); if (!addr) { req->req.length = 0; return (-ENOMEM); - } + } /* allocation of addr_space_buffer */ addr->addr_space_buffer = (u8 *)vmalloc(req->req.length); if (!(addr->addr_space_buffer)) { @@ -1593,7 +1592,7 @@ static int arm_register(struct file_info entry = fi_hlp->addr_list.next; while (entry != &(fi_hlp->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if ( (arm_addr->start == addr->start) && + if ( (arm_addr->start == addr->start) && (arm_addr->end == addr->end)) { DBGMSG("same host ownes same " "addressrange -> EALREADY"); @@ -1620,7 +1619,7 @@ static int arm_register(struct file_info entry = fi_hlp->addr_list.next; while (entry != &(fi_hlp->addr_list)) { arm_addr = list_entry(entry, struct arm_addr, addr_list); - if ( (arm_addr->start == addr->start) && + if ( (arm_addr->start == addr->start) && (arm_addr->end == addr->end)) { DBGMSG("another host ownes same " "addressrange"); @@ -1662,7 +1661,7 @@ static int arm_register(struct file_info vfree(addr->addr_space_buffer); kfree(addr); spin_unlock_irqrestore(&host_info_lock, flags); - return (-EALREADY); + return (-EALREADY); } spin_unlock_irqrestore(&host_info_lock, flags); free_pending_request(req); /* immediate success or fail */ @@ 
-1703,16 +1702,16 @@ static int arm_unregister(struct file_in } DBGMSG("arm_Unregister addr found"); another_host = 0; - /* another host with valid address-entry containing + /* another host with valid address-entry containing same addressrange */ list_for_each_entry(hi, &host_info_list, list) { if (hi->host != fi->host) { list_for_each_entry(fi_hlp, &hi->file_info_list, list) { entry = fi_hlp->addr_list.next; while (entry != &(fi_hlp->addr_list)) { - arm_addr = list_entry(entry, + arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (arm_addr->start == + if (arm_addr->start == addr->start) { DBGMSG("another host ownes " "same addressrange"); @@ -1735,7 +1734,7 @@ static int arm_unregister(struct file_in free_pending_request(req); /* immediate success or fail */ spin_unlock_irqrestore(&host_info_lock, flags); return sizeof(struct raw1394_request); - } + } retval = hpsb_unregister_addrspace(&raw1394_highlevel, fi->host, addr->start); if (!retval) { printk(KERN_ERR "raw1394: arm_Unregister failed -> EINVAL\n"); @@ -1863,7 +1862,7 @@ static int reset_notification(struct fil fi->notification=(u8)req->req.misc; free_pending_request(req); /* we have to free the request, because we queue no response, and therefore nobody will free it */ return sizeof(struct raw1394_request); - } + } /* error EINVAL (22) invalid argument */ return (-EINVAL); } @@ -1905,7 +1904,7 @@ static int get_config_rom(struct file_in status = csr1212_read(fi->host->csr.rom, CSR1212_CONFIG_ROM_SPACE_OFFSET, data, req->req.length); - if (copy_to_user(int2ptr(req->req.recvb), data, + if (copy_to_user(int2ptr(req->req.recvb), data, req->req.length)) ret = -EFAULT; if (copy_to_user(int2ptr(req->req.tag), &fi->host->csr.rom->cache_head->len, @@ -1914,7 +1913,7 @@ static int get_config_rom(struct file_in if (copy_to_user(int2ptr(req->req.address), &fi->host->csr.generation, sizeof(fi->host->csr.generation))) ret = -EFAULT; - if (copy_to_user(int2ptr(req->req.sendb), &status, + if 
(copy_to_user(int2ptr(req->req.sendb), &status, sizeof(status))) ret = -EFAULT; kfree(data); @@ -1929,14 +1928,14 @@ static int update_config_rom(struct file int ret=sizeof(struct raw1394_request); quadlet_t *data = kmalloc(req->req.length, SLAB_KERNEL); if (!data) return -ENOMEM; - if (copy_from_user(data,int2ptr(req->req.sendb), + if (copy_from_user(data,int2ptr(req->req.sendb), req->req.length)) { ret= -EFAULT; } else { - int status = hpsb_update_config_rom(fi->host, - data, req->req.length, + int status = hpsb_update_config_rom(fi->host, + data, req->req.length, (unsigned char) req->req.misc); - if (copy_to_user(int2ptr(req->req.recvb), + if (copy_to_user(int2ptr(req->req.recvb), &status, sizeof(status))) ret = -ENOMEM; } @@ -2033,7 +2032,7 @@ static int modify_config_rom(struct file if (ret == CSR1212_SUCCESS) { ret = hpsb_update_config_rom_image(fi->host); - if (ret >= 0 && copy_to_user(int2ptr(req->req.recvb), + if (ret >= 0 && copy_to_user(int2ptr(req->req.recvb), &dr, sizeof(dr))) { ret = -ENOMEM; } @@ -2044,7 +2043,7 @@ static int modify_config_rom(struct file if (ret >= 0) { /* we have to free the request, because we queue no response, - * and therefore nobody will free it */ + * and therefore nobody will free it */ free_pending_request(req); return sizeof(struct raw1394_request); } else { @@ -2362,7 +2361,7 @@ static int raw1394_iso_recv_packets(stru &fi->iso_handle->infos[packet], sizeof(struct raw1394_iso_packet_info))) return -EFAULT; - + packet = (packet + 1) % fi->iso_handle->buf_packets; } @@ -2534,7 +2533,7 @@ static int raw1394_open(struct inode *in fi = kmalloc(sizeof(struct file_info), SLAB_KERNEL); if (fi == NULL) return -ENOMEM; - + memset(fi, 0, sizeof(struct file_info)); fi->notification = (u8) RAW1394_NOTIFY_ON; /* busreset notification */ @@ -2588,16 +2587,16 @@ static int raw1394_release(struct inode another_host = 0; lh = fi->addr_list.next; addr = list_entry(lh, struct arm_addr, addr_list); - /* another host with valid address-entry 
containing + /* another host with valid address-entry containing same addressrange? */ list_for_each_entry(hi, &host_info_list, list) { if (hi->host != fi->host) { list_for_each_entry(fi_hlp, &hi->file_info_list, list) { entry = fi_hlp->addr_list.next; while (entry != &(fi_hlp->addr_list)) { - arm_addr = list_entry(entry, + arm_addr = list_entry(entry, struct arm_addr, addr_list); - if (arm_addr->start == + if (arm_addr->start == addr->start) { DBGMSG("raw1394_release: " "another host ownes " @@ -2726,13 +2725,13 @@ static struct hpsb_highlevel raw1394_hig static struct cdev raw1394_cdev; static struct file_operations raw1394_fops = { .owner = THIS_MODULE, - .read = raw1394_read, + .read = raw1394_read, .write = raw1394_write, .mmap = raw1394_mmap, .ioctl = raw1394_ioctl, - .poll = raw1394_poll, - .open = raw1394_open, - .release = raw1394_release, + .poll = raw1394_poll, + .open = raw1394_open, + .release = raw1394_release, }; static int __init init_raw1394(void) @@ -2746,9 +2745,9 @@ static int __init init_raw1394(void) cdev_init(&raw1394_cdev, &raw1394_fops); raw1394_cdev.owner = THIS_MODULE; + kobject_set_name(&raw1394_cdev.kobj, RAW1394_DEVICE_NAME); ret = cdev_add(&raw1394_cdev, IEEE1394_RAW1394_DEV, 1); if (ret) { - /* jmc: leaves reference to (static) raw1394_cdev */ HPSB_ERR("raw1394 failed to register minor device block"); devfs_remove(RAW1394_DEVICE_NAME); hpsb_unregister_highlevel(&raw1394_highlevel); --- linux-2.6.6-rc1/drivers/ieee1394/raw1394-private.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/raw1394-private.h 2004-04-18 22:25:24.757066832 -0700 @@ -33,7 +33,7 @@ struct file_info { spinlock_t reqlists_lock; wait_queue_head_t poll_wait_complete; - struct list_head addr_list; + struct list_head addr_list; u8 *fcp_buffer; --- linux-2.6.6-rc1/drivers/ieee1394/sbp2.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/sbp2.c 2004-04-18 22:25:24.771064704 -0700 @@ -78,7 +78,7 @@ #include "sbp2.h" static char version[] 
__devinitdata = - "$Rev: 1170 $ Ben Collins "; + "$Rev: 1205 $ Ben Collins "; /* * Module load parameter definitions @@ -137,7 +137,7 @@ MODULE_PARM_DESC(exclusive_login, "Exclu * if your sbp2 device is not properly handling the SCSI inquiry command. * This hack makes the inquiry look more like a typical MS Windows * inquiry. - * + * * If force_inquiry_hack=1 is required for your device to work, * please submit the logged sbp2_firmware_revision value of this device to * the linux1394-devel mailing list. @@ -206,7 +206,7 @@ static u32 global_outstanding_dmas = 0; #define SBP2_INFO(fmt, args...) HPSB_INFO("sbp2: "fmt, ## args) #define SBP2_NOTICE(fmt, args...) HPSB_NOTICE("sbp2: "fmt, ## args) #define SBP2_WARN(fmt, args...) HPSB_WARN("sbp2: "fmt, ## args) -#else +#else #define SBP2_DEBUG(fmt, args...) #define SBP2_INFO(fmt, args...) HPSB_INFO("sbp2: "fmt, ## args) #define SBP2_NOTICE(fmt, args...) HPSB_NOTICE("sbp2: "fmt, ## args) @@ -226,7 +226,7 @@ static void sbp2scsi_complete_all_comman static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id, u32 scsi_status, Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)); - + static Scsi_Host_Template scsi_driver_template; const u8 sbp2_speedto_max_payload[] = { 0x7, 0x8, 0x9, 0xA, 0xB, 0xC }; @@ -409,7 +409,7 @@ static int sbp2util_create_command_orb_p struct sbp2_command_info *command; orbs = serialize_io ? 
2 : SBP2_MAX_CMDS; - + spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); for (i = 0; i < orbs; i++) { command = (struct sbp2_command_info *) @@ -445,7 +445,7 @@ static void sbp2util_remove_command_orb_ struct list_head *lh, *next; struct sbp2_command_info *command; unsigned long flags; - + spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags); if (!list_empty(&scsi_id->sbp2_command_orb_completed)) { list_for_each_safe(lh, next, &scsi_id->sbp2_command_orb_completed) { @@ -468,7 +468,7 @@ static void sbp2util_remove_command_orb_ return; } -/* +/* * This function finds the sbp2_command for a given outstanding command * orb.Only looks at the inuse list. */ @@ -494,7 +494,7 @@ static struct sbp2_command_info *sbp2uti return(NULL); } -/* +/* * This function finds the sbp2_command for a given outstanding SCpnt. * Only looks at the inuse list. */ @@ -520,8 +520,8 @@ static struct sbp2_command_info *sbp2uti * This function allocates a command orb used to send a scsi command. */ static struct sbp2_command_info *sbp2util_allocate_command_orb( - struct scsi_id_instance_data *scsi_id, - Scsi_Cmnd *Current_SCpnt, + struct scsi_id_instance_data *scsi_id, + Scsi_Cmnd *Current_SCpnt, void (*Current_done)(Scsi_Cmnd *)) { struct list_head *lh; @@ -647,8 +647,8 @@ static int sbp2_update(struct unit_direc SBP2_DEBUG("sbp2_update"); if (sbp2_reconnect_device(scsi_id)) { - - /* + + /* * Ok, reconnect has failed. Perhaps we didn't * reconnect fast enough. Try doing a regular login, but * first do a logout just in case of any weirdness. 
@@ -658,7 +658,7 @@ static int sbp2_update(struct unit_direc if (sbp2_login_device(scsi_id)) { /* Login failed too, just fail, and the backend * will call our sbp2_remove for us */ - SBP2_INFO("sbp2_reconnect_device failed!"); + SBP2_ERR("Failed to reconnect to sbp2 device!"); return -EBUSY; } } @@ -851,7 +851,7 @@ alloc_fail: scsi_id->query_logins_orb_dma); SBP2_DMA_FREE("query logins ORB DMA"); } - + if (scsi_id->logout_orb) { pci_free_consistent(hi->host->pdev, sizeof(struct sbp2_logout_orb), @@ -905,7 +905,6 @@ alloc_fail: * allows someone else to login instead. One second makes sense. */ set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); - /* * Login to the sbp-2 device @@ -920,12 +919,12 @@ alloc_fail: * Set max retries to something large on the device */ sbp2_set_busy_timeout(scsi_id); - + /* * Do a SBP-2 fetch agent reset */ sbp2_agent_reset(scsi_id, 1); - + /* * Get the max speed and packet size that we can use */ @@ -1157,14 +1156,14 @@ static int sbp2_query_logins(struct scsi max_logins = RESPONSE_GET_MAX_LOGINS(scsi_id->query_logins_response->length_max_logins); SBP2_DEBUG("Maximum concurrent logins supported: %d", max_logins); - + active_logins = RESPONSE_GET_ACTIVE_LOGINS(scsi_id->query_logins_response->length_max_logins); SBP2_DEBUG("Number of active logins: %d", active_logins); - + if (active_logins >= max_logins) { return(-EIO); } - + return 0; } @@ -1172,7 +1171,7 @@ static int sbp2_query_logins(struct scsi * This function is called in order to login to a particular SBP-2 device, * after a bus reset. 
*/ -static int sbp2_login_device(struct scsi_id_instance_data *scsi_id) +static int sbp2_login_device(struct scsi_id_instance_data *scsi_id) { struct sbp2scsi_host_info *hi = scsi_id->hi; quadlet_t data[2]; @@ -1192,7 +1191,7 @@ static int sbp2_login_device(struct scsi } /* Set-up login ORB, assume no password */ - scsi_id->login_orb->password_hi = 0; + scsi_id->login_orb->password_hi = 0; scsi_id->login_orb->password_lo = 0; SBP2_DEBUG("sbp2_login_device: password_hi/lo initialized"); @@ -1216,7 +1215,7 @@ static int sbp2_login_device(struct scsi ORB_SET_LOGIN_RESP_LENGTH(sizeof(struct sbp2_login_response)); SBP2_DEBUG("sbp2_login_device: passwd_resp_lengths initialized"); - scsi_id->login_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + + scsi_id->login_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + SBP2_STATUS_FIFO_ENTRY_TO_OFFSET(scsi_id->ud->id); scsi_id->login_orb->status_FIFO_hi = (ORB_SET_NODE_ID(hi->host->node_id) | SBP2_STATUS_FIFO_ADDRESS_HI); @@ -1229,7 +1228,7 @@ static int sbp2_login_device(struct scsi SBP2_DEBUG("sbp2_login_device: orb byte-swapped"); - sbp2util_packet_dump(scsi_id->login_orb, sizeof(struct sbp2_login_orb), + sbp2util_packet_dump(scsi_id->login_orb, sizeof(struct sbp2_login_orb), "sbp2 login orb", scsi_id->login_orb_dma); /* @@ -1255,7 +1254,7 @@ static int sbp2_login_device(struct scsi SBP2_DEBUG("sbp2_login_device: written"); /* - * Wait for login status (up to 20 seconds)... + * Wait for login status (up to 20 seconds)... */ if (sbp2util_down_timeout(&scsi_id->sbp2_login_complete, 20*HZ)) { SBP2_ERR("Error logging into SBP-2 device - login timed-out"); @@ -1310,10 +1309,11 @@ static int sbp2_login_device(struct scsi * This function is called in order to logout from a particular SBP-2 * device, usually called during driver unload. 
*/ -static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id) +static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id) { struct sbp2scsi_host_info *hi = scsi_id->hi; quadlet_t data[2]; + int error; SBP2_DEBUG("sbp2_logout_device"); @@ -1332,7 +1332,7 @@ static int sbp2_logout_device(struct scs scsi_id->logout_orb->login_ID_misc |= ORB_SET_NOTIFY(1); scsi_id->logout_orb->reserved5 = 0x0; - scsi_id->logout_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + + scsi_id->logout_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + SBP2_STATUS_FIFO_ENTRY_TO_OFFSET(scsi_id->ud->id); scsi_id->logout_orb->status_FIFO_hi = (ORB_SET_NODE_ID(hi->host->node_id) | SBP2_STATUS_FIFO_ADDRESS_HI); @@ -1342,7 +1342,7 @@ static int sbp2_logout_device(struct scs */ sbp2util_cpu_to_be32_buffer(scsi_id->logout_orb, sizeof(struct sbp2_logout_orb)); - sbp2util_packet_dump(scsi_id->logout_orb, sizeof(struct sbp2_logout_orb), + sbp2util_packet_dump(scsi_id->logout_orb, sizeof(struct sbp2_logout_orb), "sbp2 logout orb", scsi_id->logout_orb_dma); /* @@ -1354,10 +1354,15 @@ static int sbp2_logout_device(struct scs atomic_set(&scsi_id->sbp2_login_complete, 0); - hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8); + error = hpsb_node_write(scsi_id->ne, + scsi_id->sbp2_management_agent_addr, + data, 8); + if (error) + return error; /* Wait for device to logout...1 second. */ - sbp2util_down_timeout(&scsi_id->sbp2_login_complete, HZ); + if (sbp2util_down_timeout(&scsi_id->sbp2_login_complete, HZ)) + return -EIO; SBP2_INFO("Logged out of SBP-2 device"); @@ -1369,10 +1374,11 @@ static int sbp2_logout_device(struct scs * This function is called in order to reconnect to a particular SBP-2 * device, after a bus reset. 
*/ -static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id) +static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id) { struct sbp2scsi_host_info *hi = scsi_id->hi; quadlet_t data[2]; + int error; SBP2_DEBUG("sbp2_reconnect_device"); @@ -1392,7 +1398,7 @@ static int sbp2_reconnect_device(struct scsi_id->reconnect_orb->login_ID_misc |= ORB_SET_NOTIFY(1); scsi_id->reconnect_orb->reserved5 = 0x0; - scsi_id->reconnect_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + + scsi_id->reconnect_orb->status_FIFO_lo = SBP2_STATUS_FIFO_ADDRESS_LO + SBP2_STATUS_FIFO_ENTRY_TO_OFFSET(scsi_id->ud->id); scsi_id->reconnect_orb->status_FIFO_hi = (ORB_SET_NODE_ID(hi->host->node_id) | SBP2_STATUS_FIFO_ADDRESS_HI); @@ -1402,7 +1408,7 @@ static int sbp2_reconnect_device(struct */ sbp2util_cpu_to_be32_buffer(scsi_id->reconnect_orb, sizeof(struct sbp2_reconnect_orb)); - sbp2util_packet_dump(scsi_id->reconnect_orb, sizeof(struct sbp2_reconnect_orb), + sbp2util_packet_dump(scsi_id->reconnect_orb, sizeof(struct sbp2_reconnect_orb), "sbp2 reconnect orb", scsi_id->reconnect_orb_dma); /* @@ -1419,7 +1425,11 @@ static int sbp2_reconnect_device(struct atomic_set(&scsi_id->sbp2_login_complete, 0); - hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8); + error = hpsb_node_write(scsi_id->ne, + scsi_id->sbp2_management_agent_addr, + data, 8); + if (error) + return error; /* * Wait for reconnect status (up to 1 second)... @@ -1448,7 +1458,7 @@ static int sbp2_reconnect_device(struct return(-EIO); } - SBP2_INFO("Reconnected to SBP-2 device"); + HPSB_DEBUG("Reconnected to SBP-2 device"); return(0); @@ -1456,7 +1466,7 @@ static int sbp2_reconnect_device(struct /* * This function is called in order to set the busy timeout (number of - * retries to attempt) on the sbp2 device. + * retries to attempt) on the sbp2 device. 
*/ static int sbp2_set_busy_timeout(struct scsi_id_instance_data *scsi_id) { @@ -1480,7 +1490,7 @@ static int sbp2_set_busy_timeout(struct /* * This function is called to parse sbp2 device's config rom unit * directory. Used to determine things like sbp2 management agent offset, - * and command set used (SCSI or RBC). + * and command set used (SCSI or RBC). */ static void sbp2_parse_unit_directory(struct scsi_id_instance_data *scsi_id, struct unit_directory *ud) @@ -1638,18 +1648,18 @@ static int sbp2_max_speed_and_size(struc scsi_id->max_payload_size = min(sbp2_speedto_max_payload[scsi_id->speed_code], (u8)(hi->host->csr.max_rec - 1)); - SBP2_ERR("Node " NODE_BUS_FMT ": Max speed [%s] - Max payload [%u]", - NODE_BUS_ARGS(hi->host, scsi_id->ne->nodeid), - hpsb_speedto_str[scsi_id->speed_code], - 1 << ((u32)scsi_id->max_payload_size + 2)); + HPSB_DEBUG("Node " NODE_BUS_FMT ": Max speed [%s] - Max payload [%u]", + NODE_BUS_ARGS(hi->host, scsi_id->ne->nodeid), + hpsb_speedto_str[scsi_id->speed_code], + 1 << ((u32)scsi_id->max_payload_size + 2)); return(0); } /* - * This function is called in order to perform a SBP-2 agent reset. + * This function is called in order to perform a SBP-2 agent reset. */ -static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait) +static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait) { quadlet_t data; u64 addr; @@ -1690,7 +1700,7 @@ static int sbp2_create_command_orb(struc unchar *scsi_cmd, unsigned int scsi_use_sg, unsigned int scsi_request_bufflen, - void *scsi_request_buffer, + void *scsi_request_buffer, unsigned char scsi_dir) { struct sbp2scsi_host_info *hi = scsi_id->hi; @@ -1734,7 +1744,7 @@ static int sbp2_create_command_orb(struc case SCSI_DATA_UNKNOWN: default: SBP2_ERR("SCSI data transfer direction not specified. 
" - "Update the SBP2 direction table in sbp2.h if " + "Update the SBP2 direction table in sbp2.h if " "necessary for your application"); print_command (scsi_cmd); orb_direction = sbp2scsi_direction_table[*scsi_cmd]; @@ -1805,12 +1815,12 @@ static int sbp2_create_command_orb(struc while (sg_len) { scatter_gather_element[sg_count].segment_base_lo = sg_addr; if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) { - scatter_gather_element[sg_count].length_segment_base_hi = + scatter_gather_element[sg_count].length_segment_base_hi = PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH); sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH; sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH; } else { - scatter_gather_element[sg_count].length_segment_base_hi = + scatter_gather_element[sg_count].length_segment_base_hi = PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len); sg_len = 0; } @@ -1821,14 +1831,14 @@ static int sbp2_create_command_orb(struc /* Number of page table (s/g) elements */ command_orb->misc |= ORB_SET_DATA_SIZE(sg_count); - sbp2util_packet_dump(scatter_gather_element, - (sizeof(struct sbp2_unrestricted_page_table)) * sg_count, + sbp2util_packet_dump(scatter_gather_element, + (sizeof(struct sbp2_unrestricted_page_table)) * sg_count, "sbp2 s/g list", command->sge_dma); /* * Byte swap page tables if necessary */ - sbp2util_cpu_to_be32_buffer(scatter_gather_element, + sbp2util_cpu_to_be32_buffer(scatter_gather_element, (sizeof(struct sbp2_unrestricted_page_table)) * sg_count); @@ -1871,7 +1881,7 @@ static int sbp2_create_command_orb(struc /* * Need to turn this into page tables, since the * buffer is too large. 
- */ + */ command_orb->data_descriptor_hi = ORB_SET_NODE_ID(hi->host->node_id); command_orb->data_descriptor_lo = command->sge_dma; @@ -1889,12 +1899,12 @@ static int sbp2_create_command_orb(struc while (sg_len) { scatter_gather_element[sg_count].segment_base_lo = sg_addr; if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) { - scatter_gather_element[sg_count].length_segment_base_hi = + scatter_gather_element[sg_count].length_segment_base_hi = PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH); sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH; sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH; } else { - scatter_gather_element[sg_count].length_segment_base_hi = + scatter_gather_element[sg_count].length_segment_base_hi = PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len); sg_len = 0; } @@ -1904,14 +1914,14 @@ static int sbp2_create_command_orb(struc /* Number of page table (s/g) elements */ command_orb->misc |= ORB_SET_DATA_SIZE(sg_count); - sbp2util_packet_dump(scatter_gather_element, - (sizeof(struct sbp2_unrestricted_page_table)) * sg_count, + sbp2util_packet_dump(scatter_gather_element, + (sizeof(struct sbp2_unrestricted_page_table)) * sg_count, "sbp2 s/g list", command->sge_dma); /* * Byte swap page tables if necessary */ - sbp2util_cpu_to_be32_buffer(scatter_gather_element, + sbp2util_cpu_to_be32_buffer(scatter_gather_element, (sizeof(struct sbp2_unrestricted_page_table)) * sg_count); @@ -1932,9 +1942,9 @@ static int sbp2_create_command_orb(struc return(0); } - + /* - * This function is called in order to begin a regular SBP-2 command. + * This function is called in order to begin a regular SBP-2 command. */ static int sbp2_link_orb_command(struct scsi_id_instance_data *scsi_id, struct sbp2_command_info *command) @@ -2019,7 +2029,7 @@ static int sbp2_link_orb_command(struct } /* - * This function is called in order to begin a regular SBP-2 command. + * This function is called in order to begin a regular SBP-2 command. 
*/ static int sbp2_send_command(struct scsi_id_instance_data *scsi_id, Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) @@ -2046,8 +2056,8 @@ static int sbp2_send_command(struct scsi /* * The scsi stack sends down a request_bufflen which does not match the - * length field in the scsi cdb. This causes some sbp2 devices to - * reject this inquiry command. Fix the request_bufflen. + * length field in the scsi cdb. This causes some sbp2 devices to + * reject this inquiry command. Fix the request_bufflen. */ if (*cmd == INQUIRY) { if (force_inquiry_hack || scsi_id->workarounds & SBP2_BREAKAGE_INQUIRY_HACK) @@ -2061,14 +2071,14 @@ static int sbp2_send_command(struct scsi */ sbp2_create_command_orb(scsi_id, command, cmd, SCpnt->use_sg, request_bufflen, SCpnt->request_buffer, - SCpnt->sc_data_direction); + SCpnt->sc_data_direction); /* * Update our cdb if necessary (to handle sbp2 RBC command set * differences). This is where the command set hacks go! =) */ sbp2_check_sbp2_command(scsi_id, command->command_orb.cdb); - sbp2util_packet_dump(&command->command_orb, sizeof(struct sbp2_command_orb), + sbp2util_packet_dump(&command->command_orb, sizeof(struct sbp2_command_orb), "sbp2 command orb", command->command_orb_dma); /* @@ -2080,7 +2090,7 @@ static int sbp2_send_command(struct scsi * Link up the orb, and ring the doorbell if needed */ sbp2_link_orb_command(scsi_id, command); - + return(0); } @@ -2097,13 +2107,13 @@ static void sbp2_check_sbp2_command(stru SBP2_DEBUG("sbp2_check_sbp2_command"); switch (*cmd) { - + case READ_6: if (sbp2_command_conversion_device_type(device_type)) { SBP2_DEBUG("Convert READ_6 to READ_10"); - + /* * Need to turn read_6 into read_10 */ @@ -2117,7 +2127,7 @@ static void sbp2_check_sbp2_command(stru new_cmd[7] = 0x0; new_cmd[8] = cmd[4]; new_cmd[9] = cmd[5]; - + memcpy(cmd, new_cmd, 10); } @@ -2129,7 +2139,7 @@ static void sbp2_check_sbp2_command(stru if (sbp2_command_conversion_device_type(device_type)) { SBP2_DEBUG("Convert WRITE_6 to WRITE_10"); 
- + /* * Need to turn write_6 into write_10 */ @@ -2143,7 +2153,7 @@ static void sbp2_check_sbp2_command(stru new_cmd[7] = 0x0; new_cmd[8] = cmd[4]; new_cmd[9] = cmd[5]; - + memcpy(cmd, new_cmd, 10); } @@ -2169,7 +2179,7 @@ static void sbp2_check_sbp2_command(stru new_cmd[7] = 0x0; new_cmd[8] = cmd[4]; new_cmd[9] = cmd[5]; - + memcpy(cmd, new_cmd, 10); } @@ -2232,7 +2242,7 @@ static void sbp2_check_sbp2_response(str SBP2_DEBUG("sbp2_check_sbp2_response"); switch (SCpnt->cmnd[0]) { - + case INQUIRY: /* @@ -2270,7 +2280,7 @@ static void sbp2_check_sbp2_response(str case MODE_SENSE: if (sbp2_command_conversion_device_type(device_type)) { - + SBP2_DEBUG("Modify mode sense response (10 byte version)"); scsi_buf[0] = scsi_buf[1]; /* Mode data length */ @@ -2278,7 +2288,6 @@ static void sbp2_check_sbp2_response(str scsi_buf[2] = scsi_buf[3]; /* Device specific parameter */ scsi_buf[3] = scsi_buf[7]; /* Block descriptor length */ memcpy(scsi_buf + 4, scsi_buf + 8, scsi_buf[0]); - } break; @@ -2342,7 +2351,7 @@ static int sbp2_handle_status_write(stru } /* - * Put response into scsi_id status fifo... + * Put response into scsi_id status fifo... */ memcpy(&scsi_id->status_block, data, length); @@ -2394,7 +2403,7 @@ static int sbp2_handle_status_write(stru if (STATUS_GET_DEAD_BIT(scsi_id->status_block.ORB_offset_hi_misc)) { /* - * Initiate a fetch agent reset. + * Initiate a fetch agent reset. */ SBP2_DEBUG("Dead bit set - initiating fetch agent reset"); sbp2_agent_reset(scsi_id, 0); @@ -2405,7 +2414,7 @@ static int sbp2_handle_status_write(stru /* * Check here to see if there are no commands in-use. If there are none, we can - * null out last orb so that next time around we write directly to the orb pointer... + * null out last orb so that next time around we write directly to the orb pointer... * Quick start saves one 1394 bus transaction. 
*/ if (list_empty(&scsi_id->sbp2_command_orb_inuse)) { @@ -2413,8 +2422,8 @@ static int sbp2_handle_status_write(stru } } else { - - /* + + /* * It's probably a login/logout/reconnect status. */ if ((scsi_id->login_orb_dma == scsi_id->status_block.ORB_offset_lo) || @@ -2443,10 +2452,10 @@ static int sbp2_handle_status_write(stru **************************************/ /* - * This routine is the main request entry routine for doing I/O. It is + * This routine is the main request entry routine for doing I/O. It is * called from the scsi stack directly. */ -static int sbp2scsi_queuecommand (Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) +static int sbp2scsi_queuecommand (Scsi_Cmnd *SCpnt, void (*done)(Scsi_Cmnd *)) { struct scsi_id_instance_data *scsi_id = (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; @@ -2521,7 +2530,7 @@ static int sbp2scsi_queuecommand (Scsi_C * This function is called in order to complete all outstanding SBP-2 * commands (in case of resets, etc.). */ -static void sbp2scsi_complete_all_commands(struct scsi_id_instance_data *scsi_id, +static void sbp2scsi_complete_all_commands(struct scsi_id_instance_data *scsi_id, u32 status) { struct sbp2scsi_host_info *hi = scsi_id->hi; @@ -2581,7 +2590,7 @@ static void sbp2scsi_complete_command(st SBP2_ERR("Bus reset in progress - retry command later"); return; } - + /* * Switch on scsi status */ @@ -2650,7 +2659,7 @@ static void sbp2scsi_complete_command(st * or hot-plug... */ #if 0 - if ((scsi_status == SBP2_SCSI_STATUS_CHECK_CONDITION) && + if ((scsi_status == SBP2_SCSI_STATUS_CHECK_CONDITION) && (SCpnt->sense_buffer[2] == UNIT_ATTENTION)) { SBP2_DEBUG("UNIT ATTENTION - return busy"); SCpnt->result = DID_BUS_BUSY << 16; @@ -2680,7 +2689,7 @@ static int sbp2scsi_slave_configure (str * Called by scsi stack when something has really gone wrong. Usually * called when a command has timed-out for some reason. 
*/ -static int sbp2scsi_abort (Scsi_Cmnd *SCpnt) +static int sbp2scsi_abort (Scsi_Cmnd *SCpnt) { struct scsi_id_instance_data *scsi_id = (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; @@ -2689,7 +2698,7 @@ static int sbp2scsi_abort (Scsi_Cmnd *SC SBP2_ERR("aborting sbp2 command"); print_command (SCpnt->cmnd); - + if (scsi_id) { /* @@ -2716,10 +2725,10 @@ static int sbp2scsi_abort (Scsi_Cmnd *SC } /* - * Initiate a fetch agent reset. + * Initiate a fetch agent reset. */ sbp2_agent_reset(scsi_id, 0); - sbp2scsi_complete_all_commands(scsi_id, DID_BUS_BUSY); + sbp2scsi_complete_all_commands(scsi_id, DID_BUS_BUSY); } return(SUCCESS); @@ -2728,7 +2737,7 @@ static int sbp2scsi_abort (Scsi_Cmnd *SC /* * Called by scsi stack when something has really gone wrong. */ -static int sbp2scsi_reset (Scsi_Cmnd *SCpnt) +static int sbp2scsi_reset (Scsi_Cmnd *SCpnt) { struct scsi_id_instance_data *scsi_id = (struct scsi_id_instance_data *)SCpnt->device->host->hostdata[0]; --- linux-2.6.6-rc1/drivers/ieee1394/sbp2.h 2004-03-10 20:41:27.000000000 -0800 +++ 25/drivers/ieee1394/sbp2.h 2004-04-18 22:25:24.772064552 -0700 @@ -271,7 +271,7 @@ struct sbp2_status_block { #endif /* - * SCSI direction table... + * SCSI direction table... * (now used as a back-up in case the direction passed down from above is "unknown") * * DIN = IN data direction @@ -285,7 +285,7 @@ struct sbp2_status_block { #define DIN ORB_DIRECTION_READ_FROM_MEDIA #define DOU ORB_DIRECTION_WRITE_TO_MEDIA #define DNO ORB_DIRECTION_NO_DATA_TRANSFER -#define DUN DIN +#define DUN DIN static unchar sbp2scsi_direction_table[0x100] = { DNO,DNO,DIN,DIN,DOU,DIN,DIN,DOU,DIN,DUN,DOU,DOU,DUN,DUN,DUN,DIN, @@ -316,8 +316,8 @@ enum cmd_dma_types { CMD_DMA_SINGLE }; -/* - * Encapsulates all the info necessary for an outstanding command. +/* + * Encapsulates all the info necessary for an outstanding command. 
*/ struct sbp2_command_info { @@ -386,12 +386,12 @@ struct scsi_id_instance_data { u32 sbp2_device_type_and_lun; u32 sbp2_firmware_revision; - /* + /* * Variable used for logins, reconnects, logouts, query logins */ atomic_t sbp2_login_complete; - /* + /* * Pool of command orbs, so we can have more than overlapped command per id */ spinlock_t sbp2_command_orb_lock; @@ -433,8 +433,8 @@ static int sbp2util_create_command_orb_p static void sbp2util_remove_command_orb_pool(struct scsi_id_instance_data *scsi_id); static struct sbp2_command_info *sbp2util_find_command_for_orb(struct scsi_id_instance_data *scsi_id, dma_addr_t orb); static struct sbp2_command_info *sbp2util_find_command_for_SCpnt(struct scsi_id_instance_data *scsi_id, void *SCpnt); -static struct sbp2_command_info *sbp2util_allocate_command_orb(struct scsi_id_instance_data *scsi_id, - Scsi_Cmnd *Current_SCpnt, +static struct sbp2_command_info *sbp2util_allocate_command_orb(struct scsi_id_instance_data *scsi_id, + Scsi_Cmnd *Current_SCpnt, void (*Current_done)(Scsi_Cmnd *)); static void sbp2util_mark_command_completed(struct scsi_id_instance_data *scsi_id, struct sbp2_command_info *command); @@ -455,8 +455,8 @@ static int sbp2_handle_physdma_read(stru */ static int sbp2_query_logins(struct scsi_id_instance_data *scsi_id); static int sbp2_login_device(struct scsi_id_instance_data *scsi_id); -static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id); -static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id); +static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id); +static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id); static int sbp2_handle_status_write(struct hpsb_host *host, int nodeid, int destid, quadlet_t *data, u64 addr, size_t length, u16 flags); static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait); @@ -465,7 +465,7 @@ static int sbp2_create_command_orb(struc unchar *scsi_cmd, unsigned int scsi_use_sg, unsigned int 
scsi_request_bufflen, - void *scsi_request_buffer, + void *scsi_request_buffer, unsigned char scsi_dir); static int sbp2_link_orb_command(struct scsi_id_instance_data *scsi_id, struct sbp2_command_info *command); --- linux-2.6.6-rc1/drivers/ieee1394/video1394.c 2004-04-03 20:39:12.000000000 -0800 +++ 25/drivers/ieee1394/video1394.c 2004-04-18 22:25:24.776063944 -0700 @@ -102,7 +102,7 @@ struct dma_iso_ctx { unsigned int *buffer_status; struct timeval *buffer_time; /* time when the buffer was received */ - unsigned int *last_used_cmd; /* For ISO Transmit with + unsigned int *last_used_cmd; /* For ISO Transmit with variable sized packets only ! */ int ctrlClear; int ctrlSet; @@ -154,7 +154,7 @@ static struct hpsb_highlevel video1394_h static int free_dma_iso_ctx(struct dma_iso_ctx *d) { int i; - + DBGMSG(d->ohci->host->id, "Freeing dma_iso_ctx %d", d->ctx); ohci1394_stop_context(d->ohci, d->ctrlClear, NULL); @@ -260,7 +260,7 @@ alloc_dma_iso_ctx(struct ti_ohci *ohci, d->cmdPtr = OHCI1394_IsoRcvCommandPtr+32*d->ctx; d->ctxMatch = OHCI1394_IsoRcvContextMatch+32*d->ctx; - d->ir_prg = kmalloc(d->num_desc * sizeof(struct dma_cmd *), + d->ir_prg = kmalloc(d->num_desc * sizeof(struct dma_cmd *), GFP_KERNEL); if (d->ir_prg == NULL) { @@ -273,7 +273,7 @@ alloc_dma_iso_ctx(struct ti_ohci *ohci, d->nb_cmd = d->buf_size / PAGE_SIZE + 1; d->left_size = (d->frame_size % PAGE_SIZE) ? 
d->frame_size % PAGE_SIZE : PAGE_SIZE; - + for (i = 0;i < d->num_desc; i++) { if (dma_prog_region_alloc(&d->prg_reg[i], d->nb_cmd * sizeof(struct dma_cmd), ohci->dev)) { @@ -289,21 +289,21 @@ alloc_dma_iso_ctx(struct ti_ohci *ohci, d->ctrlClear = OHCI1394_IsoXmitContextControlClear+16*d->ctx; d->cmdPtr = OHCI1394_IsoXmitCommandPtr+16*d->ctx; - d->it_prg = kmalloc(d->num_desc * sizeof(struct it_dma_prg *), + d->it_prg = kmalloc(d->num_desc * sizeof(struct it_dma_prg *), GFP_KERNEL); if (d->it_prg == NULL) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Failed to allocate dma it prg"); free_dma_iso_ctx(d); return NULL; } memset(d->it_prg, 0, d->num_desc*sizeof(struct it_dma_prg *)); - + d->packet_size = packet_size; if (PAGE_SIZE % packet_size || packet_size>4096) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Packet size %d (page_size: %ld) " "not yet supported\n", packet_size, PAGE_SIZE); @@ -362,7 +362,7 @@ alloc_dma_iso_ctx(struct ti_ohci *ohci, memset(d->buffer_time, 0, d->num_desc * sizeof(struct timeval)); memset(d->last_used_cmd, 0, d->num_desc * sizeof(unsigned int)); memset(d->next_buffer, -1, d->num_desc * sizeof(int)); - + spin_lock_init(&d->lock); PRINT(KERN_INFO, ohci->host->id, "Iso %s DMA: %d buffers " @@ -412,9 +412,9 @@ static void initialize_dma_ir_prg(struct (unsigned long)d->dma.kvirt)); ir_prg[1].branchAddress = cpu_to_le32((dma_prog_region_offset_to_bus(ir_reg, 2 * sizeof(struct dma_cmd)) & 0xfffffff0) | 0x1); - + for (i = 2; i < d->nb_cmd - 1; i++) { - ir_prg[i].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | + ir_prg[i].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | DMA_CTL_BRANCH | PAGE_SIZE); ir_prg[i].address = cpu_to_le32(dma_region_offset_to_bus(&d->dma, (buf+(i-1)*PAGE_SIZE) - @@ -426,21 +426,21 @@ static void initialize_dma_ir_prg(struct } /* The last descriptor will generate an interrupt */ - ir_prg[i].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | + 
ir_prg[i].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | DMA_CTL_IRQ | DMA_CTL_BRANCH | d->left_size); ir_prg[i].address = cpu_to_le32(dma_region_offset_to_bus(&d->dma, (buf+(i-1)*PAGE_SIZE) - (unsigned long)d->dma.kvirt)); - } else { + } else { /* Only one DMA page is used. Read d->left_size immediately and */ /* generate an interrupt as this is also the last page. */ - ir_prg[1].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | + ir_prg[1].control = cpu_to_le32(DMA_CTL_INPUT_MORE | DMA_CTL_UPDATE | DMA_CTL_IRQ | DMA_CTL_BRANCH | (d->left_size-4)); ir_prg[1].address = cpu_to_le32(dma_region_offset_to_bus(&d->dma, (buf + 4) - (unsigned long)d->dma.kvirt)); } } - + static void initialize_dma_ir_ctx(struct dma_iso_ctx *d, int tag, int flags) { struct ti_ohci *ohci = (struct ti_ohci *)d->ohci; @@ -462,13 +462,13 @@ static void initialize_dma_ir_ctx(struct reg_write(ohci, d->ctrlSet, 0x80000000); /* Set isoch header */ - if (flags & VIDEO1394_INCLUDE_ISO_HEADERS) + if (flags & VIDEO1394_INCLUDE_ISO_HEADERS) reg_write(ohci, d->ctrlSet, 0x40000000); - /* Set the context match register to match on all tags, + /* Set the context match register to match on all tags, sync for sync tag, and listen to d->channel */ reg_write(ohci, d->ctxMatch, 0xf0000000|((tag&0xf)<<8)|d->channel); - + /* Set up isoRecvIntMask to generate interrupts */ reg_write(ohci, OHCI1394_IsoRecvIntMaskSet, 1<ctx); } @@ -524,9 +524,9 @@ static inline void put_timestamp(struct timeStamp = ((cycleTimer & 0x0fff) + d->syt_offset); /* 11059 = 450 us */ timeStamp = (timeStamp % 3072 + ((timeStamp / 3072) << 12) + (cycleTimer & 0xf000)) & 0xffff; - - buf[6] = timeStamp >> 8; - buf[7] = timeStamp & 0xff; + + buf[6] = timeStamp >> 8; + buf[7] = timeStamp & 0xff; /* if first packet is empty packet, then put timestamp into the next full one too */ if ( (le32_to_cpu(d->it_prg[n][0].data[1]) >>16) == 0x008) { @@ -557,7 +557,7 @@ static inline void put_timestamp(struct #if 0 printk("curr: %d, 
next: %d, cycleTimer: %08x timeStamp: %08x\n", curr, n, cycleTimer, timeStamp); -#endif +#endif } void wakeup_dma_it_ctx(unsigned long l) @@ -569,7 +569,7 @@ void wakeup_dma_it_ctx(unsigned long l) spin_lock(&d->lock); for (i = 0; i < d->num_desc; i++) { - if (d->it_prg[i][d->last_used_cmd[i]].end.status & + if (d->it_prg[i][d->last_used_cmd[i]].end.status & cpu_to_le32(0xFFFF0000)) { int next = d->next_buffer[i]; put_timestamp(ohci, d, next); @@ -592,23 +592,23 @@ static void initialize_dma_it_prg(struct int i; d->last_used_cmd[n] = d->nb_cmd - 1; for (i=0;inb_cmd;i++) { - + it_prg[i].begin.control = cpu_to_le32(DMA_CTL_OUTPUT_MORE | DMA_CTL_IMMEDIATE | 8) ; it_prg[i].begin.address = 0; - + it_prg[i].begin.status = 0; - + it_prg[i].data[0] = cpu_to_le32( - (IEEE1394_SPEED_100 << 16) + (IEEE1394_SPEED_100 << 16) | (/* tag */ 1 << 14) - | (d->channel << 8) + | (d->channel << 8) | (TCODE_ISO_DATA << 4)); if (i==0) it_prg[i].data[0] |= cpu_to_le32(sync_tag); it_prg[i].data[1] = cpu_to_le32(d->packet_size << 16); it_prg[i].data[2] = 0; it_prg[i].data[3] = 0; - + it_prg[i].end.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH); it_prg[i].end.address = @@ -617,15 +617,15 @@ static void initialize_dma_it_prg(struct if (inb_cmd-1) { it_prg[i].end.control |= cpu_to_le32(d->packet_size); - it_prg[i].begin.branchAddress = + it_prg[i].begin.branchAddress = cpu_to_le32((dma_prog_region_offset_to_bus(it_reg, (i + 1) * sizeof(struct it_dma_prg)) & 0xfffffff0) | 0x3); - it_prg[i].end.branchAddress = + it_prg[i].end.branchAddress = cpu_to_le32((dma_prog_region_offset_to_bus(it_reg, (i + 1) * sizeof(struct it_dma_prg)) & 0xfffffff0) | 0x3); } else { /* the last prg generates an interrupt */ - it_prg[i].end.control |= cpu_to_le32(DMA_CTL_UPDATE | + it_prg[i].end.control |= cpu_to_le32(DMA_CTL_UPDATE | DMA_CTL_IRQ | d->left_size); /* the last prg doesn't branch */ it_prg[i].begin.branchAddress = 0; @@ -657,7 +657,7 @@ static void initialize_dma_it_prg_var_pa } else { size = 
packet_sizes[i]; } - it_prg[i].data[1] = cpu_to_le32(size << 16); + it_prg[i].data[1] = cpu_to_le32(size << 16); it_prg[i].end.control = cpu_to_le32(DMA_CTL_OUTPUT_LAST | DMA_CTL_BRANCH); if (i < d->nb_cmd-1 && packet_sizes[i+1] != 0) { @@ -670,7 +670,7 @@ static void initialize_dma_it_prg_var_pa sizeof(struct it_dma_prg)) & 0xfffffff0) | 0x3); } else { /* the last prg generates an interrupt */ - it_prg[i].end.control |= cpu_to_le32(DMA_CTL_UPDATE | + it_prg[i].end.control |= cpu_to_le32(DMA_CTL_UPDATE | DMA_CTL_IRQ | size); /* the last prg doesn't branch */ it_prg[i].begin.branchAddress = 0; @@ -694,7 +694,7 @@ static void initialize_dma_it_ctx(struct for (i=0;inum_desc;i++) initialize_dma_it_prg(d, i, sync_tag); - + /* Set up isoRecvIntMask to generate interrupts */ reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, 1<ctx); } @@ -731,9 +731,9 @@ static int video1394_ioctl(struct inode mask = mask << 1; } } - + if (v.channel<0 || v.channel>(ISO_CHANNELS-1)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Iso channel %d out of bounds", v.channel); return -EFAULT; } @@ -743,7 +743,7 @@ static int video1394_ioctl(struct inode (u32)(ohci->ISO_channel_usage>>32), (u32)(ohci->ISO_channel_usage&0xffffffff)); if (ohci->ISO_channel_usage & mask) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Channel %d is already taken", v.channel); return -EFAULT; } @@ -762,19 +762,19 @@ static int video1394_ioctl(struct inode } if (v.nb_buffers * v.buf_size > VIDEO1394_MAX_SIZE) { - PRINT(KERN_ERR, ohci->host->id, - "%d buffers of size %d bytes is too big", + PRINT(KERN_ERR, ohci->host->id, + "%d buffers of size %d bytes is too big", v.nb_buffers, v.buf_size); return -EFAULT; } if (cmd == VIDEO1394_IOC_LISTEN_CHANNEL) { d = alloc_dma_iso_ctx(ohci, OHCI_ISO_RECEIVE, - v.nb_buffers, v.buf_size, + v.nb_buffers, v.buf_size, v.channel, 0); if (d == NULL) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Couldn't allocate ir 
context"); return -EFAULT; } @@ -785,21 +785,21 @@ static int video1394_ioctl(struct inode v.buf_size = d->buf_size; list_add_tail(&d->link, &ctx->context_list); - PRINT(KERN_INFO, ohci->host->id, + PRINT(KERN_INFO, ohci->host->id, "iso context %d listen on channel %d", d->ctx, v.channel); } else { d = alloc_dma_iso_ctx(ohci, OHCI_ISO_TRANSMIT, - v.nb_buffers, v.buf_size, + v.nb_buffers, v.buf_size, v.channel, v.packet_size); if (d == NULL) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Couldn't allocate it context"); return -EFAULT; } - initialize_dma_it_ctx(d, v.sync_tag, + initialize_dma_it_ctx(d, v.sync_tag, v.syt_offset, v.flags); ctx->current_ctx = d; @@ -808,7 +808,7 @@ static int video1394_ioctl(struct inode list_add_tail(&d->link, &ctx->context_list); - PRINT(KERN_INFO, ohci->host->id, + PRINT(KERN_INFO, ohci->host->id, "Iso context %d talk on channel %d", d->ctx, v.channel); } @@ -818,7 +818,7 @@ static int video1394_ioctl(struct inode return 0; } - case VIDEO1394_IOC_UNLISTEN_CHANNEL: + case VIDEO1394_IOC_UNLISTEN_CHANNEL: case VIDEO1394_IOC_UNTALK_CHANNEL: { int channel; @@ -829,13 +829,13 @@ static int video1394_ioctl(struct inode return -EFAULT; if (channel<0 || channel>(ISO_CHANNELS-1)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Iso channel %d out of bound", channel); return -EFAULT; } mask = (u64)0x1<ISO_channel_usage & mask)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Channel %d is not being used", channel); return -EFAULT; } @@ -852,7 +852,7 @@ static int video1394_ioctl(struct inode PRINT(KERN_INFO, ohci->host->id, "Iso context %d " "stop talking on channel %d", d->ctx, channel); free_dma_iso_ctx(d); - + return 0; } case VIDEO1394_IOC_LISTEN_QUEUE_BUFFER: @@ -866,20 +866,20 @@ static int video1394_ioctl(struct inode d = find_ctx(&ctx->context_list, OHCI_ISO_RECEIVE, v.channel); if ((v.buffer<0) || (v.buffer>d->num_desc)) { - PRINT(KERN_ERR, ohci->host->id, + 
PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EFAULT; } - + spin_lock_irqsave(&d->lock,flags); if (d->buffer_status[v.buffer]==VIDEO1394_BUFFER_QUEUED) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d is already used",v.buffer); spin_unlock_irqrestore(&d->lock,flags); return -EFAULT; } - + d->buffer_status[v.buffer]=VIDEO1394_BUFFER_QUEUED; if (d->last_buffer>=0) @@ -893,7 +893,7 @@ static int video1394_ioctl(struct inode spin_unlock_irqrestore(&d->lock,flags); - if (!(reg_read(ohci, d->ctrlSet) & 0x8000)) + if (!(reg_read(ohci, d->ctrlSet) & 0x8000)) { DBGMSG(ohci->host->id, "Starting iso DMA ctx=%d",d->ctx); @@ -907,13 +907,13 @@ static int video1394_ioctl(struct inode else { /* Wake up dma context if necessary */ if (!(reg_read(ohci, d->ctrlSet) & 0x400)) { - PRINT(KERN_INFO, ohci->host->id, + PRINT(KERN_INFO, ohci->host->id, "Waking up iso dma ctx=%d", d->ctx); reg_write(ohci, d->ctrlSet, 0x1000); } } return 0; - + } case VIDEO1394_IOC_LISTEN_WAIT_BUFFER: case VIDEO1394_IOC_LISTEN_POLL_BUFFER: @@ -928,13 +928,13 @@ static int video1394_ioctl(struct inode d = find_ctx(&ctx->context_list, OHCI_ISO_RECEIVE, v.channel); if ((v.buffer<0) || (v.buffer>d->num_desc)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EFAULT; } /* - * I change the way it works so that it returns + * I change the way it works so that it returns * the last received frame. 
*/ spin_lock_irqsave(&d->lock, flags); @@ -961,7 +961,7 @@ static int video1394_ioctl(struct inode } } #else - if (wait_event_interruptible(d->waitq, + if (wait_event_interruptible(d->waitq, d->buffer_status[v.buffer] == VIDEO1394_BUFFER_READY) == -ERESTARTSYS) @@ -970,7 +970,7 @@ static int video1394_ioctl(struct inode d->buffer_status[v.buffer]=VIDEO1394_BUFFER_FREE; break; default: - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d is not queued",v.buffer); spin_unlock_irqrestore(&d->lock, flags); return -EFAULT; @@ -1011,16 +1011,16 @@ static int video1394_ioctl(struct inode d = find_ctx(&ctx->context_list, OHCI_ISO_TRANSMIT, v.channel); if ((v.buffer<0) || (v.buffer>d->num_desc)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EFAULT; } - + if (d->flags & VIDEO1394_VARIABLE_PACKET_SIZE) { unsigned int *psizes; int buf_size = d->nb_cmd * sizeof(unsigned int); - if (copy_from_user(&qv, (void *)arg, sizeof(qv))) + if (copy_from_user(&qv, (void *)arg, sizeof(qv))) return -EFAULT; psizes = kmalloc(buf_size, GFP_KERNEL); @@ -1038,14 +1038,14 @@ static int video1394_ioctl(struct inode spin_lock_irqsave(&d->lock,flags); if (d->buffer_status[v.buffer]!=VIDEO1394_BUFFER_FREE) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d is already used",v.buffer); spin_unlock_irqrestore(&d->lock,flags); if (qv.packet_sizes) kfree(qv.packet_sizes); return -EFAULT; } - + if (d->flags & VIDEO1394_VARIABLE_PACKET_SIZE) { initialize_dma_it_prg_var_packet_queue( d, v.buffer, qv.packet_sizes, @@ -1056,7 +1056,7 @@ static int video1394_ioctl(struct inode if (d->last_buffer >= 0) { d->it_prg[d->last_buffer] - [ d->last_used_cmd[d->last_buffer] ].end.branchAddress = + [ d->last_used_cmd[d->last_buffer] ].end.branchAddress = cpu_to_le32((dma_prog_region_offset_to_bus(&d->prg_reg[v.buffer], 0) & 0xfffffff0) | 0x3); @@ -1073,7 +1073,7 @@ static int video1394_ioctl(struct 
inode spin_unlock_irqrestore(&d->lock,flags); - if (!(reg_read(ohci, d->ctrlSet) & 0x8000)) + if (!(reg_read(ohci, d->ctrlSet) & 0x8000)) { DBGMSG(ohci->host->id, "Starting iso transmit DMA ctx=%d", d->ctx); @@ -1089,8 +1089,8 @@ static int video1394_ioctl(struct inode else { /* Wake up dma context if necessary */ if (!(reg_read(ohci, d->ctrlSet) & 0x400)) { - PRINT(KERN_INFO, ohci->host->id, - "Waking up iso transmit dma ctx=%d", + PRINT(KERN_INFO, ohci->host->id, + "Waking up iso transmit dma ctx=%d", d->ctx); put_timestamp(ohci, d, d->last_buffer); reg_write(ohci, d->ctrlSet, 0x1000); @@ -1101,7 +1101,7 @@ static int video1394_ioctl(struct inode kfree(qv.packet_sizes); return 0; - + } case VIDEO1394_IOC_TALK_WAIT_BUFFER: { @@ -1114,7 +1114,7 @@ static int video1394_ioctl(struct inode d = find_ctx(&ctx->context_list, OHCI_ISO_TRANSMIT, v.channel); if ((v.buffer<0) || (v.buffer>d->num_desc)) { - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d out of range",v.buffer); return -EFAULT; } @@ -1131,7 +1131,7 @@ static int video1394_ioctl(struct inode if (signal_pending(current)) return -EINTR; } #else - if (wait_event_interruptible(d->waitq, + if (wait_event_interruptible(d->waitq, d->buffer_status[v.buffer] == VIDEO1394_BUFFER_READY) == -ERESTARTSYS) @@ -1140,7 +1140,7 @@ static int video1394_ioctl(struct inode d->buffer_status[v.buffer]=VIDEO1394_BUFFER_FREE; return 0; default: - PRINT(KERN_ERR, ohci->host->id, + PRINT(KERN_ERR, ohci->host->id, "Buffer %d is not queued",v.buffer); return -EFAULT; } @@ -1153,7 +1153,7 @@ static int video1394_ioctl(struct inode /* * This maps the vmalloced and reserved buffer to user space. * - * FIXME: + * FIXME: * - PAGE_READONLY should suffice!? * - remap_page_range is kind of inefficient for page by page remapping. * But e.g. pte_alloc() does not work in modules ... 
:-( @@ -1211,7 +1211,7 @@ static int video1394_release(struct inod struct dma_iso_ctx *d; d = list_entry(lh, struct dma_iso_ctx, link); mask = (u64) 1 << d->channel; - + if (!(ohci->ISO_channel_usage & mask)) PRINT(KERN_ERR, ohci->host->id, "On release: Channel %d " "is not being used", d->channel); @@ -1226,7 +1226,7 @@ static int video1394_release(struct inod kfree(ctx); file->private_data = NULL; - + unlock_kernel(); return 0; } @@ -1285,7 +1285,7 @@ static void video1394_add_host (struct h hpsb_set_hostinfo(&video1394_highlevel, host, ohci); hpsb_set_hostinfo_key(&video1394_highlevel, host, ohci->host->id); - minor = IEEE1394_MINOR_BLOCK_VIDEO1394 * 16 + ohci->host->id; + minor = IEEE1394_MINOR_BLOCK_VIDEO1394 * 16 + ohci->host->id; devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, minor), S_IFCHR | S_IRUSR | S_IWUSR, "%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id); @@ -1438,7 +1438,7 @@ static void __exit video1394_exit_module ret |= unregister_ioctl32_conversion(VIDEO1394_IOC32_TALK_WAIT_BUFFER); ret |= unregister_ioctl32_conversion(VIDEO1394_IOC32_LISTEN_POLL_BUFFER); if (ret) - PRINT_G(KERN_INFO, "Error unregistering ioctl32 translations"); + PRINT_G(KERN_CRIT, "Error unregistering ioctl32 translations"); #endif hpsb_unregister_protocol(&video1394_driver); @@ -1457,6 +1457,7 @@ static int __init video1394_init_module cdev_init(&video1394_cdev, &video1394_fops); video1394_cdev.owner = THIS_MODULE; + kobject_set_name(&video1394_cdev.kobj, VIDEO1394_DRIVER_NAME); ret = cdev_add(&video1394_cdev, IEEE1394_VIDEO1394_DEV, 16); if (ret) { PRINT_G(KERN_ERR, "video1394: unable to get minor device block"); --- linux-2.6.6-rc1/drivers/ieee1394/video1394.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/drivers/ieee1394/video1394.h 2004-04-18 22:25:24.776063944 -0700 @@ -42,7 +42,7 @@ struct video1394_mmap { unsigned int sync_tag; unsigned int nb_buffers; unsigned int buf_size; - unsigned int packet_size; /* For VARIABLE_PACKET_SIZE: + unsigned int packet_size; /* For 
VARIABLE_PACKET_SIZE: Maximum packet size */ unsigned int fps; unsigned int syt_offset; @@ -53,7 +53,7 @@ struct video1394_mmap { struct video1394_queue_variable { unsigned int channel; unsigned int buffer; - unsigned int* packet_sizes; /* Buffer of size: + unsigned int* packet_sizes; /* Buffer of size: buf_size / packet_size */ }; --- linux-2.6.6-rc1/drivers/input/serio/i8042.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/input/serio/i8042.c 2004-04-18 22:25:29.738309568 -0700 @@ -532,8 +532,8 @@ static int __init i8042_check_mux(struct return -1; /* Workaround for broken chips which seem to support MUX, but in reality don't. */ - /* They all report version 12.10 */ - if (mux_version == 0xCA) + /* They all report version 10.12 */ + if (mux_version == 0xAC) return -1; printk(KERN_INFO "i8042.c: Detected active multiplexing controller, rev %d.%d.\n", --- linux-2.6.6-rc1/drivers/input/serio/serio.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/input/serio/serio.c 2004-04-18 22:25:45.174962840 -0700 @@ -195,9 +195,6 @@ irqreturn_t serio_interrupt(struct serio ret = serio->dev->interrupt(serio, data, flags, regs); } else { if (!flags) { - if ((serio->type == SERIO_8042 || - serio->type == SERIO_8042_XL) && (data != 0xaa)) - return ret; serio_rescan(serio); ret = IRQ_HANDLED; } --- linux-2.6.6-rc1/drivers/isdn/capi/capi.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/isdn/capi/capi.c 2004-04-18 22:25:24.777063792 -0700 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,8 @@ MODULE_LICENSE("GPL"); /* -------- driver information -------------------------------------- */ +static struct class_simple *capi_class; + int capi_major = 68; /* allocated */ #ifdef CONFIG_ISDN_CAPI_MIDDLEWARE #define CAPINC_NR_PORTS 32 @@ -1313,7 +1316,8 @@ static int capinc_tty_init(void) drv->owner = THIS_MODULE; drv->driver_name = "capi_nc"; - drv->name = "capi/"; + drv->devfs_name = "capi/"; + drv->name = "capi"; drv->major = 
capi_ttymajor; drv->minor_start = 0; drv->type = TTY_DRIVER_TYPE_SERIAL; @@ -1483,11 +1487,20 @@ static int __init capi_init(void) return -EIO; } + capi_class = class_simple_create(THIS_MODULE, "capi"); + if (IS_ERR(capi_class)) { + unregister_chrdev(capi_major, "capi20"); + return PTR_ERR(capi_class); + } + + class_simple_device_add(capi_class, MKDEV(capi_major, 0), NULL, "capi"); devfs_mk_cdev(MKDEV(capi_major, 0), S_IFCHR | S_IRUSR | S_IWUSR, "isdn/capi20"); #ifdef CONFIG_ISDN_CAPI_MIDDLEWARE if (capinc_tty_init() < 0) { + class_simple_device_remove(MKDEV(capi_major, 0)); + class_simple_destroy(capi_class); unregister_chrdev(capi_major, "capi20"); return -ENOMEM; } @@ -1514,6 +1527,8 @@ static void __exit capi_exit(void) { proc_exit(); + class_simple_device_remove(MKDEV(capi_major, 0)); + class_simple_destroy(capi_class); unregister_chrdev(capi_major, "capi20"); devfs_remove("isdn/capi20"); --- linux-2.6.6-rc1/drivers/isdn/i4l/isdn_ppp.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/isdn/i4l/isdn_ppp.c 2004-04-18 22:25:24.780063336 -0700 @@ -606,7 +606,7 @@ isdn_ppp_ioctl(int min, struct file *fil if (copy_from_user(&uprog, (void *) arg, sizeof(uprog))) return -EFAULT; - if (uprog.len > 0 && uprog.len < 65536) { + if (uprog.len > 0) { len = uprog.len * sizeof(struct sock_filter); code = kmalloc(len, GFP_KERNEL); if (code == NULL) @@ -1121,7 +1121,12 @@ isdn_ppp_push_higher(isdn_net_dev * net_ * the filter instructions are constructed assuming * a four-byte PPP header on each packet (which is still present) */ skb_push(skb, 4); - skb->data[0] = 0; /* indicate inbound */ + + { + u_int16_t *p = (u_int16_t *) skb->data; + + *p = 0; /* indicate inbound in DLT_LINUX_SLL */ + } if (is->pass_filter.filter && sk_run_filter(skb, is->pass_filter.filter, @@ -1263,8 +1268,13 @@ isdn_ppp_xmit(struct sk_buff *skb, struc * the filter instructions are constructed assuming * a four-byte PPP header on each packet */ skb_push(skb, 4); - skb->data[0] = 1; /* indicate outbound 
*/ - *(u_int16_t *)(skb->data + 2) = htons(proto); + + { + u_int16_t *p = (u_int16_t *) skb->data; + + *p++ = htons(4); /* indicate outbound in DLT_LINUX_SLL */ + *p = htons(proto); + } if (ipt->pass_filter.filter && sk_run_filter(skb, ipt->pass_filter.filter, @@ -1457,8 +1467,13 @@ int isdn_ppp_autodial_filter(struct sk_b * earlier. */ skb_pull(skb, IPPP_MAX_HEADER - 4); - skb->data[0] = 1; /* indicate outbound */ - *(u_int16_t *)(skb->data + 2) = htons(proto); + + { + u_int16_t *p = (u_int16_t *) skb->data; + + *p++ = htons(4); /* indicate outbound in DLT_LINUX_SLL */ + *p = htons(proto); + } drop |= is->pass_filter.filter && sk_run_filter(skb, is->pass_filter.filter, --- linux-2.6.6-rc1/drivers/isdn/i4l/Kconfig 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/isdn/i4l/Kconfig 2004-04-18 22:25:24.778063640 -0700 @@ -31,6 +31,17 @@ config ISDN_MPP by bundling several ISDN-connections, using this protocol. See for more information. +config IPPP_FILTER + bool "Filtering for synchronous PPP" + depends on ISDN_PPP + help + Say Y here if you want to be able to filter the packets passing over + IPPP interfaces. This allows you to control which packets count as + activity (i.e. which packets will reset the idle timer or bring up + a demand-dialled link) and which packets are to be dropped entirely. + You need to say Y here if you wish to use the pass-filter and + active-filter options to ipppd. 
+ config ISDN_PPP_BSDCOMP tristate "Support BSD compression" depends on ISDN_PPP --- linux-2.6.6-rc1/drivers/md/dm.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/md/dm.c 2004-04-18 22:26:00.976560632 -0700 @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,7 @@ struct target_io { */ #define DMF_BLOCK_IO 0 #define DMF_SUSPENDED 1 +#define DMF_FS_LOCKED 2 struct mapped_device { struct rw_semaphore lock; @@ -80,6 +82,11 @@ struct mapped_device { */ uint32_t event_nr; wait_queue_head_t eventq; + + /* + * freeze/thaw support require holding onto a super block + */ + struct super_block *frozen_sb; }; #define MIN_IOS 256 @@ -294,6 +301,9 @@ static int clone_endio(struct bio *bio, if (bio->bi_size) return 1; + if (!bio_flagged(bio, BIO_UPTODATE) && !error) + error = -EIO; + if (endio) { r = endio(tio->ti, bio, error, &tio->info); if (r < 0) @@ -745,7 +755,7 @@ static void event_callback(void *context down_write(&md->lock); md->event_nr++; - wake_up_interruptible(&md->eventq); + wake_up(&md->eventq); up_write(&md->lock); } @@ -882,6 +892,52 @@ int dm_swap_table(struct mapped_device * } /* + * Functions to lock and unlock any filesystem running on the + * device. + */ +static int __lock_fs(struct mapped_device *md) +{ + struct block_device *bdev; + + if (test_and_set_bit(DMF_FS_LOCKED, &md->flags)) + return 0; + + bdev = bdget_disk(md->disk, 0); + if (!bdev) { + DMWARN("bdget failed in __lock_fs"); + return -ENOMEM; + } + + WARN_ON(md->frozen_sb); + md->frozen_sb = freeze_bdev(bdev); + /* don't bdput right now, we don't want the bdev + * to go away while it is locked. 
We'll bdput + * in __unlock_fs + */ + return 0; +} + +static int __unlock_fs(struct mapped_device *md) +{ + struct block_device *bdev; + + if (!test_and_clear_bit(DMF_FS_LOCKED, &md->flags)) + return 0; + + bdev = bdget_disk(md->disk, 0); + if (!bdev) { + DMWARN("bdget failed in __unlock_fs"); + return -ENOMEM; + } + + thaw_bdev(bdev, md->frozen_sb); + md->frozen_sb = NULL; + bdput(bdev); + bdput(bdev); + return 0; +} + +/* * We need to be able to change a mapping table under a mounted * filesystem. For example we might want to move some data in * the background. Before the table can be swapped with @@ -893,13 +949,27 @@ int dm_suspend(struct mapped_device *md) struct dm_table *map; DECLARE_WAITQUEUE(wait, current); - down_write(&md->lock); + /* Flush I/O to the device. */ + down_read(&md->lock); + if (test_bit(DMF_BLOCK_IO, &md->flags)) { + up_read(&md->lock); + return -EINVAL; + } + + __lock_fs(md); + up_read(&md->lock); /* * First we set the BLOCK_IO flag so no more ios will be * mapped. */ + down_write(&md->lock); if (test_bit(DMF_BLOCK_IO, &md->flags)) { + /* + * If we get here we know another thread is + * trying to suspend as well, so we leave the fs + * locked for this thread. + */ up_write(&md->lock); return -EINVAL; } @@ -922,7 +992,7 @@ int dm_suspend(struct mapped_device *md) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!atomic_read(&md->pending)) + if (!atomic_read(&md->pending) || signal_pending(current)) break; io_schedule(); @@ -931,6 +1001,15 @@ int dm_suspend(struct mapped_device *md) down_write(&md->lock); remove_wait_queue(&md->wait, &wait); + + /* were we interrupted ? 
*/ + if (atomic_read(&md->pending)) { + __unlock_fs(md); + clear_bit(DMF_BLOCK_IO, &md->flags); + up_write(&md->lock); + return -EINTR; + } + set_bit(DMF_SUSPENDED, &md->flags); map = dm_get_table(md); @@ -963,6 +1042,7 @@ int dm_resume(struct mapped_device *md) def = bio_list_get(&md->deferred); __flush_deferred_io(md, def); up_write(&md->lock); + __unlock_fs(md); dm_table_unplug_all(map); dm_table_put(map); --- linux-2.6.6-rc1/drivers/md/dm-ioctl.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/md/dm-ioctl.c 2004-04-18 22:25:24.781063184 -0700 @@ -800,7 +800,7 @@ static void retrieve_status(struct dm_ta struct dm_target *ti = dm_table_get_target(table, i); remaining = len - (outptr - outbuf); - if (remaining < sizeof(struct dm_target_spec)) { + if (remaining <= sizeof(struct dm_target_spec)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } @@ -815,6 +815,10 @@ static void retrieve_status(struct dm_ta outptr += sizeof(struct dm_target_spec); remaining = len - (outptr - outbuf); + if (remaining <= 0) { + param->flags |= DM_BUFFER_FULL_FLAG; + break; + } /* Get the status/table string from the target driver */ if (ti->type->status) { @@ -828,7 +832,7 @@ static void retrieve_status(struct dm_ta outptr += strlen(outptr) + 1; used = param->data_start + (outptr - outbuf); - align_ptr(outptr); + outptr = align_ptr(outptr); spec->next = outptr - outbuf; } --- linux-2.6.6-rc1/drivers/md/dm-stripe.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/md/dm-stripe.c 2004-04-18 22:25:24.781063184 -0700 @@ -187,24 +187,24 @@ static int stripe_status(struct dm_targe status_type_t type, char *result, unsigned int maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; - int offset; + unsigned int sz = 0; unsigned int i; char buffer[32]; +#define EMIT(x...) sz += ((sz >= maxlen) ? 
\ + 0 : scnprintf(result + sz, maxlen - sz, x)) + switch (type) { case STATUSTYPE_INFO: result[0] = '\0'; break; case STATUSTYPE_TABLE: - offset = scnprintf(result, maxlen, "%d " SECTOR_FORMAT, - sc->stripes, sc->chunk_mask + 1); + EMIT("%d " SECTOR_FORMAT, sc->stripes, sc->chunk_mask + 1); for (i = 0; i < sc->stripes; i++) { format_dev_t(buffer, sc->stripe[i].dev->bdev->bd_dev); - offset += - scnprintf(result + offset, maxlen - offset, - " %s " SECTOR_FORMAT, buffer, - sc->stripe[i].physical_start); + EMIT(" %s " SECTOR_FORMAT, buffer, + sc->stripe[i].physical_start); } break; } --- linux-2.6.6-rc1/drivers/md/dm-table.c 2004-04-14 23:14:47.000000000 -0700 +++ 25/drivers/md/dm-table.c 2004-04-18 22:25:24.782063032 -0700 @@ -663,12 +663,14 @@ int dm_table_add_target(struct dm_table if (!len) { tgt->error = "zero-length target"; + DMERR(": %s\n", tgt->error); return -EINVAL; } tgt->type = dm_get_target_type(type); if (!tgt->type) { tgt->error = "unknown target type"; + DMERR(": %s\n", tgt->error); return -EINVAL; } @@ -705,7 +707,7 @@ int dm_table_add_target(struct dm_table return 0; bad: - printk(KERN_ERR DM_NAME ": %s\n", tgt->error); + DMERR(": %s\n", tgt->error); dm_put_target_type(tgt->type); return r; } --- linux-2.6.6-rc1/drivers/net/3c509.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/3c509.c 2004-04-18 22:26:00.274667336 -0700 @@ -595,10 +595,8 @@ no_pnp: #endif el3_cards++; -#if !defined(__ISAPNP__) || defined(CONFIG_X86_PC9800) lp->next_dev = el3_root_dev; el3_root_dev = dev; -#endif return 0; out1: --- linux-2.6.6-rc1/drivers/net/8139too.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/8139too.c 2004-04-18 22:25:29.925281144 -0700 @@ -1673,11 +1673,17 @@ static void rtl8139_tx_timeout (struct n u8 tmp8; unsigned long flags; - DPRINTK ("%s: Transmit timeout, status %2.2x %4.4x " - "media %2.2x.\n", dev->name, - RTL_R8 (ChipCmd), - RTL_R16 (IntrStatus), - RTL_R8 (MediaStatus)); + printk (KERN_DEBUG "%s: Transmit timeout, status %2.2x %4.4x 
%4.4x " + "media %2.2x.\n", dev->name, RTL_R8 (ChipCmd), + RTL_R16(IntrStatus), RTL_R16(IntrMask), RTL_R8(MediaStatus)); + /* Emit info to figure out what went wrong. */ + printk (KERN_DEBUG "%s: Tx queue start entry %ld dirty entry %ld.\n", + dev->name, tp->cur_tx, tp->dirty_tx); + for (i = 0; i < NUM_TX_DESC; i++) + printk (KERN_DEBUG "%s: Tx descriptor %d is %8.8lx.%s\n", + dev->name, i, RTL_R32 (TxStatus0 + (i * 4)), + i == tp->dirty_tx % NUM_TX_DESC ? + " (queue head)" : ""); tp->xstats.tx_timeouts++; @@ -1690,15 +1696,6 @@ static void rtl8139_tx_timeout (struct n /* Disable interrupts by clearing the interrupt mask. */ RTL_W16 (IntrMask, 0x0000); - /* Emit info to figure out what went wrong. */ - printk (KERN_DEBUG "%s: Tx queue start entry %ld dirty entry %ld.\n", - dev->name, tp->cur_tx, tp->dirty_tx); - for (i = 0; i < NUM_TX_DESC; i++) - printk (KERN_DEBUG "%s: Tx descriptor %d is %8.8lx.%s\n", - dev->name, i, RTL_R32 (TxStatus0 + (i * 4)), - i == tp->dirty_tx % NUM_TX_DESC ? - " (queue head)" : ""); - /* Stop a shared interrupt from scavenging while we are. */ spin_lock_irqsave (&tp->lock, flags); rtl8139_tx_clear (tp); @@ -1710,7 +1707,6 @@ static void rtl8139_tx_timeout (struct n netif_wake_queue (dev); } spin_unlock(&tp->rx_lock); - } --- linux-2.6.6-rc1/drivers/net/82596.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/82596.c 2004-04-18 22:26:02.379347376 -0700 @@ -58,7 +58,6 @@ #include #include #include -#include static char version[] __initdata = "82596.c $Revision: 1.5 $\n"; --- linux-2.6.6-rc1/drivers/net/8390.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/8390.c 2004-04-18 22:25:24.784062728 -0700 @@ -1084,7 +1084,7 @@ void NS8390_init(struct net_device *dev, for(i = 0; i < 6; i++) { outb_p(dev->dev_addr[i], e8390_base + EN1_PHYS_SHIFT(i)); - if(inb_p(e8390_base + EN1_PHYS_SHIFT(i))!=dev->dev_addr[i]) + if (ei_debug > 1 && inb_p(e8390_base + EN1_PHYS_SHIFT(i))!=dev->dev_addr[i]) printk(KERN_ERR "Hw. 
address read/write mismap %d\n",i); } --- linux-2.6.6-rc1/drivers/net/8390.h 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/8390.h 2004-04-18 22:25:24.785062576 -0700 @@ -131,8 +131,19 @@ struct ei_device { #define inb_p(port) in_8(port) #define outb_p(val,port) out_8(port,val) -#elif defined(CONFIG_ARM_ETHERH) || defined(CONFIG_ARM_ETHERH_MODULE) || \ - defined(CONFIG_NET_CBUS) +#elif defined(CONFIG_ARM_ETHERH) || defined(CONFIG_ARM_ETHERH_MODULE) +#define EI_SHIFT(x) (ei_local->reg_offset[x]) +#undef inb +#undef inb_p +#undef outb +#undef outb_p + +#define inb(_p) readb(_p) +#define outb(_v,_p) writeb(_v,_p) +#define inb_p(_p) inb(_p) +#define outb_p(_v,_p) outb(_v,_p) + +#elif defined(CONFIG_NET_CBUS) #define EI_SHIFT(x) (ei_local->reg_offset[x]) #else #define EI_SHIFT(x) (x) --- linux-2.6.6-rc1/drivers/net/a2065.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/a2065.c 2004-04-18 22:25:24.788062120 -0700 @@ -274,6 +274,7 @@ static int lance_rx (struct net_device * struct sk_buff *skb = 0; /* XXX shut up gcc warnings */ #ifdef TEST_HITS + int i; printk ("["); for (i = 0; i < RX_RING_SIZE; i++) { if (i == lp->rx_new) --- linux-2.6.6-rc1/drivers/net/acenic.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/acenic.c 2004-04-18 22:25:29.931280232 -0700 @@ -131,7 +131,6 @@ #define PCI_DEVICE_ID_SGI_ACENIC 0x0009 #endif -#if LINUX_VERSION_CODE >= 0x20400 static struct pci_device_id acenic_pci_tbl[] = { { PCI_VENDOR_ID_ALTEON, PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, }, @@ -156,37 +155,6 @@ static struct pci_device_id acenic_pci_t { } }; MODULE_DEVICE_TABLE(pci, acenic_pci_tbl); -#endif - - -#ifndef MODULE_LICENSE -#define MODULE_LICENSE(a) -#endif - -#ifndef wmb -#define wmb() mb() -#endif - -#ifndef __exit -#define __exit -#endif - -#ifndef __devinit -#define __devinit __init -#endif - -#ifndef SMP_CACHE_BYTES -#define SMP_CACHE_BYTES L1_CACHE_BYTES -#endif - -#ifndef SET_MODULE_OWNER 
-#define SET_MODULE_OWNER(dev) do{} while(0) -#define ACE_MOD_INC_USE_COUNT MOD_INC_USE_COUNT -#define ACE_MOD_DEC_USE_COUNT MOD_DEC_USE_COUNT -#else -#define ACE_MOD_INC_USE_COUNT do{} while(0) -#define ACE_MOD_DEC_USE_COUNT do{} while(0) -#endif #ifndef SET_NETDEV_DEV #define SET_NETDEV_DEV(net, pdev) do{} while(0) @@ -198,151 +166,8 @@ MODULE_DEVICE_TABLE(pci, acenic_pci_tbl) #define ace_sync_irq(irq) synchronize_irq() #endif -#if LINUX_VERSION_CODE < 0x2051e -#define local_irq_save(flags) do{__save_flags(flags) ; \ - __cli();} while(0) -#define local_irq_restore(flags) __restore_flags(flags) -#endif - -#if (LINUX_VERSION_CODE < 0x02030d) -#define pci_resource_start(dev, bar) dev->base_address[bar] -#elif (LINUX_VERSION_CODE < 0x02032c) -#define pci_resource_start(dev, bar) dev->resource[bar].start -#endif - -#if (LINUX_VERSION_CODE < 0x02030e) -#define net_device device -#endif - - -#if (LINUX_VERSION_CODE < 0x02032a) -typedef u32 dma_addr_t; - -static inline void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, - dma_addr_t *dma_handle) -{ - void *virt_ptr; - - virt_ptr = kmalloc(size, GFP_KERNEL); - if (!virt_ptr) - return NULL; - *dma_handle = virt_to_bus(virt_ptr); - return virt_ptr; -} - -#define pci_free_consistent(cookie, size, ptr, dma_ptr) kfree(ptr) -#define pci_map_page(cookie, page, off, size, dir) \ - virt_to_bus(page_address(page)+(off)) -#define pci_unmap_page(cookie, address, size, dir) -#define pci_set_dma_mask(dev, mask) \ - (((u64)(mask) & 0xffffffff00000000) == 0 ? 0 : -EIO) -#define pci_dma_supported(dev, mask) \ - (((u64)(mask) & 0xffffffff00000000) == 0 ? 1 : 0) - -#elif (LINUX_VERSION_CODE < 0x02040d) - -/* - * 2.4.13 introduced pci_map_page()/pci_unmap_page() - for 2.4.12 and prior, - * fall back on pci_map_single()/pci_unnmap_single(). - * - * We are guaranteed that the page is mapped at this point since - * pci_map_page() is only used upon valid struct skb's. 
- */ -static inline dma_addr_t -pci_map_page(struct pci_dev *cookie, struct page *page, unsigned long off, - size_t size, int dir) -{ - void *page_virt; - - page_virt = page_address(page); - if (!page_virt) - BUG(); - return pci_map_single(cookie, (page_virt + off), size, dir); -} -#define pci_unmap_page(cookie, dma_addr, size, dir) \ - pci_unmap_single(cookie, dma_addr, size, dir) -#endif - -#if (LINUX_VERSION_CODE < 0x020412) -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) 0 -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do{} while(0) -#define pci_unmap_len(PTR, LEN_NAME) 0 -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do{} while(0) -#endif - - -#if (LINUX_VERSION_CODE < 0x02032b) -/* - * SoftNet - * - * For pre-softnet kernels we need to tell the upper layer not to - * re-enter start_xmit() while we are in there. However softnet - * guarantees not to enter while we are in there so there is no need - * to do the netif_stop_queue() dance unless the transmit queue really - * gets stuck. This should also improve performance according to tests - * done by Aman Singla. 
- */ -#define dev_kfree_skb_irq(a) dev_kfree_skb(a) -#define netif_wake_queue(dev) clear_bit(0, &dev->tbusy) -#define netif_stop_queue(dev) set_bit(0, &dev->tbusy) -#define late_stop_netif_stop_queue(dev) do{} while(0) -#define early_stop_netif_stop_queue(dev) test_and_set_bit(0,&dev->tbusy) -#define early_stop_netif_wake_queue(dev) netif_wake_queue(dev) - -static inline void netif_start_queue(struct net_device *dev) -{ - dev->tbusy = 0; - dev->interrupt = 0; - dev->start = 1; -} - -#define ace_mark_net_bh() mark_bh(NET_BH) -#define netif_queue_stopped(dev) dev->tbusy -#define netif_running(dev) dev->start -#define ace_if_down(dev) do{dev->start = 0;} while(0) - -#define tasklet_struct tq_struct -static inline void tasklet_schedule(struct tasklet_struct *tasklet) -{ - queue_task(tasklet, &tq_immediate); - mark_bh(IMMEDIATE_BH); -} - -static inline void tasklet_init(struct tasklet_struct *tasklet, - void (*func)(unsigned long), - unsigned long data) -{ - tasklet->next = NULL; - tasklet->sync = 0; - tasklet->routine = (void (*)(void *))func; - tasklet->data = (void *)data; -} -#define tasklet_kill(tasklet) do{} while(0) -#else -#define late_stop_netif_stop_queue(dev) netif_stop_queue(dev) -#define early_stop_netif_stop_queue(dev) 0 -#define early_stop_netif_wake_queue(dev) do{} while(0) -#define ace_mark_net_bh() do{} while(0) -#define ace_if_down(dev) do{} while(0) -#endif - -#if (LINUX_VERSION_CODE >= 0x02031b) -#define NEW_NETINIT -#define ACE_PROBE_ARG void -#else -#define ACE_PROBE_ARG struct net_device *dev -#endif - -#ifndef min_t -#define min_t(type,a,b) (((a)<(b))?(a):(b)) -#endif - -#ifndef ARCH_HAS_PREFETCHW -#ifndef prefetchw -#define prefetchw(x) do{} while(0) -#endif +#ifndef offset_in_page +#define offset_in_page(ptr) ((unsigned long)(ptr) & ~PAGE_MASK) #endif #define ACE_MAX_MOD_PARMS 8 @@ -595,407 +420,323 @@ static int max_rx_desc[ACE_MAX_MOD_PARMS static int tx_ratio[ACE_MAX_MOD_PARMS]; static int dis_pci_mem_inval[ACE_MAX_MOD_PARMS] = {1, 1, 1, 1, 
1, 1, 1, 1}; +MODULE_AUTHOR("Jes Sorensen "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver"); +MODULE_PARM(link, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(trace, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(tx_coal_tick, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(max_tx_desc, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(rx_coal_tick, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(max_rx_desc, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM(tx_ratio, "1-" __MODULE_STRING(8) "i"); +MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state"); +MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level"); +MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first tx descriptor arrives"); +MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait"); +MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first rx descriptor arrives"); +MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait"); +MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)"); + + static char version[] __initdata = "acenic.c: v0.92 08/05/2002 Jes Sorensen, linux-acenic@SunSITE.dk\n" " http://home.cern.ch/~jes/gige/acenic.html\n"; -static struct net_device *root_dev; - -static int probed __initdata = 0; - - -int __devinit acenic_probe (ACE_PROBE_ARG) +static int __devinit acenic_probe_one(struct pci_dev *pdev, + const struct pci_device_id *id) { -#ifdef NEW_NETINIT struct net_device *dev; -#endif struct ace_private *ap; - struct pci_dev *pdev = NULL; - int boards_found = 0; - int version_disp; - - if (probed) - return -ENODEV; - probed++; - - version_disp = 0; - - while ((pdev = pci_find_class(PCI_CLASS_NETWORK_ETHERNET<<8, pdev))) { - - if (!((pdev->vendor == PCI_VENDOR_ID_ALTEON) && - ((pdev->device == PCI_DEVICE_ID_ALTEON_ACENIC_FIBRE) || - (pdev->device == 
PCI_DEVICE_ID_ALTEON_ACENIC_COPPER)))&& - !((pdev->vendor == PCI_VENDOR_ID_3COM) && - (pdev->device == PCI_DEVICE_ID_3COM_3C985)) && - !((pdev->vendor == PCI_VENDOR_ID_NETGEAR) && - ((pdev->device == PCI_DEVICE_ID_NETGEAR_GA620) || - (pdev->device == PCI_DEVICE_ID_NETGEAR_GA620T))) && - /* - * Farallon used the DEC vendor ID on their cards by - * mistake for a while - */ - !((pdev->vendor == PCI_VENDOR_ID_DEC) && - (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX)) && - !((pdev->vendor == PCI_VENDOR_ID_ALTEON) && - (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T)) && - !((pdev->vendor == PCI_VENDOR_ID_SGI) && - (pdev->device == PCI_DEVICE_ID_SGI_ACENIC))) - continue; - - dev = alloc_etherdev(sizeof(struct ace_private)); - if (dev == NULL) { - printk(KERN_ERR "acenic: Unable to allocate " - "net_device structure!\n"); - break; - } + static int boards_found; - SET_MODULE_OWNER(dev); - SET_NETDEV_DEV(dev, &pdev->dev); + dev = alloc_etherdev(sizeof(struct ace_private)); + if (dev == NULL) { + printk(KERN_ERR "acenic: Unable to allocate " + "net_device structure!\n"); + return -ENOMEM; + } - ap = dev->priv; - ap->pdev = pdev; + SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &pdev->dev); - dev->open = &ace_open; - dev->hard_start_xmit = &ace_start_xmit; - dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + ap = dev->priv; + ap->pdev = pdev; + + dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; #if ACENIC_DO_VLAN - dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; - dev->vlan_rx_register = ace_vlan_rx_register; - dev->vlan_rx_kill_vid = ace_vlan_rx_kill_vid; -#endif - if (1) { - static void ace_watchdog(struct net_device *dev); - dev->tx_timeout = &ace_watchdog; - dev->watchdog_timeo = 5*HZ; - } - dev->stop = &ace_close; - dev->get_stats = &ace_get_stats; - dev->set_multicast_list = &ace_set_multicast_list; - dev->do_ioctl = &ace_ioctl; - dev->set_mac_address = &ace_set_mac_addr; - dev->change_mtu = &ace_change_mtu; + dev->features |= NETIF_F_HW_VLAN_TX | 
NETIF_F_HW_VLAN_RX; + dev->vlan_rx_register = ace_vlan_rx_register; + dev->vlan_rx_kill_vid = ace_vlan_rx_kill_vid; +#endif + if (1) { + static void ace_watchdog(struct net_device *dev); + dev->tx_timeout = &ace_watchdog; + dev->watchdog_timeo = 5*HZ; + } - /* display version info if adapter is found */ - if (!version_disp) - { - /* set display flag to TRUE so that */ - /* we only display this string ONCE */ - version_disp = 1; - printk(version); - } + dev->open = &ace_open; + dev->stop = &ace_close; + dev->hard_start_xmit = &ace_start_xmit; + dev->get_stats = &ace_get_stats; + dev->set_multicast_list = &ace_set_multicast_list; + dev->do_ioctl = &ace_ioctl; + dev->set_mac_address = &ace_set_mac_addr; + dev->change_mtu = &ace_change_mtu; - if (pci_enable_device(pdev)) { - free_netdev(dev); - continue; - } + /* we only display this string ONCE */ + if (!boards_found) + printk(version); - /* - * Enable master mode before we start playing with the - * pci_command word since pci_set_master() will modify - * it. - */ - pci_set_master(pdev); + if (pci_enable_device(pdev)) + goto fail_free_netdev; - pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command); + /* + * Enable master mode before we start playing with the + * pci_command word since pci_set_master() will modify + * it. + */ + pci_set_master(pdev); - /* OpenFirmware on Mac's does not set this - DOH.. */ - if (!(ap->pci_command & PCI_COMMAND_MEMORY)) { - printk(KERN_INFO "%s: Enabling PCI Memory Mapped " - "access - was not enabled by BIOS/Firmware\n", - dev->name); - ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY; - pci_write_config_word(ap->pdev, PCI_COMMAND, - ap->pci_command); - wmb(); - } + pci_read_config_word(pdev, PCI_COMMAND, &ap->pci_command); - pci_read_config_byte(pdev, PCI_LATENCY_TIMER, - &ap->pci_latency); - if (ap->pci_latency <= 0x40) { - ap->pci_latency = 0x40; - pci_write_config_byte(pdev, PCI_LATENCY_TIMER, - ap->pci_latency); - } + /* OpenFirmware on Mac's does not set this - DOH.. 
*/ + if (!(ap->pci_command & PCI_COMMAND_MEMORY)) { + printk(KERN_INFO "%s: Enabling PCI Memory Mapped " + "access - was not enabled by BIOS/Firmware\n", + dev->name); + ap->pci_command = ap->pci_command | PCI_COMMAND_MEMORY; + pci_write_config_word(ap->pdev, PCI_COMMAND, + ap->pci_command); + wmb(); + } - /* - * Remap the regs into kernel space - this is abuse of - * dev->base_addr since it was means for I/O port - * addresses but who gives a damn. - */ - dev->base_addr = pci_resource_start(pdev, 0); - ap->regs = (struct ace_regs *)ioremap(dev->base_addr, 0x4000); - if (!ap->regs) { - printk(KERN_ERR "%s: Unable to map I/O register, " - "AceNIC %i will be disabled.\n", - dev->name, boards_found); - break; - } + pci_read_config_byte(pdev, PCI_LATENCY_TIMER, &ap->pci_latency); + if (ap->pci_latency <= 0x40) { + ap->pci_latency = 0x40; + pci_write_config_byte(pdev, PCI_LATENCY_TIMER, ap->pci_latency); + } - switch(pdev->vendor) { - case PCI_VENDOR_ID_ALTEON: - if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) { - strncpy(ap->name, "Farallon PN9100-T " - "Gigabit Ethernet", sizeof (ap->name)); - printk(KERN_INFO "%s: Farallon PN9100-T ", - dev->name); - } else { - strncpy(ap->name, "AceNIC Gigabit Ethernet", - sizeof (ap->name)); - printk(KERN_INFO "%s: Alteon AceNIC ", - dev->name); - } - break; - case PCI_VENDOR_ID_3COM: - strncpy(ap->name, "3Com 3C985 Gigabit Ethernet", - sizeof (ap->name)); - printk(KERN_INFO "%s: 3Com 3C985 ", dev->name); - break; - case PCI_VENDOR_ID_NETGEAR: - strncpy(ap->name, "NetGear GA620 Gigabit Ethernet", - sizeof (ap->name)); - printk(KERN_INFO "%s: NetGear GA620 ", dev->name); - break; - case PCI_VENDOR_ID_DEC: - if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) { - strncpy(ap->name, "Farallon PN9000-SX " - "Gigabit Ethernet", sizeof (ap->name)); - printk(KERN_INFO "%s: Farallon PN9000-SX ", - dev->name); - break; - } - case PCI_VENDOR_ID_SGI: - strncpy(ap->name, "SGI AceNIC Gigabit Ethernet", + /* + * Remap the regs into kernel 
space - this is abuse of + * dev->base_addr since it was means for I/O port + * addresses but who gives a damn. + */ + dev->base_addr = pci_resource_start(pdev, 0); + ap->regs = (struct ace_regs *)ioremap(dev->base_addr, 0x4000); + if (!ap->regs) { + printk(KERN_ERR "%s: Unable to map I/O register, " + "AceNIC %i will be disabled.\n", + dev->name, boards_found); + goto fail_free_netdev; + } + + switch(pdev->vendor) { + case PCI_VENDOR_ID_ALTEON: + if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9100T) { + strncpy(ap->name, "Farallon PN9100-T " + "Gigabit Ethernet", sizeof (ap->name)); + printk(KERN_INFO "%s: Farallon PN9100-T ", + dev->name); + } else { + strncpy(ap->name, "AceNIC Gigabit Ethernet", sizeof (ap->name)); - printk(KERN_INFO "%s: SGI AceNIC ", dev->name); - break; - default: - strncpy(ap->name, "Unknown AceNIC based Gigabit " - "Ethernet", sizeof (ap->name)); - printk(KERN_INFO "%s: Unknown AceNIC ", dev->name); + printk(KERN_INFO "%s: Alteon AceNIC ", + dev->name); + } + break; + case PCI_VENDOR_ID_3COM: + strncpy(ap->name, "3Com 3C985 Gigabit Ethernet", + sizeof (ap->name)); + printk(KERN_INFO "%s: 3Com 3C985 ", dev->name); + break; + case PCI_VENDOR_ID_NETGEAR: + strncpy(ap->name, "NetGear GA620 Gigabit Ethernet", + sizeof (ap->name)); + printk(KERN_INFO "%s: NetGear GA620 ", dev->name); + break; + case PCI_VENDOR_ID_DEC: + if (pdev->device == PCI_DEVICE_ID_FARALLON_PN9000SX) { + strncpy(ap->name, "Farallon PN9000-SX " + "Gigabit Ethernet", sizeof (ap->name)); + printk(KERN_INFO "%s: Farallon PN9000-SX ", + dev->name); break; } - ap->name [sizeof (ap->name) - 1] = '\0'; - printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr); + case PCI_VENDOR_ID_SGI: + strncpy(ap->name, "SGI AceNIC Gigabit Ethernet", + sizeof (ap->name)); + printk(KERN_INFO "%s: SGI AceNIC ", dev->name); + break; + default: + strncpy(ap->name, "Unknown AceNIC based Gigabit " + "Ethernet", sizeof (ap->name)); + printk(KERN_INFO "%s: Unknown AceNIC ", dev->name); + break; + } + + 
ap->name [sizeof (ap->name) - 1] = '\0'; + printk("Gigabit Ethernet at 0x%08lx, ", dev->base_addr); #ifdef __sparc__ - printk("irq %s\n", __irq_itoa(pdev->irq)); + printk("irq %s\n", __irq_itoa(pdev->irq)); #else - printk("irq %i\n", pdev->irq); + printk("irq %i\n", pdev->irq); #endif #ifdef CONFIG_ACENIC_OMIT_TIGON_I - if ((readl(&ap->regs->HostCtrl) >> 28) == 4) { - printk(KERN_ERR "%s: Driver compiled without Tigon I" - " support - NIC disabled\n", dev->name); - ace_init_cleanup(dev); - free_netdev(dev); - continue; - } + if ((readl(&ap->regs->HostCtrl) >> 28) == 4) { + printk(KERN_ERR "%s: Driver compiled without Tigon I" + " support - NIC disabled\n", dev->name); + goto fail_uninit; + } #endif - if (ace_allocate_descriptors(dev)) { - /* - * ace_allocate_descriptors() calls - * ace_init_cleanup() on error. - */ - free_netdev(dev); - continue; - } + if (ace_allocate_descriptors(dev)) + goto fail_free_netdev; #ifdef MODULE - if (boards_found >= ACE_MAX_MOD_PARMS) - ap->board_idx = BOARD_IDX_OVERFLOW; - else - ap->board_idx = boards_found; + if (boards_found >= ACE_MAX_MOD_PARMS) + ap->board_idx = BOARD_IDX_OVERFLOW; + else + ap->board_idx = boards_found; #else - ap->board_idx = BOARD_IDX_STATIC; + ap->board_idx = BOARD_IDX_STATIC; #endif - if (ace_init(dev)) { - /* - * ace_init() calls ace_init_cleanup() on error. - */ - free_netdev(dev); - continue; - } + if (ace_init(dev)) + goto fail_free_netdev; - if (register_netdev(dev)) { - printk(KERN_ERR "acenic: device registration failed\n"); - ace_init_cleanup(dev); - free_netdev(dev); - continue; - } - - if (ap->pci_using_dac) - dev->features |= NETIF_F_HIGHDMA; - - boards_found++; + if (register_netdev(dev)) { + printk(KERN_ERR "acenic: device registration failed\n"); + goto fail_uninit; } - /* - * If we're at this point we're going through ace_probe() for - * the first time. Return success (0) if we've initialized 1 - * or more boards. Otherwise, return failure (-ENODEV). 
- */ - - if (boards_found > 0) - return 0; - else - return -ENODEV; -} + if (ap->pci_using_dac) + dev->features |= NETIF_F_HIGHDMA; + pci_set_drvdata(pdev, dev); -#ifdef MODULE -MODULE_AUTHOR("Jes Sorensen "); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("AceNIC/3C985/GA620 Gigabit Ethernet driver"); -MODULE_PARM(link, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(trace, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(tx_coal_tick, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(max_tx_desc, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(rx_coal_tick, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(max_rx_desc, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM(tx_ratio, "1-" __MODULE_STRING(8) "i"); -MODULE_PARM_DESC(link, "AceNIC/3C985/NetGear link state"); -MODULE_PARM_DESC(trace, "AceNIC/3C985/NetGear firmware trace level"); -MODULE_PARM_DESC(tx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first tx descriptor arrives"); -MODULE_PARM_DESC(max_tx_desc, "AceNIC/3C985/GA620 max number of transmit descriptors to wait"); -MODULE_PARM_DESC(rx_coal_tick, "AceNIC/3C985/GA620 max clock ticks to wait from first rx descriptor arrives"); -MODULE_PARM_DESC(max_rx_desc, "AceNIC/3C985/GA620 max number of receive descriptors to wait"); -MODULE_PARM_DESC(tx_ratio, "AceNIC/3C985/GA620 ratio of NIC memory used for TX/RX descriptors (range 0-63)"); -#endif + boards_found++; + return 0; + fail_uninit: + ace_init_cleanup(dev); + fail_free_netdev: + free_netdev(dev); + return -ENODEV; +} -static void __exit ace_module_cleanup(void) +static void __devexit acenic_remove_one(struct pci_dev *pdev) { - struct ace_private *ap; - struct ace_regs *regs; - struct net_device *next; + struct net_device *dev = pci_get_drvdata(pdev); + struct ace_private *ap = dev->priv; + struct ace_regs *regs = ap->regs; short i; - while (root_dev) { - ap = root_dev->priv; - next = ap->next; - unregister_netdev(root_dev); + unregister_netdev(dev); - regs = ap->regs; - - writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl); - if
(ap->version >= 2) - writel(readl(&regs->CpuBCtrl) | CPU_HALT, - &regs->CpuBCtrl); - /* - * This clears any pending interrupts - */ - writel(1, &regs->Mb0Lo); - readl(&regs->CpuCtrl); /* flush */ + writel(readl(&regs->CpuCtrl) | CPU_HALT, &regs->CpuCtrl); + if (ap->version >= 2) + writel(readl(&regs->CpuBCtrl) | CPU_HALT, &regs->CpuBCtrl); + + /* + * This clears any pending interrupts + */ + writel(1, &regs->Mb0Lo); + readl(&regs->CpuCtrl); /* flush */ - /* - * Make sure no other CPUs are processing interrupts - * on the card before the buffers are being released. - * Otherwise one might experience some `interesting' - * effects. - * - * Then release the RX buffers - jumbo buffers were - * already released in ace_close(). - */ - ace_sync_irq(root_dev->irq); + /* + * Make sure no other CPUs are processing interrupts + * on the card before the buffers are being released. + * Otherwise one might experience some `interesting' + * effects. + * + * Then release the RX buffers - jumbo buffers were + * already released in ace_close().
+ */ + ace_sync_irq(dev->irq); - for (i = 0; i < RX_STD_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb; + for (i = 0; i < RX_STD_RING_ENTRIES; i++) { + struct sk_buff *skb = ap->skb->rx_std_skbuff[i].skb; - if (skb) { - struct ring_info *ringp; - dma_addr_t mapping; + if (skb) { + struct ring_info *ringp; + dma_addr_t mapping; - ringp = &ap->skb->rx_std_skbuff[i]; - mapping = pci_unmap_addr(ringp, mapping); - pci_unmap_page(ap->pdev, mapping, - ACE_STD_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); + ringp = &ap->skb->rx_std_skbuff[i]; + mapping = pci_unmap_addr(ringp, mapping); + pci_unmap_page(ap->pdev, mapping, + ACE_STD_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); - ap->rx_std_ring[i].size = 0; - ap->skb->rx_std_skbuff[i].skb = NULL; - dev_kfree_skb(skb); - } - } - if (ap->version >= 2) { - for (i = 0; i < RX_MINI_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb; - - if (skb) { - struct ring_info *ringp; - dma_addr_t mapping; - - ringp = &ap->skb->rx_mini_skbuff[i]; - mapping = pci_unmap_addr(ringp,mapping); - pci_unmap_page(ap->pdev, mapping, - ACE_MINI_BUFSIZE - (2 + 16), - PCI_DMA_FROMDEVICE); - - ap->rx_mini_ring[i].size = 0; - ap->skb->rx_mini_skbuff[i].skb = NULL; - dev_kfree_skb(skb); - } - } + ap->rx_std_ring[i].size = 0; + ap->skb->rx_std_skbuff[i].skb = NULL; + dev_kfree_skb(skb); } - for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { - struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb; + } + + if (ap->version >= 2) { + for (i = 0; i < RX_MINI_RING_ENTRIES; i++) { + struct sk_buff *skb = ap->skb->rx_mini_skbuff[i].skb; + if (skb) { struct ring_info *ringp; dma_addr_t mapping; - ringp = &ap->skb->rx_jumbo_skbuff[i]; - mapping = pci_unmap_addr(ringp, mapping); + ringp = &ap->skb->rx_mini_skbuff[i]; + mapping = pci_unmap_addr(ringp,mapping); pci_unmap_page(ap->pdev, mapping, - ACE_JUMBO_BUFSIZE - (2 + 16), + ACE_MINI_BUFSIZE - (2 + 16), PCI_DMA_FROMDEVICE); - ap->rx_jumbo_ring[i].size = 0; - 
ap->skb->rx_jumbo_skbuff[i].skb = NULL; + ap->rx_mini_ring[i].size = 0; + ap->skb->rx_mini_skbuff[i].skb = NULL; dev_kfree_skb(skb); } } - - ace_init_cleanup(root_dev); - free_netdev(root_dev); - root_dev = next; } -} + for (i = 0; i < RX_JUMBO_RING_ENTRIES; i++) { + struct sk_buff *skb = ap->skb->rx_jumbo_skbuff[i].skb; + if (skb) { + struct ring_info *ringp; + dma_addr_t mapping; -int __init ace_module_init(void) -{ - int status; + ringp = &ap->skb->rx_jumbo_skbuff[i]; + mapping = pci_unmap_addr(ringp, mapping); + pci_unmap_page(ap->pdev, mapping, + ACE_JUMBO_BUFSIZE - (2 + 16), + PCI_DMA_FROMDEVICE); - root_dev = NULL; + ap->rx_jumbo_ring[i].size = 0; + ap->skb->rx_jumbo_skbuff[i].skb = NULL; + dev_kfree_skb(skb); + } + } -#ifdef NEW_NETINIT - status = acenic_probe(); -#else - status = acenic_probe(NULL); -#endif - return status; + ace_init_cleanup(dev); + free_netdev(dev); } +static struct pci_driver acenic_pci_driver = { + .name = "acenic", + .id_table = acenic_pci_tbl, + .probe = acenic_probe_one, + .remove = __devexit_p(acenic_remove_one), +}; -#if (LINUX_VERSION_CODE < 0x02032a) -#ifdef MODULE -int init_module(void) +static int __init acenic_init(void) { - return ace_module_init(); + return pci_module_init(&acenic_pci_driver); } - -void cleanup_module(void) +static void __exit acenic_exit(void) { - ace_module_cleanup(); + pci_unregister_driver(&acenic_pci_driver); } -#endif -#else -module_init(ace_module_init); -module_exit(ace_module_cleanup); -#endif +module_init(acenic_init); +module_exit(acenic_exit); static void ace_free_descriptors(struct net_device *dev) { @@ -1462,13 +1203,6 @@ static int __init ace_init(struct net_de } else dev->irq = pdev->irq; - /* - * Register the device here to be able to catch allocated - * interrupt handlers in case the firmware doesn't come up. 
- */ - ap->next = root_dev; - root_dev = dev; - #ifdef INDEX_DEBUG spin_lock_init(&ap->debug_lock); ap->last_tx = ACE_TX_RING_ENTRIES(ap) - 1; @@ -2642,8 +2376,6 @@ static int ace_open(struct net_device *d netif_start_queue(dev); - ACE_MOD_INC_USE_COUNT; - /* * Setup the bottom half rx ring refill handler */ @@ -2660,8 +2392,6 @@ static int ace_close(struct net_device * unsigned long flags; short i; - ace_if_down(dev); - /* * Without (or before) releasing irq and stopping hardware, this * is an absolute non-sense, by the way. It will be reset instantly @@ -2733,7 +2463,6 @@ static int ace_close(struct net_device * ace_unmask_irq(dev); local_irq_restore(flags); - ACE_MOD_DEC_USE_COUNT; return 0; } @@ -2790,12 +2519,6 @@ static int ace_start_xmit(struct sk_buff struct tx_desc *desc; u32 idx, flagsize; - /* - * This only happens with pre-softnet, ie. 2.2.x kernels. - */ - if (early_stop_netif_stop_queue(dev)) - return 1; - restart: idx = ap->tx_prd; --- linux-2.6.6-rc1/drivers/net/amd8111e.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/amd8111e.c 2004-04-18 22:25:24.792061512 -0700 @@ -1,6 +1,6 @@ /* Advanced Micro Devices Inc. AMD8111E Linux Network Driver - * Copyright (C) 2003 Advanced Micro Devices + * Copyright (C) 2004 Advanced Micro Devices * * * Copyright 2001,2002 Jeff Garzik [ 8139cp.c,tg3.c ] @@ -55,6 +55,16 @@ Revision History: 4. Dynamic IPG support is disabled by default. 3.0.3 06/05/2003 1. Bug fix: Fixed failure to close the interface if SMP is enabled. + 3.0.4 12/09/2003 + 1. Added set_mac_address routine for bonding driver support. + 2. Tested the driver for bonding support + 3. Bug fix: Fixed mismatch in actual receive buffer length and length + indicated to the h/w. + 4. Modified amd8111e_rx() routine to receive all the received packets + in the first interrupt. + 5. Bug fix: Corrected rx_errors reported in get_stats() function. + 3.0.5 03/22/2004 + 1.
Added NAPI support */ @@ -91,7 +101,7 @@ Revision History: #include "amd8111e.h" #define MODULE_NAME "amd8111e" -#define MODULE_VERS "3.0.3" +#define MODULE_VERS "3.0.5" MODULE_AUTHOR("Advanced Micro Devices, Inc."); MODULE_DESCRIPTION ("AMD8111 based 10/100 Ethernet Controller. Driver Version 3.0.3"); MODULE_LICENSE("GPL"); @@ -276,8 +286,10 @@ static inline void amd8111e_set_rx_buff_ unsigned int mtu = dev->mtu; if (mtu > ETH_DATA_LEN){ - /* MTU + ethernet header + FCS + optional VLAN tag */ - lp->rx_buff_len = mtu + ETH_HLEN + 8; + /* MTU + ethernet header + FCS + + optional VLAN tag + skb reserve space 2 */ + + lp->rx_buff_len = mtu + ETH_HLEN + 10; lp->options |= OPTION_JUMBO_ENABLE; } else{ lp->rx_buff_len = PKT_BUFF_SZ; @@ -337,7 +349,7 @@ static int amd8111e_init_ring(struct net lp->rx_skbuff[i]->data,lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); lp->rx_ring[i].buff_phy_addr = cpu_to_le32(lp->rx_dma_addr[i]); - lp->rx_ring[i].buff_count = cpu_to_le16(lp->rx_buff_len); + lp->rx_ring[i].buff_count = cpu_to_le16(lp->rx_buff_len-2); lp->rx_ring[i].rx_flags = cpu_to_le16(OWN_BIT); } @@ -513,6 +525,9 @@ static void amd8111e_init_hw_default( st void * mmio = lp->mmio; + /* stop the chip */ + writel(RUN, mmio + CMD0); + /* AUTOPOLL0 Register *//*TBD default value is 8100 in FPS */ writew( 0x8101, mmio + AUTOPOLL0); @@ -654,7 +669,11 @@ This is the receive indication function */ static int amd8111e_vlan_rx(struct amd8111e_priv *lp, struct sk_buff *skb, u16 vlan_tag) { +#ifdef CONFIG_AMD8111E_NAPI + return vlan_hwaccel_receive_skb(skb, lp->vlgrp,vlan_tag); +#else return vlan_hwaccel_rx(skb, lp->vlgrp, vlan_tag); +#endif /* CONFIG_AMD8111E_NAPI */ } #endif @@ -700,6 +719,142 @@ static int amd8111e_tx(struct net_device return 0; } +#if CONFIG_AMD8111E_NAPI +/* This function handles the driver receive operation in polling mode */ +static int amd8111e_rx_poll(struct net_device *dev, int * budget) +{ + struct amd8111e_priv *lp = dev->priv; + int rx_index = lp->rx_idx & 
RX_RING_DR_MOD_MASK; + void * mmio = lp->mmio; + struct sk_buff *skb,*new_skb; + int min_pkt_len, status; + unsigned int intr0; + int num_rx_pkt = 0; + /*int max_rx_pkt = NUM_RX_BUFFERS;*/ + short pkt_len; +#if AMD8111E_VLAN_TAG_USED + short vtag; +#endif + int rx_pkt_limit = dev->quota; + + do{ + /* process receive packets until we use the quota*/ + /* If we own the next entry, it's a new packet. Send it up. */ + while(!(lp->rx_ring[rx_index].rx_flags & OWN_BIT)){ + + /* check if err summary bit is set */ + if(le16_to_cpu(lp->rx_ring[rx_index].rx_flags) + & ERR_BIT){ + /* + * There is a tricky error noted by John Murphy, + * to Russ Nelson: Even with + * full-sized * buffers it's possible for a + * jabber packet to use two buffers, with only + * the last correctly noting the error. + */ + + /* reseting flags */ + lp->rx_ring[rx_index].rx_flags &=RESET_RX_FLAGS; + goto err_next_pkt; + + } + /* check for STP and ENP */ + status = le16_to_cpu(lp->rx_ring[rx_index].rx_flags); + if(!((status & STP_BIT) && (status & ENP_BIT))){ + /* reseting flags */ + lp->rx_ring[rx_index].rx_flags &=RESET_RX_FLAGS; + goto err_next_pkt; + } + pkt_len = le16_to_cpu(lp->rx_ring[rx_index].msg_count) - 4; + +#if AMD8111E_VLAN_TAG_USED + vtag = le16_to_cpu(lp->rx_ring[rx_index].rx_flags) & TT_MASK; + /*MAC will strip vlan tag*/ + if(lp->vlgrp != NULL && vtag !=0) + min_pkt_len =MIN_PKT_LEN - 4; + else +#endif + min_pkt_len =MIN_PKT_LEN; + + if (pkt_len < min_pkt_len) { + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; + lp->drv_rx_errors++; + goto err_next_pkt; + } + if(--rx_pkt_limit < 0) + goto rx_not_empty; + if(!(new_skb = dev_alloc_skb(lp->rx_buff_len))){ + /* if allocation fail, + ignore that pkt and go to next one */ + lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; + lp->drv_rx_errors++; + goto err_next_pkt; + } + + skb_reserve(new_skb, 2); + skb = lp->rx_skbuff[rx_index]; + pci_unmap_single(lp->pci_dev,lp->rx_dma_addr[rx_index], + lp->rx_buff_len-2, PCI_DMA_FROMDEVICE); + 
skb_put(skb, pkt_len); + skb->dev = dev; + lp->rx_skbuff[rx_index] = new_skb; + new_skb->dev = dev; + lp->rx_dma_addr[rx_index] = pci_map_single(lp->pci_dev, + new_skb->data, lp->rx_buff_len-2,PCI_DMA_FROMDEVICE); + + skb->protocol = eth_type_trans(skb, dev); + +#if AMD8111E_VLAN_TAG_USED + + vtag = lp->rx_ring[rx_index].rx_flags & TT_MASK; + if(lp->vlgrp != NULL && (vtag == TT_VLAN_TAGGED)){ + amd8111e_vlan_rx(lp, skb, + lp->rx_ring[rx_index].tag_ctrl_info); + } else +#endif + + netif_receive_skb(skb); + /*COAL update rx coalescing parameters*/ + lp->coal_conf.rx_packets++; + lp->coal_conf.rx_bytes += pkt_len; + num_rx_pkt++; + dev->last_rx = jiffies; + +err_next_pkt: + lp->rx_ring[rx_index].buff_phy_addr + = cpu_to_le32(lp->rx_dma_addr[rx_index]); + lp->rx_ring[rx_index].buff_count = + cpu_to_le16(lp->rx_buff_len-2); + lp->rx_ring[rx_index].rx_flags |= cpu_to_le16(OWN_BIT); + rx_index = (++lp->rx_idx) & RX_RING_DR_MOD_MASK; + } + /* Check the interrupt status register for more packets in the + mean time. Process them since we have not used up our quota.*/ + + intr0 = readl(mmio + INT0); + /*Ack receive packets */ + writel(intr0 & RINT0,mmio + INT0); + + }while(intr0 & RINT0); + + /* Receive descriptor is empty now */ + dev->quota -= num_rx_pkt; + *budget -= num_rx_pkt; + netif_rx_complete(dev); + /* enable receive interrupt */ + writel(VAL0|RINTEN0, mmio + INTEN0); + writel(VAL2 | RDMD0, mmio + CMD0); + return 0; +rx_not_empty: + /* Do not call a netif_rx_complete */ + dev->quota -= num_rx_pkt; + *budget -= num_rx_pkt; + return 1; + + +} + +#else /* This function will check the ownership of receive buffers and descriptors. It will indicate to kernel up to half the number of maximum receive buffers in the descriptor ring, in a single receive interrupt. It will also replenish the descriptors with new skbs. 
*/ @@ -710,7 +865,7 @@ static int amd8111e_rx(struct net_device int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK; int min_pkt_len, status; int num_rx_pkt = 0; - int max_rx_pkt = NUM_RX_BUFFERS/2; + int max_rx_pkt = NUM_RX_BUFFERS; short pkt_len; #if AMD8111E_VLAN_TAG_USED short vtag; @@ -752,14 +907,14 @@ static int amd8111e_rx(struct net_device if (pkt_len < min_pkt_len) { lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; - lp->stats.rx_errors++; + lp->drv_rx_errors++; goto err_next_pkt; } if(!(new_skb = dev_alloc_skb(lp->rx_buff_len))){ /* if allocation fail, ignore that pkt and go to next one */ lp->rx_ring[rx_index].rx_flags &= RESET_RX_FLAGS; - lp->stats.rx_errors++; + lp->drv_rx_errors++; goto err_next_pkt; } @@ -803,7 +958,7 @@ err_next_pkt: return 0; } - +#endif /* CONFIG_AMD8111E_NAPI */ /* This function will indicate the link status to the kernel. */ @@ -896,12 +1051,14 @@ static struct net_device_stats *amd8111e new_stats->tx_bytes = amd8111e_read_mib(mmio, xmt_octets); /* stats.rx_errors */ + /* hw errors + errors driver reported */ new_stats->rx_errors = amd8111e_read_mib(mmio, rcv_undersize_pkts)+ amd8111e_read_mib(mmio, rcv_fragments)+ amd8111e_read_mib(mmio, rcv_jabbers)+ amd8111e_read_mib(mmio, rcv_alignment_errors)+ amd8111e_read_mib(mmio, rcv_fcs_errors)+ - amd8111e_read_mib(mmio, rcv_miss_pkts); + amd8111e_read_mib(mmio, rcv_miss_pkts)+ + lp->drv_rx_errors; /* stats.tx_errors */ new_stats->tx_errors = amd8111e_read_mib(mmio, xmt_underrun_pkts); @@ -1119,20 +1276,36 @@ static irqreturn_t amd8111e_interrupt(in /* Process all the INT event until INTR bit is clear. */ - if (!(intr0 & INTR)) { + if (!(intr0 & INTR)){ handled = 0; goto err_no_interrupt; } - /* Current driver processes 3 interrupts : RINT,TINT,LCINT */ + /* Current driver processes 4 interrupts : RINT,TINT,LCINT,STINT */ writel(intr0, mmio + INT0); /* Check if Receive Interrupt has occurred. 
*/ +#if CONFIG_AMD8111E_NAPI + if(intr0 & RINT0){ + if(netif_rx_schedule_prep(dev)){ + /* Disable receive interupts */ + writel(RINTEN0, mmio + INTEN0); + /* Schedule a polling routine */ + __netif_rx_schedule(dev); + } + else { + printk("************Driver bug! \ + interrupt while in poll\n"); + /* Fix by disabling interrupts */ + writel(RINT0, mmio + INT0); + } + } +#else if(intr0 & RINT0){ amd8111e_rx(dev); writel(VAL2 | RDMD0, mmio + CMD0); } - +#endif /* CONFIG_AMD8111E_NAPI */ /* Check if Transmit Interrupt has occurred. */ if(intr0 & TINT0) amd8111e_tx(dev); @@ -1164,6 +1337,7 @@ static void amd8111e_poll(struct net_dev } #endif + /* This function closes the network interface and updates the statistics so that most recent statistics will be available after the interface is down. */ @@ -1186,7 +1360,7 @@ static int amd8111e_close(struct net_dev spin_unlock_irq(&lp->lock); free_irq(dev->irq, dev); - + /* Update the statistics before closing */ amd8111e_get_stats(dev); lp->opened = 0; @@ -1560,6 +1734,23 @@ static int amd8111e_ioctl(struct net_dev } return -EOPNOTSUPP; } +static int amd8111e_set_mac_address(struct net_device *dev, void *p) +{ + struct amd8111e_priv *lp = dev->priv; + int i; + struct sockaddr *addr = p; + + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + spin_lock_irq(&lp->lock); + /* Setting the MAC address to the device */ + for(i = 0; i < ETH_ADDR_LEN; i++) + writeb( dev->dev_addr[i], lp->mmio + PADR + i ); + + spin_unlock_irq(&lp->lock); + + return 0; +} + /* This function changes the mtu of the device. It restarts the device to initialize the descriptor with new receive buffers. 
*/ @@ -1890,11 +2081,16 @@ static int __devinit amd8111e_probe_one( dev->stop = amd8111e_close; dev->get_stats = amd8111e_get_stats; dev->set_multicast_list = amd8111e_set_multicast_list; + dev->set_mac_address = amd8111e_set_mac_address; dev->do_ioctl = amd8111e_ioctl; dev->change_mtu = amd8111e_change_mtu; dev->irq =pdev->irq; dev->tx_timeout = amd8111e_tx_timeout; dev->watchdog_timeo = AMD8111E_TX_TIMEOUT; +#ifdef CONFIG_AMD8111E_NAPI + dev->poll = amd8111e_rx_poll; + dev->weight = 32; +#endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = amd8111e_poll; #endif @@ -1908,6 +2104,7 @@ static int __devinit amd8111e_probe_one( /* Set receive buffer length and set jumbo option*/ amd8111e_set_rx_buff_len(dev); + err = register_netdev(dev); if (err) { printk(KERN_ERR "amd8111e: Cannot register net device, " @@ -1954,7 +2151,7 @@ err_disable_pdev: } static struct pci_driver amd8111e_driver = { - .name = MODULE_NAME, + .name = MODULE_NAME, .id_table = amd8111e_pci_tbl, .probe = amd8111e_probe_one, .remove = __devexit_p(amd8111e_remove_one), --- linux-2.6.6-rc1/drivers/net/amd8111e.h 2003-10-17 15:58:03.000000000 -0700 +++ 25/drivers/net/amd8111e.h 2004-04-18 22:25:24.792061512 -0700 @@ -606,7 +606,7 @@ typedef enum { /* ipg parameters */ #define DEFAULT_IPG 0x60 #define IFS1_DELTA 36 -#define IPG_CONVERGE_JIFFIES (HZ / 2) +#define IPG_CONVERGE_JIFFIES (HZ/2) #define IPG_STABLE_TIME 5 #define MIN_IPG 96 #define MAX_IPG 255 @@ -790,6 +790,7 @@ struct amd8111e_priv{ #endif char opened; struct net_device_stats stats; + unsigned int drv_rx_errors; struct dev_mc_list* mc_list; struct amd8111e_coalesce_conf coal_conf; --- linux-2.6.6-rc1/drivers/net/arcnet/com20020-isa.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/net/arcnet/com20020-isa.c 2004-04-18 22:25:24.793061360 -0700 @@ -185,8 +185,6 @@ static void __exit com20020_exit(void) #ifndef MODULE static int __init com20020isa_setup(char *s) { - struct net_device *dev; - struct arcnet_local *lp; int ints[8]; s 
= get_options(s, 8, ints); --- linux-2.6.6-rc1/drivers/net/ariadne.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/ariadne.c 2004-04-18 22:25:24.795061056 -0700 @@ -216,7 +216,7 @@ static int __devinit ariadne_init_one(st } zorro_set_drvdata(z, dev); - printk("%s: Ariadne at 0x%08lx, Ethernet Address " + printk(KERN_INFO "%s: Ariadne at 0x%08lx, Ethernet Address " "%02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, board, dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); @@ -245,16 +245,16 @@ static int ariadne_open(struct net_devic lance->RAP = CSR89; /* Chip ID */ version |= swapw(lance->RDP)<<16; if ((version & 0x00000fff) != 0x00000003) { - printk("ariadne_open: Couldn't find AMD Ethernet Chip\n"); + printk(KERN_WARNING "ariadne_open: Couldn't find AMD Ethernet Chip\n"); return -EAGAIN; } if ((version & 0x0ffff000) != 0x00003000) { - printk("ariadne_open: Couldn't find Am79C960 (Wrong part number = %ld)\n", - (version & 0x0ffff000)>>12); + printk(KERN_WARNING "ariadne_open: Couldn't find Am79C960 (Wrong part " + "number = %ld)\n", (version & 0x0ffff000)>>12); return -EAGAIN; } #if 0 - printk("ariadne_open: Am79C960 (PCnet-ISA) Revision %ld\n", + printk(KERN_DEBUG "ariadne_open: Am79C960 (PCnet-ISA) Revision %ld\n", (version & 0xf0000000)>>28); #endif @@ -354,8 +354,8 @@ static void ariadne_init_ring(struct net priv->tx_ring[i] = &lancedata->tx_ring[i]; priv->tx_buff[i] = lancedata->tx_buff[i]; #if 0 - printk("TX Entry %2d at %p, Buf at %p\n", i, &lancedata->tx_ring[i], - lancedata->tx_buff[i]); + printk(KERN_DEBUG "TX Entry %2d at %p, Buf at %p\n", i, + &lancedata->tx_ring[i], lancedata->tx_buff[i]); #endif } @@ -370,8 +370,8 @@ static void ariadne_init_ring(struct net priv->rx_ring[i] = &lancedata->rx_ring[i]; priv->rx_buff[i] = lancedata->rx_buff[i]; #if 0 - printk("RX Entry %2d at %p, Buf at %p\n", i, &lancedata->rx_ring[i], - lancedata->rx_buff[i]); + printk(KERN_DEBUG "RX Entry %2d at %p, Buf at 
%p\n", i, + &lancedata->rx_ring[i], lancedata->rx_buff[i]); #endif } } @@ -389,9 +389,9 @@ static int ariadne_close(struct net_devi lance->RAP = CSR0; /* PCnet-ISA Controller Status */ if (ariadne_debug > 1) { - printk("%s: Shutting down ethercard, status was %2.2x.\n", dev->name, - lance->RDP); - printk("%s: %lu packets missed\n", dev->name, + printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n", + dev->name, lance->RDP); + printk(KERN_DEBUG "%s: %lu packets missed\n", dev->name, priv->stats.rx_missed_errors); } @@ -425,7 +425,7 @@ static irqreturn_t ariadne_interrupt(int int handled = 0; if (dev == NULL) { - printk("ariadne_interrupt(): irq for unknown device.\n"); + printk(KERN_WARNING "ariadne_interrupt(): irq for unknown device.\n"); return IRQ_NONE; } @@ -443,8 +443,8 @@ static irqreturn_t ariadne_interrupt(int #if 0 if (ariadne_debug > 5) { - printk("%s: interrupt csr0=%#2.2x new csr=%#2.2x.", dev->name, - csr0, lance->RDP); + printk(KERN_DEBUG "%s: interrupt csr0=%#2.2x new csr=%#2.2x.", + dev->name, csr0, lance->RDP); printk("["); if (csr0 & INTR) printk(" INTR"); @@ -514,8 +514,8 @@ static irqreturn_t ariadne_interrupt(int /* Ackk! On FIFO errors the Tx unit is turned off! */ priv->stats.tx_fifo_errors++; /* Remove this verbosity later! */ - printk("%s: Tx FIFO error! Status %4.4x.\n", dev->name, - csr0); + printk(KERN_ERR "%s: Tx FIFO error! Status %4.4x.\n", + dev->name, csr0); /* Restart the chip. */ lance->RDP = STRT; } @@ -529,8 +529,8 @@ static irqreturn_t ariadne_interrupt(int #ifndef final_version if (priv->cur_tx - dirty_tx >= TX_RING_SIZE) { - printk("out-of-sync dirty pointer, %d vs. %d, full=%d.\n", - dirty_tx, priv->cur_tx, priv->tx_full); + printk(KERN_ERR "out-of-sync dirty pointer, %d vs. 
%d, " + "full=%d.\n", dirty_tx, priv->cur_tx, priv->tx_full); dirty_tx += TX_RING_SIZE; } #endif @@ -556,8 +556,8 @@ static irqreturn_t ariadne_interrupt(int } if (csr0 & MERR) { handled = 1; - printk("%s: Bus master arbitration failure, status %4.4x.\n", - dev->name, csr0); + printk(KERN_ERR "%s: Bus master arbitration failure, status " + "%4.4x.\n", dev->name, csr0); /* Restart the chip. */ lance->RDP = STRT; } @@ -569,8 +569,8 @@ static irqreturn_t ariadne_interrupt(int #if 0 if (ariadne_debug > 4) - printk("%s: exiting interrupt, csr%d=%#4.4x.\n", dev->name, lance->RAP, - lance->RDP); + printk(KERN_DEBUG "%s: exiting interrupt, csr%d=%#4.4x.\n", dev->name, + lance->RAP, lance->RDP); #endif return IRQ_RETVAL(handled); } @@ -598,8 +598,8 @@ static int ariadne_start_xmit(struct sk_ #if 0 if (ariadne_debug > 3) { lance->RAP = CSR0; /* PCnet-ISA Controller Status */ - printk("%s: ariadne_start_xmit() called, csr0 %4.4x.\n", dev->name, - lance->RDP); + printk(KERN_DEBUG "%s: ariadne_start_xmit() called, csr0 %4.4x.\n", + dev->name, lance->RDP); lance->RDP = 0x0000; } #endif @@ -616,7 +616,7 @@ static int ariadne_start_xmit(struct sk_ /* Fill in a Tx ring entry */ #if 0 - printk("TX pkt type 0x%04x from ", ((u_short *)skb->data)[6]); + printk(KERN_DEBUG "TX pkt type 0x%04x from ", ((u_short *)skb->data)[6]); { int i; u_char *ptr = &((u_char *)skb->data)[6]; @@ -652,7 +652,7 @@ static int ariadne_start_xmit(struct sk_ len >>= 1; for (i = 0; i < len; i += 8) { int j; - printk("%04x:", i); + printk(KERN_DEBUG "%04x:", i); for (j = 0; (j < 8) && ((i+j) < len); j++) { if (!(j & 1)) printk(" "); @@ -671,8 +671,8 @@ static int ariadne_start_xmit(struct sk_ if ((priv->cur_tx >= TX_RING_SIZE) && (priv->dirty_tx >= TX_RING_SIZE)) { #if 0 - printk("*** Subtracting TX_RING_SIZE from cur_tx (%d) and dirty_tx (%d)\n", - priv->cur_tx, priv->dirty_tx); + printk(KERN_DEBUG "*** Subtracting TX_RING_SIZE from cur_tx (%d) and " + "dirty_tx (%d)\n", priv->cur_tx, priv->dirty_tx); #endif 
priv->cur_tx -= TX_RING_SIZE; @@ -729,7 +729,8 @@ static int ariadne_rx(struct net_device skb = dev_alloc_skb(pkt_len+2); if (skb == NULL) { - printk("%s: Memory squeeze, deferring packet.\n", dev->name); + printk(KERN_WARNING "%s: Memory squeeze, deferring packet.\n", + dev->name); for (i = 0; i < RX_RING_SIZE; i++) if (lowb(priv->rx_ring[(entry+i) % RX_RING_SIZE]->RMD1) & RF_OWN) break; @@ -749,7 +750,8 @@ static int ariadne_rx(struct net_device eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0); skb->protocol=eth_type_trans(skb,dev); #if 0 - printk("RX pkt type 0x%04x from ", ((u_short *)skb->data)[6]); + printk(KERN_DEBUG "RX pkt type 0x%04x from ", + ((u_short *)skb->data)[6]); { int i; u_char *ptr = &((u_char *)skb->data)[6]; @@ -825,7 +827,7 @@ static void set_multicast_list(struct ne if (dev->flags & IFF_PROMISC) { /* Log any net taps. */ - printk("%s: Promiscuous mode enabled.\n", dev->name); + printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name); lance->RAP = CSR15; /* Mode Register */ lance->RDP = PROM; /* Set promiscuous mode */ } else { --- linux-2.6.6-rc1/drivers/net/arm/etherh.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/arm/etherh.c 2004-04-18 22:25:24.797060752 -0700 @@ -60,11 +60,28 @@ static unsigned int net_debug = NET_DEBU struct etherh_priv { struct ei_device eidev; + void *ioc_fast; + void *memc; unsigned int id; - unsigned int ctrl_port; + void *ctrl_port; unsigned int ctrl; }; +struct etherh_data { + unsigned long ns8390_offset; + unsigned long dataport_offset; + unsigned long ctrlport_offset; + int ctrl_ioc; + const char name[16]; + /* + * netdev flags and port + */ + unsigned short flags; + unsigned char if_port; + unsigned char tx_start_page; + unsigned char stop_page; +}; + MODULE_AUTHOR("Russell King"); MODULE_DESCRIPTION("EtherH/EtherM driver"); MODULE_LICENSE("GPL"); @@ -72,13 +89,13 @@ MODULE_LICENSE("GPL"); static char version[] __initdata = "EtherH/EtherM Driver (c) 2002 Russell King v1.09\n"; 
-#define ETHERH500_DATAPORT 0x200 /* MEMC */ +#define ETHERH500_DATAPORT 0x800 /* MEMC */ #define ETHERH500_NS8390 0x000 /* MEMC */ -#define ETHERH500_CTRLPORT 0x200 /* IOC */ +#define ETHERH500_CTRLPORT 0x800 /* IOC */ -#define ETHERH600_DATAPORT 16 /* MEMC */ -#define ETHERH600_NS8390 0x200 /* MEMC */ -#define ETHERH600_CTRLPORT 0x080 /* MEMC */ +#define ETHERH600_DATAPORT 0x040 /* MEMC */ +#define ETHERH600_NS8390 0x800 /* MEMC */ +#define ETHERH600_CTRLPORT 0x200 /* MEMC */ #define ETHERH_CP_IE 1 #define ETHERH_CP_IF 2 @@ -90,9 +107,9 @@ static char version[] __initdata = /* * These came from CK/TEW */ -#define ETHERM_DATAPORT 0x080 /* MEMC */ -#define ETHERM_NS8390 0x200 /* MEMC */ -#define ETHERM_CTRLPORT 0x08f /* MEMC */ +#define ETHERM_DATAPORT 0x200 /* MEMC */ +#define ETHERM_NS8390 0x800 /* MEMC */ +#define ETHERM_CTRLPORT 0x23c /* MEMC */ #define ETHERM_TX_START_PAGE 64 #define ETHERM_STOP_PAGE 127 @@ -102,18 +119,18 @@ static char version[] __initdata = static inline void etherh_set_ctrl(struct etherh_priv *eh, unsigned int mask) { eh->ctrl |= mask; - outb(eh->ctrl, eh->ctrl_port); + writeb(eh->ctrl, eh->ctrl_port); } static inline void etherh_clr_ctrl(struct etherh_priv *eh, unsigned int mask) { eh->ctrl &= ~mask; - outb(eh->ctrl, eh->ctrl_port); + writeb(eh->ctrl, eh->ctrl_port); } static inline unsigned int etherh_get_stat(struct etherh_priv *eh) { - return inb(eh->ctrl_port); + return readb(eh->ctrl_port); } @@ -158,10 +175,10 @@ etherh_setif(struct net_device *dev) switch (dev->if_port) { case IF_PORT_10BASE2: - outb((inb(addr) & 0xf8) | 1, addr); + writeb((readb(addr) & 0xf8) | 1, addr); break; case IF_PORT_10BASET: - outb((inb(addr) & 0xf8), addr); + writeb((readb(addr) & 0xf8), addr); break; } break; @@ -200,7 +217,7 @@ etherh_getifstat(struct net_device *dev) stat = 1; break; case IF_PORT_10BASET: - stat = inb(dev->base_addr+EN0_RCNTHI) & 4; + stat = readb(dev->base_addr+EN0_RCNTHI) & 4; break; } break; @@ -258,7 +275,7 @@ etherh_reset(struct 
net_device *dev) { struct ei_device *ei_local = netdev_priv(dev); - outb_p(E8390_NODMA+E8390_PAGE0+E8390_STOP, dev->base_addr); + writeb(E8390_NODMA+E8390_PAGE0+E8390_STOP, dev->base_addr); /* * See if we need to change the interface type. @@ -306,31 +323,31 @@ etherh_block_output (struct net_device * dma_addr = dev->mem_start; count = (count + 1) & ~1; - outb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); + writeb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); - outb (0x42, addr + EN0_RCNTLO); - outb (0x00, addr + EN0_RCNTHI); - outb (0x42, addr + EN0_RSARLO); - outb (0x00, addr + EN0_RSARHI); - outb (E8390_RREAD | E8390_START, addr + E8390_CMD); + writeb (0x42, addr + EN0_RCNTLO); + writeb (0x00, addr + EN0_RCNTHI); + writeb (0x42, addr + EN0_RSARLO); + writeb (0x00, addr + EN0_RSARHI); + writeb (E8390_RREAD | E8390_START, addr + E8390_CMD); udelay (1); - outb (ENISR_RDC, addr + EN0_ISR); - outb (count, addr + EN0_RCNTLO); - outb (count >> 8, addr + EN0_RCNTHI); - outb (0, addr + EN0_RSARLO); - outb (start_page, addr + EN0_RSARHI); - outb (E8390_RWRITE | E8390_START, addr + E8390_CMD); + writeb (ENISR_RDC, addr + EN0_ISR); + writeb (count, addr + EN0_RCNTLO); + writeb (count >> 8, addr + EN0_RCNTHI); + writeb (0, addr + EN0_RSARLO); + writeb (start_page, addr + EN0_RSARHI); + writeb (E8390_RWRITE | E8390_START, addr + E8390_CMD); if (ei_local->word16) - outsw (dma_addr, buf, count >> 1); + writesw (dma_addr, buf, count >> 1); else - outsb (dma_addr, buf, count); + writesb (dma_addr, buf, count); dma_start = jiffies; - while ((inb (addr + EN0_ISR) & ENISR_RDC) == 0) + while ((readb (addr + EN0_ISR) & ENISR_RDC) == 0) if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ printk(KERN_ERR "%s: timeout waiting for TX RDC\n", dev->name); @@ -339,7 +356,7 @@ etherh_block_output (struct net_device * break; } - outb (ENISR_RDC, addr + EN0_ISR); + writeb (ENISR_RDC, addr + EN0_ISR); ei_local->dmaing = 0; } @@ -366,21 +383,21 @@ etherh_block_input 
(struct net_device *d dma_addr = dev->mem_start; buf = skb->data; - outb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); - outb (count, addr + EN0_RCNTLO); - outb (count >> 8, addr + EN0_RCNTHI); - outb (ring_offset, addr + EN0_RSARLO); - outb (ring_offset >> 8, addr + EN0_RSARHI); - outb (E8390_RREAD | E8390_START, addr + E8390_CMD); + writeb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); + writeb (count, addr + EN0_RCNTLO); + writeb (count >> 8, addr + EN0_RCNTHI); + writeb (ring_offset, addr + EN0_RSARLO); + writeb (ring_offset >> 8, addr + EN0_RSARHI); + writeb (E8390_RREAD | E8390_START, addr + E8390_CMD); if (ei_local->word16) { - insw (dma_addr, buf, count >> 1); + readsw (dma_addr, buf, count >> 1); if (count & 1) - buf[count - 1] = inb (dma_addr); + buf[count - 1] = readb (dma_addr); } else - insb (dma_addr, buf, count); + readsb (dma_addr, buf, count); - outb (ENISR_RDC, addr + EN0_ISR); + writeb (ENISR_RDC, addr + EN0_ISR); ei_local->dmaing = 0; } @@ -405,19 +422,19 @@ etherh_get_header (struct net_device *de addr = dev->base_addr; dma_addr = dev->mem_start; - outb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); - outb (sizeof (*hdr), addr + EN0_RCNTLO); - outb (0, addr + EN0_RCNTHI); - outb (0, addr + EN0_RSARLO); - outb (ring_page, addr + EN0_RSARHI); - outb (E8390_RREAD | E8390_START, addr + E8390_CMD); + writeb (E8390_NODMA | E8390_PAGE0 | E8390_START, addr + E8390_CMD); + writeb (sizeof (*hdr), addr + EN0_RCNTLO); + writeb (0, addr + EN0_RCNTHI); + writeb (0, addr + EN0_RSARLO); + writeb (ring_page, addr + EN0_RSARHI); + writeb (E8390_RREAD | E8390_START, addr + E8390_CMD); if (ei_local->word16) - insw (dma_addr, hdr, sizeof (*hdr) >> 1); + readsw (dma_addr, hdr, sizeof (*hdr) >> 1); else - insb (dma_addr, hdr, sizeof (*hdr)); + readsb (dma_addr, hdr, sizeof (*hdr)); - outb (ENISR_RDC, addr + EN0_ISR); + writeb (ENISR_RDC, addr + EN0_ISR); ei_local->dmaing = 0; } @@ -543,18 +560,22 @@ static u32 
etherm_regoffsets[16]; static int __init etherh_probe(struct expansion_card *ec, const struct ecard_id *id) { + const struct etherh_data *data = id->data; struct ei_device *ei_local; struct net_device *dev; struct etherh_priv *eh; - const char *dev_type; - int i, size, ret; + int i, ret; etherh_banner(); + ret = ecard_request_resources(ec); + if (ret) + goto out; + dev = alloc_ei_netdev(); if (!dev) { ret = -ENOMEM; - goto out; + goto release; } eh = netdev_priv(dev); @@ -562,111 +583,64 @@ etherh_probe(struct expansion_card *ec, spin_lock_init(&eh->eidev.page_lock); SET_MODULE_OWNER(dev); + SET_NETDEV_DEV(dev, &ec->dev); dev->open = etherh_open; dev->stop = etherh_close; dev->set_config = etherh_set_config; dev->irq = ec->irq; - dev->base_addr = ecard_address(ec, ECARD_MEMC, 0); + dev->if_port = data->if_port; + dev->flags |= data->flags; - /* - * IRQ and control port handling - */ - if (ec->irq != 11) { - ec->ops = ðerh_ops; - ec->irq_data = eh; - } eh->ctrl = 0; eh->id = ec->cid.product; - - switch (ec->cid.product) { - case PROD_ANT_ETHERM: - etherm_addr(dev->dev_addr); - dev->base_addr += ETHERM_NS8390; - dev->mem_start = dev->base_addr + ETHERM_DATAPORT; - eh->ctrl_port = dev->base_addr + ETHERM_CTRLPORT; - break; - - case PROD_I3_ETHERLAN500: - etherh_addr(dev->dev_addr, ec); - dev->base_addr += ETHERH500_NS8390; - dev->mem_start = dev->base_addr + ETHERH500_DATAPORT; - eh->ctrl_port = ecard_address (ec, ECARD_IOC, ECARD_FAST) - + ETHERH500_CTRLPORT; - break; - - case PROD_I3_ETHERLAN600: - case PROD_I3_ETHERLAN600A: - etherh_addr(dev->dev_addr, ec); - dev->base_addr += ETHERH600_NS8390; - dev->mem_start = dev->base_addr + ETHERH600_DATAPORT; - eh->ctrl_port = dev->base_addr + ETHERH600_CTRLPORT; - break; - - default: - printk(KERN_ERR "%s: unknown card type %x\n", - dev->name, ec->cid.product); - ret = -ENODEV; + eh->memc = ioremap(ecard_resource_start(ec, ECARD_RES_MEMC), PAGE_SIZE); + if (!eh->memc) { + ret = -ENOMEM; goto free; } - size = 16; - if 
(ec->cid.product == PROD_ANT_ETHERM) - size <<= 3; - - if (!request_region(dev->base_addr, size, dev->name)) { - ret = -EBUSY; - goto free; + eh->ctrl_port = eh->memc; + if (data->ctrl_ioc) { + eh->ioc_fast = ioremap(ecard_resource_start(ec, ECARD_RES_IOCFAST), PAGE_SIZE); + if (!eh->ioc_fast) { + ret = -ENOMEM; + goto free; + } + eh->ctrl_port = eh->ioc_fast; } + dev->base_addr = (unsigned long)eh->memc + data->ns8390_offset; + dev->mem_start = (unsigned long)eh->memc + data->dataport_offset; + eh->ctrl_port += data->ctrlport_offset; + /* - * If we're in the NIC slot, make sure the IRQ is enabled + * IRQ and control port handling - only for non-NIC slot cards. */ - if (dev->irq == 11) + if (ec->slot_no != 8) { + ec->ops = ðerh_ops; + ec->irq_data = eh; + } else { + /* + * If we're in the NIC slot, make sure the IRQ is enabled + */ etherh_set_ctrl(eh, ETHERH_CP_IE); - - switch (ec->cid.product) { - case PROD_ANT_ETHERM: - dev_type = "ANT EtherM"; - dev->if_port = IF_PORT_UNKNOWN; - break; - - case PROD_I3_ETHERLAN500: - dev_type = "i3 EtherH 500"; - dev->if_port = IF_PORT_UNKNOWN; - break; - - case PROD_I3_ETHERLAN600: - dev_type = "i3 EtherH 600"; - dev->flags |= IFF_PORTSEL | IFF_AUTOMEDIA; - dev->if_port = IF_PORT_10BASET; - break; - - case PROD_I3_ETHERLAN600A: - dev_type = "i3 EtherH 600A"; - dev->flags |= IFF_PORTSEL | IFF_AUTOMEDIA; - dev->if_port = IF_PORT_10BASET; - break; - - default: - dev_type = "unknown"; - break; } - ei_local = netdev_priv(dev); + ei_local = &eh->eidev; if (ec->cid.product == PROD_ANT_ETHERM) { - ei_local->tx_start_page = ETHERM_TX_START_PAGE; - ei_local->stop_page = ETHERM_STOP_PAGE; - ei_local->reg_offset = etherm_regoffsets; + etherm_addr(dev->dev_addr); + ei_local->reg_offset = etherm_regoffsets; } else { - ei_local->tx_start_page = ETHERH_TX_START_PAGE; - ei_local->stop_page = ETHERH_STOP_PAGE; - ei_local->reg_offset = etherh_regoffsets; + etherh_addr(dev->dev_addr, ec); + ei_local->reg_offset = etherh_regoffsets; } 
ei_local->name = dev->name; ei_local->word16 = 1; + ei_local->tx_start_page = data->tx_start_page; ei_local->rx_start_page = ei_local->tx_start_page + TX_PAGES; + ei_local->stop_page = data->stop_page; ei_local->reset_8390 = etherh_reset; ei_local->block_input = etherh_block_input; ei_local->block_output = etherh_block_output; @@ -678,10 +652,10 @@ etherh_probe(struct expansion_card *ec, ret = register_netdev(dev); if (ret) - goto release; + goto free; printk(KERN_INFO "%s: %s in slot %d, ", - dev->name, dev_type, ec->slot_no); + dev->name, data->name, ec->slot_no); for (i = 0; i < 6; i++) printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':'); @@ -690,10 +664,14 @@ etherh_probe(struct expansion_card *ec, return 0; - release: - release_region(dev->base_addr, 16); free: + if (eh->ioc_fast) + iounmap(eh->ioc_fast); + if (eh->memc) + iounmap(eh->memc); free_netdev(dev); + release: + ecard_release_resources(ec); out: return ret; } @@ -701,25 +679,69 @@ etherh_probe(struct expansion_card *ec, static void __devexit etherh_remove(struct expansion_card *ec) { struct net_device *dev = ecard_get_drvdata(ec); - int size = 16; + struct etherh_priv *eh = netdev_priv(dev); ecard_set_drvdata(ec, NULL); unregister_netdev(dev); - if (ec->cid.product == PROD_ANT_ETHERM) - size <<= 3; - release_region(dev->base_addr, size); + if (eh->ioc_fast) + iounmap(eh->ioc_fast); + iounmap(eh->memc); free_netdev(dev); ec->ops = NULL; kfree(ec->irq_data); + ecard_release_resources(ec); } +static struct etherh_data etherm_data = { + .ns8390_offset = ETHERM_NS8390, + .dataport_offset = ETHERM_NS8390 + ETHERM_DATAPORT, + .ctrlport_offset = ETHERM_NS8390 + ETHERM_CTRLPORT, + .name = "ANT EtherM", + .if_port = IF_PORT_UNKNOWN, + .tx_start_page = ETHERM_TX_START_PAGE, + .stop_page = ETHERM_STOP_PAGE, +}; + +static struct etherh_data etherlan500_data = { + .ns8390_offset = ETHERH500_NS8390, + .dataport_offset = ETHERH500_NS8390 + ETHERH500_DATAPORT, + .ctrlport_offset = ETHERH500_CTRLPORT, + .ctrl_ioc 
= 1, + .name = "i3 EtherH 500", + .if_port = IF_PORT_UNKNOWN, + .tx_start_page = ETHERH_TX_START_PAGE, + .stop_page = ETHERH_STOP_PAGE, +}; + +static struct etherh_data etherlan600_data = { + .ns8390_offset = ETHERH600_NS8390, + .dataport_offset = ETHERH600_NS8390 + ETHERH600_DATAPORT, + .ctrlport_offset = ETHERH600_NS8390 + ETHERH600_CTRLPORT, + .name = "i3 EtherH 600", + .flags = IFF_PORTSEL | IFF_AUTOMEDIA, + .if_port = IF_PORT_10BASET, + .tx_start_page = ETHERH_TX_START_PAGE, + .stop_page = ETHERH_STOP_PAGE, +}; + +static struct etherh_data etherlan600a_data = { + .ns8390_offset = ETHERH600_NS8390, + .dataport_offset = ETHERH600_NS8390 + ETHERH600_DATAPORT, + .ctrlport_offset = ETHERH600_NS8390 + ETHERH600_CTRLPORT, + .name = "i3 EtherH 600A", + .flags = IFF_PORTSEL | IFF_AUTOMEDIA, + .if_port = IF_PORT_10BASET, + .tx_start_page = ETHERH_TX_START_PAGE, + .stop_page = ETHERH_STOP_PAGE, +}; + static const struct ecard_id etherh_ids[] = { - { MANU_ANT, PROD_ANT_ETHERM }, - { MANU_I3, PROD_I3_ETHERLAN500 }, - { MANU_I3, PROD_I3_ETHERLAN600 }, - { MANU_I3, PROD_I3_ETHERLAN600A }, + { MANU_ANT, PROD_ANT_ETHERM, ðerm_data }, + { MANU_I3, PROD_I3_ETHERLAN500, ðerlan500_data }, + { MANU_I3, PROD_I3_ETHERLAN600, ðerlan600_data }, + { MANU_I3, PROD_I3_ETHERLAN600A, ðerlan600a_data }, { 0xffff, 0xffff } }; @@ -737,8 +759,8 @@ static int __init etherh_init(void) int i; for (i = 0; i < 16; i++) { - etherh_regoffsets[i] = i; - etherm_regoffsets[i] = i << 3; + etherh_regoffsets[i] = i << 2; + etherm_regoffsets[i] = i << 5; } return ecard_register_driver(ðerh_driver); --- linux-2.6.6-rc1/drivers/net/b44.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/b44.c 2004-04-18 22:25:24.799060448 -0700 @@ -1382,7 +1382,7 @@ static void b44_set_rx_mode(struct net_d spin_unlock_irq(&bp->lock); } -static int b44_ethtool_ioctl (struct net_device *dev, void *useraddr) +static int b44_ethtool_ioctl (struct net_device *dev, void __user *useraddr) { struct b44 *bp = dev->priv; struct 
pci_dev *pci_dev = bp->pdev; @@ -1625,13 +1625,13 @@ static int b44_ethtool_ioctl (struct net static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data; + struct mii_ioctl_data __user *data = (struct mii_ioctl_data __user *)&ifr->ifr_data; struct b44 *bp = dev->priv; int err; switch (cmd) { case SIOCETHTOOL: - return b44_ethtool_ioctl(dev, (void *) ifr->ifr_data); + return b44_ethtool_ioctl(dev, (void __user*) ifr->ifr_data); case SIOCGMIIPHY: data->phy_id = bp->phy_addr; --- linux-2.6.6-rc1/drivers/net/dummy.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/net/dummy.c 2004-04-18 22:25:24.799060448 -0700 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,17 @@ static int numdummies = 1; static int dummy_xmit(struct sk_buff *skb, struct net_device *dev); static struct net_device_stats *dummy_get_stats(struct net_device *dev); +static int dummy_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + return 0; +} + /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) { @@ -58,6 +70,7 @@ static void __init dummy_setup(struct ne dev->get_stats = dummy_get_stats; dev->hard_start_xmit = dummy_xmit; dev->set_multicast_list = set_multicast_list; + dev->set_mac_address = dummy_set_address; #ifdef CONFIG_NET_FASTROUTE dev->accept_fastpath = dummy_accept_fastpath; #endif @@ -68,6 +81,7 @@ static void __init dummy_setup(struct ne dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; SET_MODULE_OWNER(dev); + random_ether_addr(dev->dev_addr); } static int dummy_xmit(struct sk_buff *skb, struct net_device *dev) @@ -90,6 +104,7 @@ static struct net_device **dummies; /* Number of dummy devices to be set up by this module. 
*/ module_param(numdummies, int, 0); +MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); static int __init dummy_init_one(int index) { --- linux-2.6.6-rc1/drivers/net/e1000/e1000.h 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/e1000/e1000.h 2004-04-18 22:25:24.800060296 -0700 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -74,8 +75,6 @@ #define BAR_0 0 #define BAR_1 1 #define BAR_5 5 -#define PCI_DMA_64BIT 0xffffffffffffffffULL -#define PCI_DMA_32BIT 0x00000000ffffffffULL struct e1000_adapter; --- linux-2.6.6-rc1/drivers/net/e1000/e1000_main.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/e1000/e1000_main.c 2004-04-18 22:25:24.801060144 -0700 @@ -390,10 +390,10 @@ e1000_probe(struct pci_dev *pdev, if((err = pci_enable_device(pdev))) return err; - if(!(err = pci_set_dma_mask(pdev, PCI_DMA_64BIT))) { + if(!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { pci_using_dac = 1; } else { - if((err = pci_set_dma_mask(pdev, PCI_DMA_32BIT))) { + if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { E1000_ERR("No usable DMA configuration, aborting\n"); return err; } --- linux-2.6.6-rc1/drivers/net/epic100.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/epic100.c 2004-04-18 22:25:29.935279624 -0700 @@ -96,9 +96,9 @@ static int rx_copybreak; Making the Tx ring too large decreases the effectiveness of channel bonding and packet priority. There are no ill effects from too-large receive rings. */ -#define TX_RING_SIZE 16 -#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */ -#define RX_RING_SIZE 32 +#define TX_RING_SIZE 256 +#define TX_QUEUE_LEN 240 /* Limit ring entries actually used. 
*/ +#define RX_RING_SIZE 256 #define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct epic_tx_desc) #define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct epic_rx_desc) @@ -292,6 +292,12 @@ enum CommandBits { StopTxDMA=0x20, StopRxDMA=0x40, RestartTx=0x80, }; +#define EpicRemoved 0xffffffff /* Chip failed or removed (CardBus) */ + +#define EpicNapiEvent (TxEmpty | TxDone | \ + RxDone | RxStarted | RxEarlyWarn | RxOverflow | RxFull) +#define EpicNormalEvent (0x0000ffff & ~EpicNapiEvent) + static u16 media2miictl[16] = { 0, 0x0C00, 0x0C00, 0x2000, 0x0100, 0x2100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -330,9 +336,12 @@ struct epic_private { /* Ring pointers. */ spinlock_t lock; /* Group with Tx control cache line. */ + spinlock_t napi_lock; + unsigned int reschedule_in_poll; unsigned int cur_tx, dirty_tx; unsigned int cur_rx, dirty_rx; + u32 irq_mask; unsigned int rx_buf_sz; /* Based on MTU+slack. */ struct pci_dev *pci_dev; /* PCI bus location. */ @@ -359,7 +368,8 @@ static void epic_timer(unsigned long dat static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev); -static int epic_rx(struct net_device *dev); +static int epic_rx(struct net_device *dev, int budget); +static int epic_poll(struct net_device *dev, int *budget); static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs); static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static struct ethtool_ops netdev_ethtool_ops; @@ -378,7 +388,7 @@ static int __devinit epic_init_one (stru int irq; struct net_device *dev; struct epic_private *ep; - int i, option = 0, duplex = 0; + int i, ret, option = 0, duplex = 0; void *ring_space; dma_addr_t ring_dma; @@ -392,29 +402,33 @@ static int __devinit epic_init_one (stru card_idx++; - i = pci_enable_device(pdev); - if (i) - return i; + ret = pci_enable_device(pdev); + if (ret) + goto out; irq = pdev->irq; if 
(pci_resource_len(pdev, 0) < pci_id_tbl[chip_idx].io_size) { printk (KERN_ERR "card %d: no PCI region space\n", card_idx); - return -ENODEV; + ret = -ENODEV; + goto err_out_disable; } pci_set_master(pdev); + ret = pci_request_regions(pdev, DRV_NAME); + if (ret < 0) + goto err_out_disable; + + ret = -ENOMEM; + dev = alloc_etherdev(sizeof (*ep)); if (!dev) { printk (KERN_ERR "card %d: no memory for eth device\n", card_idx); - return -ENOMEM; + goto err_out_free_res; } SET_MODULE_OWNER(dev); SET_NETDEV_DEV(dev, &pdev->dev); - if (pci_request_regions(pdev, DRV_NAME)) - goto err_out_free_netdev; - #ifdef USE_IO_OPS ioaddr = pci_resource_start (pdev, 0); #else @@ -422,7 +436,7 @@ static int __devinit epic_init_one (stru ioaddr = (long) ioremap (ioaddr, pci_resource_len (pdev, 1)); if (!ioaddr) { printk (KERN_ERR DRV_NAME " %d: ioremap failed\n", card_idx); - goto err_out_free_res; + goto err_out_free_netdev; } #endif @@ -459,7 +473,9 @@ static int __devinit epic_init_one (stru dev->base_addr = ioaddr; dev->irq = irq; - spin_lock_init (&ep->lock); + spin_lock_init(&ep->lock); + spin_lock_init(&ep->napi_lock); + ep->reschedule_in_poll = 0; /* Bring the chip out of low-power mode. */ outl(0x4200, ioaddr + GENCTL); @@ -489,6 +505,9 @@ static int __devinit epic_init_one (stru ep->pci_dev = pdev; ep->chip_id = chip_idx; ep->chip_flags = pci_id_tbl[chip_idx].drv_flags; + ep->irq_mask = + (ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170) + | CntFull | TxUnderrun | EpicNapiEvent; /* Find the connected MII xcvrs. 
Doing this in open() would allow detecting external xcvrs later, but @@ -543,10 +562,12 @@ static int __devinit epic_init_one (stru dev->ethtool_ops = &netdev_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; dev->tx_timeout = &epic_tx_timeout; + dev->poll = epic_poll; + dev->weight = 64; - i = register_netdev(dev); - if (i) - goto err_out_unmap_tx; + ret = register_netdev(dev); + if (ret < 0) + goto err_out_unmap_rx; printk(KERN_INFO "%s: %s at %#lx, IRQ %d, ", dev->name, pci_id_tbl[chip_idx].name, ioaddr, dev->irq); @@ -554,19 +575,24 @@ static int __devinit epic_init_one (stru printk("%2.2x:", dev->dev_addr[i]); printk("%2.2x.\n", dev->dev_addr[i]); - return 0; +out: + return ret; +err_out_unmap_rx: + pci_free_consistent(pdev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); err_out_unmap_tx: pci_free_consistent(pdev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); err_out_iounmap: #ifndef USE_IO_OPS iounmap(ioaddr); -err_out_free_res: -#endif - pci_release_regions(pdev); err_out_free_netdev: +#endif free_netdev(dev); - return -ENODEV; +err_out_free_res: + pci_release_regions(pdev); +err_out_disable: + pci_disable_device(pdev); + goto out; } /* Serial EEPROM section. 
*/ @@ -592,6 +618,36 @@ err_out_free_netdev: #define EE_READ256_CMD (6 << 8) #define EE_ERASE_CMD (7 << 6) +static void epic_disable_int(struct net_device *dev, struct epic_private *ep) +{ + long ioaddr = dev->base_addr; + + outl(0x00000000, ioaddr + INTMASK); +} + +static inline void __epic_pci_commit(long ioaddr) +{ +#ifndef USE_IO_OPS + inl(ioaddr + INTMASK); +#endif +} + +static void epic_napi_irq_off(struct net_device *dev, struct epic_private *ep) +{ + long ioaddr = dev->base_addr; + + outl(ep->irq_mask & ~EpicNapiEvent, ioaddr + INTMASK); + __epic_pci_commit(ioaddr); +} + +static void epic_napi_irq_on(struct net_device *dev, struct epic_private *ep) +{ + long ioaddr = dev->base_addr; + + /* No need to commit possible posted write */ + outl(ep->irq_mask | EpicNapiEvent, ioaddr + INTMASK); +} + static int __devinit read_eeprom(long ioaddr, int location) { int i; @@ -752,9 +808,8 @@ static int epic_open(struct net_device * /* Enable interrupts by setting the interrupt mask. */ outl((ep->chip_flags & TYPE2_INTR ? PCIBusErr175 : PCIBusErr170) - | CntFull | TxUnderrun | TxDone | TxEmpty - | RxError | RxOverflow | RxFull | RxHeader | RxDone, - ioaddr + INTMASK); + | CntFull | TxUnderrun + | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK); if (debug > 1) printk(KERN_DEBUG "%s: epic_open() ioaddr %lx IRQ %d status %4.4x " @@ -795,7 +850,7 @@ static void epic_pause(struct net_device } /* Remove the packets on the Rx queue. */ - epic_rx(dev); + epic_rx(dev, RX_RING_SIZE); } static void epic_restart(struct net_device *dev) @@ -841,9 +896,9 @@ static void epic_restart(struct net_devi /* Enable interrupts by setting the interrupt mask. */ outl((ep->chip_flags & TYPE2_INTR ? 
PCIBusErr175 : PCIBusErr170) - | CntFull | TxUnderrun | TxDone | TxEmpty - | RxError | RxOverflow | RxFull | RxHeader | RxDone, - ioaddr + INTMASK); + | CntFull | TxUnderrun + | RxError | RxHeader | EpicNapiEvent, ioaddr + INTMASK); + printk(KERN_DEBUG "%s: epic_restart() done, cmd status %4.4x, ctl %4.4x" " interrupt %4.4x.\n", dev->name, (int)inl(ioaddr + COMMAND), (int)inl(ioaddr + GENCTL), @@ -929,7 +984,6 @@ static void epic_init_ring(struct net_de int i; ep->tx_full = 0; - ep->lock = (spinlock_t) SPIN_LOCK_UNLOCKED; ep->dirty_tx = ep->cur_tx = 0; ep->cur_rx = ep->dirty_rx = 0; ep->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); @@ -1029,6 +1083,76 @@ static int epic_start_xmit(struct sk_buf return 0; } +static void epic_tx_error(struct net_device *dev, struct epic_private *ep, + int status) +{ + struct net_device_stats *stats = &ep->stats; + +#ifndef final_version + /* There was an major error, log it. */ + if (debug > 1) + printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", + dev->name, status); +#endif + stats->tx_errors++; + if (status & 0x1050) + stats->tx_aborted_errors++; + if (status & 0x0008) + stats->tx_carrier_errors++; + if (status & 0x0040) + stats->tx_window_errors++; + if (status & 0x0010) + stats->tx_fifo_errors++; +} + +static void epic_tx(struct net_device *dev, struct epic_private *ep) +{ + unsigned int dirty_tx, cur_tx; + + /* + * Note: if this lock becomes a problem we can narrow the locked + * region at the cost of occasionally grabbing the lock more times. 
+ */ + cur_tx = ep->cur_tx; + for (dirty_tx = ep->dirty_tx; cur_tx - dirty_tx > 0; dirty_tx++) { + struct sk_buff *skb; + int entry = dirty_tx % TX_RING_SIZE; + int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus); + + if (txstatus & DescOwn) + break; /* It still hasn't been Txed */ + + if (likely(txstatus & 0x0001)) { + ep->stats.collisions += (txstatus >> 8) & 15; + ep->stats.tx_packets++; + ep->stats.tx_bytes += ep->tx_skbuff[entry]->len; + } else + epic_tx_error(dev, ep, txstatus); + + /* Free the original skb. */ + skb = ep->tx_skbuff[entry]; + pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, + skb->len, PCI_DMA_TODEVICE); + dev_kfree_skb_irq(skb); + ep->tx_skbuff[entry] = 0; + } + +#ifndef final_version + if (cur_tx - dirty_tx > TX_RING_SIZE) { + printk(KERN_WARNING + "%s: Out-of-sync dirty pointer, %d vs. %d, full=%d.\n", + dev->name, dirty_tx, cur_tx, ep->tx_full); + dirty_tx += TX_RING_SIZE; + } +#endif + ep->dirty_tx = dirty_tx; + if (ep->tx_full && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) { + /* The ring is no longer full, allow new TX entries. */ + ep->tx_full = 0; + netif_wake_queue(dev); + } +} + /* The interrupt handler does all of the Rx thread work and cleans up after the Tx thread. */ static irqreturn_t epic_interrupt(int irq, void *dev_instance, struct pt_regs *regs) @@ -1042,7 +1166,7 @@ static irqreturn_t epic_interrupt(int ir do { status = inl(ioaddr + INTSTAT); /* Acknowledge all of the current interrupt sources ASAP. 
*/ - outl(status & 0x00007fff, ioaddr + INTSTAT); + outl(status & EpicNormalEvent, ioaddr + INTSTAT); if (debug > 4) printk(KERN_DEBUG "%s: Interrupt, status=%#8.8x new " @@ -1053,74 +1177,21 @@ static irqreturn_t epic_interrupt(int ir break; handled = 1; - if (status & (RxDone | RxStarted | RxEarlyWarn | RxOverflow)) - epic_rx(dev); - - if (status & (TxEmpty | TxDone)) { - unsigned int dirty_tx, cur_tx; - - /* Note: if this lock becomes a problem we can narrow the locked - region at the cost of occasionally grabbing the lock more - times. */ - spin_lock(&ep->lock); - cur_tx = ep->cur_tx; - dirty_tx = ep->dirty_tx; - for (; cur_tx - dirty_tx > 0; dirty_tx++) { - struct sk_buff *skb; - int entry = dirty_tx % TX_RING_SIZE; - int txstatus = le32_to_cpu(ep->tx_ring[entry].txstatus); - - if (txstatus & DescOwn) - break; /* It still hasn't been Txed */ - - if ( ! (txstatus & 0x0001)) { - /* There was an major error, log it. */ -#ifndef final_version - if (debug > 1) - printk(KERN_DEBUG "%s: Transmit error, Tx status %8.8x.\n", - dev->name, txstatus); -#endif - ep->stats.tx_errors++; - if (txstatus & 0x1050) ep->stats.tx_aborted_errors++; - if (txstatus & 0x0008) ep->stats.tx_carrier_errors++; - if (txstatus & 0x0040) ep->stats.tx_window_errors++; - if (txstatus & 0x0010) ep->stats.tx_fifo_errors++; - } else { - ep->stats.collisions += (txstatus >> 8) & 15; - ep->stats.tx_packets++; - ep->stats.tx_bytes += ep->tx_skbuff[entry]->len; - } - - /* Free the original skb. */ - skb = ep->tx_skbuff[entry]; - pci_unmap_single(ep->pci_dev, ep->tx_ring[entry].bufaddr, - skb->len, PCI_DMA_TODEVICE); - dev_kfree_skb_irq(skb); - ep->tx_skbuff[entry] = 0; - } - -#ifndef final_version - if (cur_tx - dirty_tx > TX_RING_SIZE) { - printk(KERN_WARNING "%s: Out-of-sync dirty pointer, %d vs. 
%d, full=%d.\n", - dev->name, dirty_tx, cur_tx, ep->tx_full); - dirty_tx += TX_RING_SIZE; - } -#endif - ep->dirty_tx = dirty_tx; - if (ep->tx_full - && cur_tx - dirty_tx < TX_QUEUE_LEN - 4) { - /* The ring is no longer full, allow new TX entries. */ - ep->tx_full = 0; - spin_unlock(&ep->lock); - netif_wake_queue(dev); + if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) { + spin_lock(&ep->napi_lock); + if (netif_rx_schedule_prep(dev)) { + epic_napi_irq_off(dev, ep); + __netif_rx_schedule(dev); } else - spin_unlock(&ep->lock); + ep->reschedule_in_poll++; + spin_unlock(&ep->napi_lock); } + status &= ~EpicNapiEvent; /* Check uncommon events all at once. */ - if (status & (CntFull | TxUnderrun | RxOverflow | RxFull | - PCIBusErr170 | PCIBusErr175)) { - if (status == 0xffffffff) /* Chip failed or removed (CardBus). */ + if (status & + (CntFull | TxUnderrun | PCIBusErr170 | PCIBusErr175)) { + if (status == EpicRemoved) break; /* Always update the error counts to avoid overhead later. */ ep->stats.rx_missed_errors += inb(ioaddr + MPCNT); @@ -1133,11 +1204,6 @@ static irqreturn_t epic_interrupt(int ir /* Restart the transmit process. */ outl(RestartTx, ioaddr + COMMAND); } - if (status & RxOverflow) { /* Missed a Rx frame. */ - ep->stats.rx_errors++; - } - if (status & (RxOverflow | RxFull)) - outw(RxQueued, ioaddr + COMMAND); if (status & PCIBusErr170) { printk(KERN_ERR "%s: PCI Bus Error! EPIC status %4.4x.\n", dev->name, status); @@ -1147,6 +1213,8 @@ static irqreturn_t epic_interrupt(int ir /* Clear all error sources. 
*/ outl(status & 0x7f18, ioaddr + INTSTAT); } + if (!(status & EpicNormalEvent)) + break; if (--boguscnt < 0) { printk(KERN_ERR "%s: Too much work at interrupt, " "IntrStatus=0x%8.8x.\n", @@ -1164,7 +1232,7 @@ static irqreturn_t epic_interrupt(int ir return IRQ_RETVAL(handled); } -static int epic_rx(struct net_device *dev) +static int epic_rx(struct net_device *dev, int budget) { struct epic_private *ep = dev->priv; int entry = ep->cur_rx % RX_RING_SIZE; @@ -1174,6 +1242,10 @@ static int epic_rx(struct net_device *de if (debug > 4) printk(KERN_DEBUG " In epic_rx(), entry %d %8.8x.\n", entry, ep->rx_ring[entry].rxstatus); + + if (rx_work_limit > budget) + rx_work_limit = budget; + /* If we own the next entry, it's a new packet. Send it up. */ while ((ep->rx_ring[entry].rxstatus & cpu_to_le32(DescOwn)) == 0) { int status = le32_to_cpu(ep->rx_ring[entry].rxstatus); @@ -1234,7 +1306,7 @@ static int epic_rx(struct net_device *de ep->rx_skbuff[entry] = NULL; } skb->protocol = eth_type_trans(skb, dev); - netif_rx(skb); + netif_receive_skb(skb); dev->last_rx = jiffies; ep->stats.rx_packets++; ep->stats.rx_bytes += pkt_len; @@ -1262,6 +1334,61 @@ static int epic_rx(struct net_device *de return work_done; } +static void epic_rx_err(struct net_device *dev, struct epic_private *ep) +{ + long ioaddr = dev->base_addr; + int status; + + status = inl(ioaddr + INTSTAT); + + if (status == EpicRemoved) + return; + if (status & RxOverflow) /* Missed a Rx frame. */ + ep->stats.rx_errors++; + if (status & (RxOverflow | RxFull)) + outw(RxQueued, ioaddr + COMMAND); +} + +static int epic_poll(struct net_device *dev, int *budget) +{ + struct epic_private *ep = dev->priv; + int work_done, orig_budget; + long ioaddr = dev->base_addr; + + orig_budget = (*budget > dev->quota) ? 
dev->quota : *budget; + +rx_action: + + epic_tx(dev, ep); + + work_done = epic_rx(dev, *budget); + + epic_rx_err(dev, ep); + + *budget -= work_done; + dev->quota -= work_done; + + if (netif_running(dev) && (work_done < orig_budget)) { + unsigned long flags; + + spin_lock_irqsave(&ep->napi_lock, flags); + + if (ep->reschedule_in_poll) { + ep->reschedule_in_poll--; + spin_unlock_irqrestore(&ep->napi_lock, flags); + goto rx_action; + } + + outl(EpicNapiEvent, ioaddr + INTSTAT); + epic_napi_irq_on(dev, ep); + __netif_rx_complete(dev); + + spin_unlock_irqrestore(&ep->napi_lock, flags); + } + + return (work_done >= orig_budget); +} + static int epic_close(struct net_device *dev) { long ioaddr = dev->base_addr; @@ -1276,9 +1403,13 @@ static int epic_close(struct net_device dev->name, (int)inl(ioaddr + INTSTAT)); del_timer_sync(&ep->timer); - epic_pause(dev); + + epic_disable_int(dev, ep); + free_irq(dev->irq, dev); + epic_pause(dev); + /* Free all the skbuffs in the Rx queue. */ for (i = 0; i < RX_RING_SIZE; i++) { skb = ep->rx_skbuff[i]; @@ -1476,6 +1607,7 @@ static void __devexit epic_remove_one (s #endif pci_release_regions(pdev); free_netdev(dev); + pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); /* pci_power_off(pdev, -1); */ } --- linux-2.6.6-rc1/drivers/net/fc/iph5526.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/fc/iph5526.c 2004-04-18 22:25:24.805059536 -0700 @@ -2910,7 +2910,7 @@ static void iph5526_timeout(struct net_d { struct fc_info *fi = dev->priv; printk(KERN_WARNING "%s: timed out on send.\n", dev->name); - fi->fc_stats.rx_dropped++; + fi->fc_stats.tx_dropped++; dev->trans_start = jiffies; netif_wake_queue(dev); } @@ -2953,7 +2953,7 @@ static int iph5526_send_packet(struct sk fi->fc_stats.tx_packets++; } else - fi->fc_stats.rx_dropped++; + fi->fc_stats.tx_dropped++; dev->trans_start = jiffies; /* We free up the IP buffers in the OCI_interrupt handler. * status == 0 implies that the frame was not transmitted. 
So the --- linux-2.6.6-rc1/drivers/net/hydra.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/hydra.c 2004-04-18 22:25:24.806059384 -0700 @@ -153,10 +153,11 @@ static int __devinit hydra_init(struct z zorro_set_drvdata(z, dev); - printk("%s: Hydra at 0x%08lx, address %02x:%02x:%02x:%02x:%02x:%02x " - "(hydra.c " HYDRA_VERSION ")\n", dev->name, z->resource.start, - dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], - dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); + printk(KERN_INFO "%s: Hydra at 0x%08lx, address " + "%02x:%02x:%02x:%02x:%02x:%02x (hydra.c " HYDRA_VERSION ")\n", + dev->name, z->resource.start, dev->dev_addr[0], dev->dev_addr[1], + dev->dev_addr[2], dev->dev_addr[3], dev->dev_addr[4], + dev->dev_addr[5]); return 0; } @@ -170,14 +171,14 @@ static int hydra_open(struct net_device static int hydra_close(struct net_device *dev) { if (ei_debug > 1) - printk("%s: Shutting down ethercard.\n", dev->name); + printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name); ei_close(dev); return 0; } static void hydra_reset_8390(struct net_device *dev) { - printk("Hydra hw reset not there\n"); + printk(KERN_INFO "Hydra hw reset not there\n"); } static void hydra_get_8390_hdr(struct net_device *dev, --- linux-2.6.6-rc1/drivers/net/irda/ali-ircc.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/ali-ircc.c 2004-04-18 22:25:24.808059080 -0700 @@ -44,7 +44,7 @@ #include #include -#include +#include "ali-ircc.h" #define CHIP_IO_EXTENT 8 #define BROKEN_DONGLE_ID --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/ali-ircc.h 2004-04-18 22:25:24.809058928 -0700 @@ -0,0 +1,228 @@ +/********************************************************************* + * + * Filename: ali-ircc.h + * Version: 0.5 + * Description: Driver for the ALI M1535D and M1543C FIR Controller + * Status: Experimental. 
+ * Author: Benjamin Kong + * Created at: 2000/10/16 03:46PM + * Modified at: 2001/1/3 02:56PM + * Modified by: Benjamin Kong + * + * Copyright (c) 2000 Benjamin Kong + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + ********************************************************************/ + +#ifndef ALI_IRCC_H +#define ALI_IRCC_H + +#include + +#include +#include +#include + +/* SIR Register */ +/* Usr definition of linux/serial_reg.h */ + +/* FIR Register */ +#define BANK0 0x20 +#define BANK1 0x21 +#define BANK2 0x22 +#define BANK3 0x23 + +#define FIR_MCR 0x07 /* Master Control Register */ + +/* Bank 0 */ +#define FIR_DR 0x00 /* Alias 0, FIR Data Register (R/W) */ +#define FIR_IER 0x01 /* Alias 1, FIR Interrupt Enable Register (R/W) */ +#define FIR_IIR 0x02 /* Alias 2, FIR Interrupt Identification Register (Read only) */ +#define FIR_LCR_A 0x03 /* Alias 3, FIR Line Control Register A (R/W) */ +#define FIR_LCR_B 0x04 /* Alias 4, FIR Line Control Register B (R/W) */ +#define FIR_LSR 0x05 /* Alias 5, FIR Line Status Register (R/W) */ +#define FIR_BSR 0x06 /* Alias 6, FIR Bus Status Register (Read only) */ + + + /* Alias 1 */ + #define IER_FIFO 0x10 /* FIR FIFO Interrupt Enable */ + #define IER_TIMER 0x20 /* Timer Interrupt Enable */ + #define IER_EOM 0x40 /* End of Message Interrupt Enable */ + #define IER_ACT 0x80 /* Active Frame Interrupt Enable */ + + /* Alias 2 */ + #define IIR_FIFO 0x10 /* FIR FIFO Interrupt */ + #define IIR_TIMER 0x20 /* Timer Interrupt */ + #define IIR_EOM 0x40 /* End of Message Interrupt */ + #define IIR_ACT 0x80 /* Active Frame Interrupt */ + + /* Alias 3 */ + #define LCR_A_FIFO_RESET 0x80 /* FIFO Reset */ + + /* Alias 4 */ + #define LCR_B_BW 0x10 /* Brick Wall */ + #define LCR_B_SIP 0x20 /* SIP Enable */ + 
#define LCR_B_TX_MODE 0x40 /* Transmit Mode */ + #define LCR_B_RX_MODE 0x80 /* Receive Mode */ + + /* Alias 5 */ + #define LSR_FIR_LSA 0x00 /* FIR Line Status Address */ + #define LSR_FRAME_ABORT 0x08 /* Frame Abort */ + #define LSR_CRC_ERROR 0x10 /* CRC Error */ + #define LSR_SIZE_ERROR 0x20 /* Size Error */ + #define LSR_FRAME_ERROR 0x40 /* Frame Error */ + #define LSR_FIFO_UR 0x80 /* FIFO Underrun */ + #define LSR_FIFO_OR 0x80 /* FIFO Overrun */ + + /* Alias 6 */ + #define BSR_FIFO_NOT_EMPTY 0x80 /* FIFO Not Empty */ + +/* Bank 1 */ +#define FIR_CR 0x00 /* Alias 0, FIR Configuration Register (R/W) */ +#define FIR_FIFO_TR 0x01 /* Alias 1, FIR FIFO Threshold Register (R/W) */ +#define FIR_DMA_TR 0x02 /* Alias 2, FIR DMA Threshold Register (R/W) */ +#define FIR_TIMER_IIR 0x03 /* Alias 3, FIR Timer interrupt interval register (W/O) */ +#define FIR_FIFO_FR 0x03 /* Alias 3, FIR FIFO Flag register (R/O) */ +#define FIR_FIFO_RAR 0x04 /* Alias 4, FIR FIFO Read Address register (R/O) */ +#define FIR_FIFO_WAR 0x05 /* Alias 5, FIR FIFO Write Address register (R/O) */ +#define FIR_TR 0x06 /* Alias 6, Test REgister (W/O) */ + + /* Alias 0 */ + #define CR_DMA_EN 0x01 /* DMA Enable */ + #define CR_DMA_BURST 0x02 /* DMA Burst Mode */ + #define CR_TIMER_EN 0x08 /* Timer Enable */ + + /* Alias 3 */ + #define TIMER_IIR_500 0x00 /* 500 us */ + #define TIMER_IIR_1ms 0x01 /* 1 ms */ + #define TIMER_IIR_2ms 0x02 /* 2 ms */ + #define TIMER_IIR_4ms 0x03 /* 4 ms */ + +/* Bank 2 */ +#define FIR_IRDA_CR 0x00 /* Alias 0, IrDA Control Register (R/W) */ +#define FIR_BOF_CR 0x01 /* Alias 1, BOF Count Register (R/W) */ +#define FIR_BW_CR 0x02 /* Alias 2, Brick Wall Count Register (R/W) */ +#define FIR_TX_DSR_HI 0x03 /* Alias 3, TX Data Size Register (high) (R/W) */ +#define FIR_TX_DSR_LO 0x04 /* Alias 4, TX Data Size Register (low) (R/W) */ +#define FIR_RX_DSR_HI 0x05 /* Alias 5, RX Data Size Register (high) (R/W) */ +#define FIR_RX_DSR_LO 0x06 /* Alias 6, RX Data Size Register (low) (R/W) */ + 
+ /* Alias 0 */ + #define IRDA_CR_HDLC1152 0x80 /* 1.152Mbps HDLC Select */ + #define IRDA_CR_CRC 0X40 /* CRC Select. */ + #define IRDA_CR_HDLC 0x20 /* HDLC select. */ + #define IRDA_CR_HP_MODE 0x10 /* HP mode (read only) */ + #define IRDA_CR_SD_ST 0x08 /* SD/MODE State. */ + #define IRDA_CR_FIR_SIN 0x04 /* FIR SIN Select. */ + #define IRDA_CR_ITTX_0 0x02 /* SOUT State. IRTX force to 0 */ + #define IRDA_CR_ITTX_1 0x03 /* SOUT State. IRTX force to 1 */ + +/* Bank 3 */ +#define FIR_ID_VR 0x00 /* Alias 0, FIR ID Version Register (R/O) */ +#define FIR_MODULE_CR 0x01 /* Alias 1, FIR Module Control Register (R/W) */ +#define FIR_IO_BASE_HI 0x02 /* Alias 2, FIR Higher I/O Base Address Register (R/O) */ +#define FIR_IO_BASE_LO 0x03 /* Alias 3, FIR Lower I/O Base Address Register (R/O) */ +#define FIR_IRQ_CR 0x04 /* Alias 4, FIR IRQ Channel Register (R/O) */ +#define FIR_DMA_CR 0x05 /* Alias 5, FIR DMA Channel Register (R/O) */ + +struct ali_chip { + char *name; + int cfg[2]; + unsigned char entr1; + unsigned char entr2; + unsigned char cid_index; + unsigned char cid_value; + int (*probe)(struct ali_chip *chip, chipio_t *info); + int (*init)(struct ali_chip *chip, chipio_t *info); +}; +typedef struct ali_chip ali_chip_t; + + +/* DMA modes needed */ +#define DMA_TX_MODE 0x08 /* Mem to I/O, ++, demand. */ +#define DMA_RX_MODE 0x04 /* I/O to mem, ++, demand. 
*/ + +#define MAX_TX_WINDOW 7 +#define MAX_RX_WINDOW 7 + +#define TX_FIFO_Threshold 8 +#define RX_FIFO_Threshold 1 +#define TX_DMA_Threshold 1 +#define RX_DMA_Threshold 1 + +/* For storing entries in the status FIFO */ + +struct st_fifo_entry { + int status; + int len; +}; + +struct st_fifo { + struct st_fifo_entry entries[MAX_RX_WINDOW]; + int pending_bytes; + int head; + int tail; + int len; +}; + +struct frame_cb { + void *start; /* Start of frame in DMA mem */ + int len; /* Length of frame in DMA mem */ +}; + +struct tx_fifo { + struct frame_cb queue[MAX_TX_WINDOW]; /* Info about frames in queue */ + int ptr; /* Currently being sent */ + int len; /* Length of queue */ + int free; /* Next free slot */ + void *tail; /* Next free start in DMA mem */ +}; + +/* Private data for each instance */ +struct ali_ircc_cb { + + struct st_fifo st_fifo; /* Info about received frames */ + struct tx_fifo tx_fifo; /* Info about frames to be transmitted */ + + struct net_device *netdev; /* Yes! we are some kind of netdevice */ + struct net_device_stats stats; + + struct irlap_cb *irlap; /* The link layer we are bound to */ + struct qos_info qos; /* QoS capabilities for this device */ + + chipio_t io; /* IrDA controller information */ + iobuff_t tx_buff; /* Transmit buffer */ + iobuff_t rx_buff; /* Receive buffer */ + + __u8 ier; /* Interrupt enable register */ + + __u8 InterruptID; /* Interrupt ID */ + __u8 BusStatus; /* Bus Status */ + __u8 LineStatus; /* Line Status */ + + unsigned char rcvFramesOverflow; + + struct timeval stamp; + struct timeval now; + + spinlock_t lock; /* For serializing operations */ + + __u32 new_speed; + int index; /* Instance index */ + + unsigned char fifo_opti_buf; + + struct pm_dev *dev; +}; + +static inline void switch_bank(int iobase, int bank) +{ + outb(bank, iobase+FIR_MCR); +} + +#endif /* ALI_IRCC_H */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/au1000_ircc.h 2004-04-18 22:25:24.810058776 -0700 @@ -0,0 +1,127 @@ 
+/* + * + * BRIEF MODULE DESCRIPTION + * Au1000 IrDA driver. + * + * Copyright 2001 MontaVista Software Inc. + * Author: MontaVista Software, Inc. + * ppopov@mvista.com or source@mvista.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef AU1000_IRCC_H +#define AU1000_IRCC_H + +#include + +#include +#include +#include + +#define NUM_IR_IFF 1 +#define NUM_IR_DESC 64 +#define RING_SIZE_4 0x0 +#define RING_SIZE_16 0x3 +#define RING_SIZE_64 0xF +#define MAX_NUM_IR_DESC 64 +#define MAX_BUF_SIZE 2048 + +#define BPS_115200 0 +#define BPS_57600 1 +#define BPS_38400 2 +#define BPS_19200 5 +#define BPS_9600 11 +#define BPS_2400 47 + +/* Ring descriptor flags */ +#define AU_OWN (1<<7) /* tx,rx */ + +#define IR_DIS_CRC (1<<6) /* tx */ +#define IR_BAD_CRC (1<<5) /* tx */ +#define IR_NEED_PULSE (1<<4) /* tx */ +#define IR_FORCE_UNDER (1<<3) /* tx */ +#define IR_DISABLE_TX (1<<2) /* tx */ +#define IR_HW_UNDER (1<<0) /* tx */ +#define IR_TX_ERROR (IR_DIS_CRC|IR_BAD_CRC|IR_HW_UNDER) + +#define IR_PHY_ERROR (1<<6) /* rx */ +#define IR_CRC_ERROR (1<<5) /* rx */ +#define IR_MAX_LEN (1<<4) /* rx */ +#define IR_FIFO_OVER (1<<3) /* rx */ +#define IR_SIR_ERROR (1<<2) /* rx */ +#define IR_RX_ERROR (IR_PHY_ERROR|IR_CRC_ERROR| \ + IR_MAX_LEN|IR_FIFO_OVER|IR_SIR_ERROR) + +typedef struct db_dest { + struct db_dest *pnext; + volatile u32 *vaddr; + dma_addr_t dma_addr; +} db_dest_t; + + +typedef struct ring_desc { + u8 count_0; /* 7:0 */ + u8 count_1; /* 12:8 */ + u8 reserved; + u8 flags; + u8 addr_0; /* 7:0 */ + u8 addr_1; /* 15:8 */ + u8 addr_2; /* 23:16 */ + u8 addr_3; /* 31:24 */ +} ring_dest_t; + + +/* Private data for each instance */ +struct au1k_private { + + db_dest_t *pDBfree; + db_dest_t db[2*NUM_IR_DESC]; + volatile ring_dest_t *rx_ring[NUM_IR_DESC]; + volatile ring_dest_t *tx_ring[NUM_IR_DESC]; + db_dest_t *rx_db_inuse[NUM_IR_DESC]; + db_dest_t *tx_db_inuse[NUM_IR_DESC]; + u32 rx_head; + u32 tx_head; + u32 tx_tail; + u32 tx_full; + + iobuff_t rx_buff; + + struct net_device *netdev; + struct net_device_stats stats; + + struct timeval stamp; + struct timeval now; + struct qos_info qos; + struct irlap_cb *irlap; + + u8 open; + u32 speed; + u32 newspeed; + + u32 intr_work_done; /* number of Rx and Tx pkts 
processed in the isr */ + struct timer_list timer; + + spinlock_t lock; /* For serializing operations */ + struct pm_dev *dev; +}; +#endif /* AU1000_IRCC_H */ --- linux-2.6.6-rc1/drivers/net/irda/au1k_ir.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/au1k_ir.c 2004-04-18 22:25:24.810058776 -0700 @@ -52,7 +52,7 @@ #include #include #include -#include "net/irda/au1000_ircc.h" +#include "au1000_ircc.h" static int au1k_irda_net_init(struct net_device *); static int au1k_irda_start(struct net_device *); --- linux-2.6.6-rc1/drivers/net/irda/donauboe.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/donauboe.c 2004-04-18 22:25:24.812058472 -0700 @@ -55,10 +55,6 @@ static char *rcsid = /* See below for a description of the logic in this driver */ -/* Is irda_crc16_table[] exported? not yet */ -/* define this if you get errors about multiple defns of irda_crc16_table */ -#undef CRC_EXPORTED - /* User servicable parts */ /* USE_PROBE Create the code which probes the chip and does a few tests */ /* do_probe module parameter Enable this code */ @@ -209,47 +205,6 @@ static int do_probe = 0; /**********************************************************************/ -/* Fcs code */ - -#ifdef CRC_EXPORTED -extern __u16 const irda_crc16_table[]; -#else -static __u16 const irda_crc16_table[256] = { - 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, - 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, - 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, - 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, - 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, - 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, - 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, - 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, - 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, - 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, - 0x5285, 
0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, - 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, - 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, - 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, - 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, - 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, - 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, - 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, - 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, - 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, - 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, - 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, - 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, - 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, - 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, - 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, - 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, - 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, - 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, - 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, - 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, - 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 -}; -#endif - static int toshoboe_checkfcs (unsigned char *buf, int len) { @@ -1669,7 +1624,7 @@ toshoboe_open (struct pci_dev *pci_dev, /*We need to align the taskfile on a taskfile size boundary */ { - __u32 addr; + unsigned long addr; addr = (__u32) self->ringbuf; addr &= ~(OBOE_RING_LEN - 1); --- linux-2.6.6-rc1/drivers/net/irda/irda-usb.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/irda-usb.c 2004-04-18 22:25:24.813058320 -0700 @@ -62,7 +62,7 @@ #include #include -#include +#include "irda-usb.h" 
/*------------------------------------------------------------------*/ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/irda-usb.h 2004-04-18 22:25:24.815058016 -0700 @@ -0,0 +1,163 @@ +/***************************************************************************** + * + * Filename: irda-usb.h + * Version: 0.9b + * Description: IrDA-USB Driver + * Status: Experimental + * Author: Dag Brattli + * + * Copyright (C) 2001, Roman Weissgaerber + * Copyright (C) 2000, Dag Brattli + * Copyright (C) 2001, Jean Tourrilhes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + *****************************************************************************/ + +#include + +#include +#include /* struct irlap_cb */ + +#define RX_COPY_THRESHOLD 200 +#define IRDA_USB_MAX_MTU 2051 +#define IRDA_USB_SPEED_MTU 64 /* Weird, but works like this */ + +/* Maximum number of active URBs on the Rx path + * This is the number of buffers that we keep between the USB hardware and the + * IrDA stack. + * + * Note : the network layer does also queue the packets between us and the + * IrDA stack, and is actually pretty fast and efficient in doing that. + * Therefore, we don't need to have a large number of URBs, and we can + * perfectly live happy with only one.
We certainly don't need to keep the + * full IrTTP window around here... + * I repeat for those who have trouble understanding : 1 URB is plenty + * good enough to handle back-to-back (brickwalled) frames. I tried it, + * it works (it's the hardware that has trouble doing it). + * + * Having 2 URBs would allow the USB stack to process one URB while we take + * care of the other and then swap the URBs... + * On the other hand, increasing the number of URBs will have penalties + * in terms of latency and will interact with the link management in IrLAP... + * Jean II */ +#define IU_MAX_ACTIVE_RX_URBS 1 /* Don't touch !!! */ + +/* When a Rx URB is passed back to us, we can't reuse it immediately, + * because it may still be referenced by the USB layer. Therefore we + * need to keep one extra URB in the Rx path. + * Jean II */ +#define IU_MAX_RX_URBS (IU_MAX_ACTIVE_RX_URBS + 1) + +/* Various ugly stuff to try to work around generic problems */ +/* Send speed command in case of timeout, just for trying to get things sane */ +#define IU_BUG_KICK_TIMEOUT +/* Show the USB class descriptor */ +#undef IU_DUMP_CLASS_DESC +/* Assume a minimum round trip latency for USB transfer (in us)... + * USB transfers are done in the next USB slot if there is no traffic + * (1/19 msec) and are done at 12 Mb/s : + * Waiting for slot + tx = (53us + 16us) * 2 = 137us minimum.
+ * Rx notification will only be done at the end of the USB frame period : + * OHCI : frame period = 1ms + * UHCI : frame period = 1ms, but notification can take 2 or 3 ms :-( + * EHCI : frame period = 125us */ +#define IU_USB_MIN_RTT 500 /* This should be safe in most cases */ + +/* Inbound header */ +#define MEDIA_BUSY 0x80 + +#define SPEED_2400 0x01 +#define SPEED_9600 0x02 +#define SPEED_19200 0x03 +#define SPEED_38400 0x04 +#define SPEED_57600 0x05 +#define SPEED_115200 0x06 +#define SPEED_576000 0x07 +#define SPEED_1152000 0x08 +#define SPEED_4000000 0x09 + +/* Basic capabilities */ +#define IUC_DEFAULT 0x00 /* Basic device compliant with 1.0 spec */ +/* Main bugs */ +#define IUC_SPEED_BUG 0x01 /* Device doesn't set speed after the frame */ +#define IUC_NO_WINDOW 0x02 /* Device doesn't behave with big Rx window */ +#define IUC_NO_TURN 0x04 /* Device doesn't do turnaround by itself */ +/* Not currently used */ +#define IUC_SIR_ONLY 0x08 /* Device doesn't behave at FIR speeds */ +#define IUC_SMALL_PKT 0x10 /* Device doesn't behave with big Rx packets */ +#define IUC_MAX_WINDOW 0x20 /* Device underestimate the Rx window */ +#define IUC_MAX_XBOFS 0x40 /* Device need more xbofs than advertised */ + +/* USB class definitions */ +#define USB_IRDA_HEADER 0x01 +#define USB_CLASS_IRDA 0x02 /* USB_CLASS_APP_SPEC subclass */ +#define USB_DT_IRDA 0x21 + +struct irda_class_desc { + __u8 bLength; + __u8 bDescriptorType; + __u16 bcdSpecRevision; + __u8 bmDataSize; + __u8 bmWindowSize; + __u8 bmMinTurnaroundTime; + __u16 wBaudRate; + __u8 bmAdditionalBOFs; + __u8 bIrdaRateSniff; + __u8 bMaxUnicastList; +} __attribute__ ((packed)); + +/* class specific interface request to get the IrDA-USB class descriptor + * (6.2.5, USB-IrDA class spec 1.0) */ + +#define IU_REQ_GET_CLASS_DESC 0x06 + +struct irda_usb_cb { + struct irda_class_desc *irda_desc; + struct usb_device *usbdev; /* init: probe_irda */ + struct usb_interface *usbintf; /* init: probe_irda */ + int netopen; /* Device is 
active for network */ + int present; /* Device is present on the bus */ + __u32 capability; /* Capability of the hardware */ + __u8 bulk_in_ep; /* Rx Endpoint assignments */ + __u8 bulk_out_ep; /* Tx Endpoint assignments */ + __u16 bulk_out_mtu; /* Max Tx packet size in bytes */ + __u8 bulk_int_ep; /* Interrupt Endpoint assignments */ + + wait_queue_head_t wait_q; /* for timeouts */ + + struct urb *rx_urb[IU_MAX_RX_URBS]; /* URBs used to receive data frames */ + struct urb *idle_rx_urb; /* Pointer to idle URB in Rx path */ + struct urb *tx_urb; /* URB used to send data frames */ + struct urb *speed_urb; /* URB used to send speed commands */ + + struct net_device *netdev; /* Yes! we are some kind of netdev. */ + struct net_device_stats stats; + struct irlap_cb *irlap; /* The link layer we are binded to */ + struct qos_info qos; + hashbin_t *tx_list; /* Queued transmit skb's */ + char *speed_buff; /* Buffer for speed changes */ + + struct timeval stamp; + struct timeval now; + + spinlock_t lock; /* For serializing operations */ + + __u16 xbofs; /* Current xbofs setting */ + __s16 new_xbofs; /* xbofs we need to set */ + __u32 speed; /* Current speed */ + __s32 new_speed; /* speed we need to set */ +}; + --- linux-2.6.6-rc1/drivers/net/irda/irport.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/irport.c 2004-04-18 22:25:24.816057864 -0700 @@ -58,7 +58,7 @@ #include #include -#include +#include "irport.h" #define IO_EXTENT 8 --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/irport.h 2004-04-18 22:25:24.816057864 -0700 @@ -0,0 +1,90 @@ +/********************************************************************* + * + * Filename: irport.h + * Version: 0.1 + * Description: Serial driver for IrDA + * Status: Experimental. + * Author: Dag Brattli + * Created at: Sun Aug 3 13:49:59 1997 + * Modified at: Fri Jan 14 10:21:10 2000 + * Modified by: Dag Brattli + * + * Copyright (c) 1997, 1998-2000 Dag Brattli + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * Neither Dag Brattli nor University of Tromsø admit liability nor + * provide warranty for any of this software. This material is + * provided "AS-IS" and at no charge. + * + ********************************************************************/ + +#ifndef IRPORT_H +#define IRPORT_H + +#include +#include +#include +#include + +#include + +#define SPEED_DEFAULT 9600 +#define SPEED_MAX 115200 + +/* + * These are the supported serial types. + */ +#define PORT_UNKNOWN 0 +#define PORT_8250 1 +#define PORT_16450 2 +#define PORT_16550 3 +#define PORT_16550A 4 +#define PORT_CIRRUS 5 +#define PORT_16650 6 +#define PORT_MAX 6 + +#define FRAME_MAX_SIZE 2048 + +struct irport_cb { + struct net_device *netdev; /* Yes! we are some kind of netdevice */ + struct net_device_stats stats; + + struct irlap_cb *irlap; /* The link layer we are attached to */ + + chipio_t io; /* IrDA controller information */ + iobuff_t tx_buff; /* Transmit buffer */ + iobuff_t rx_buff; /* Receive buffer */ + + struct qos_info qos; /* QoS capabilities for this device */ + dongle_t *dongle; /* Dongle driver */ + + __u32 flags; /* Interface flags */ + __u32 new_speed; + int mode; + int index; /* Instance index */ + int transmitting; /* Are we transmitting ? 
*/ + + spinlock_t lock; /* For serializing operations */ + + /* For piggyback drivers */ + void *priv; + void (*change_speed)(void *priv, __u32 speed); + int (*interrupt)(int irq, void *dev_id, struct pt_regs *regs); +}; + +struct irport_cb *irport_open(int i, unsigned int iobase, unsigned int irq); +int irport_close(struct irport_cb *self); +void irport_start(struct irport_cb *self); +void irport_stop(struct irport_cb *self); +void irport_change_speed(void *priv, __u32 speed); +irqreturn_t irport_interrupt(int irq, void *dev_id, struct pt_regs *regs); +int irport_hard_xmit(struct sk_buff *skb, struct net_device *dev); +int irport_net_open(struct net_device *dev); +int irport_net_close(struct net_device *dev); + +#endif /* IRPORT_H */ --- linux-2.6.6-rc1/drivers/net/irda/nsc-ircc.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/nsc-ircc.c 2004-04-18 22:25:24.818057560 -0700 @@ -63,7 +63,7 @@ #include #include -#include +#include "nsc-ircc.h" #define CHIP_IO_EXTENT 8 #define BROKEN_DONGLE_ID --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/nsc-ircc.h 2004-04-18 22:25:24.820057256 -0700 @@ -0,0 +1,277 @@ +/********************************************************************* + * + * Filename: nsc-ircc.h + * Version: + * Description: + * Status: Experimental. + * Author: Dag Brattli + * Created at: Fri Nov 13 14:37:40 1998 + * Modified at: Sun Jan 23 17:47:00 2000 + * Modified by: Dag Brattli + * + * Copyright (c) 1998-2000 Dag Brattli + * Copyright (c) 1998 Lichen Wang, + * Copyright (c) 1998 Actisys Corp., www.actisys.com + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * Neither Dag Brattli nor University of Tromsø admit liability nor + * provide warranty for any of this software. 
This material is + * provided "AS-IS" and at no charge. + * + ********************************************************************/ + +#ifndef NSC_IRCC_H +#define NSC_IRCC_H + +#include + +#include +#include +#include + +/* DMA modes needed */ +#define DMA_TX_MODE 0x08 /* Mem to I/O, ++, demand. */ +#define DMA_RX_MODE 0x04 /* I/O to mem, ++, demand. */ + +/* Config registers for the '108 */ +#define CFG_108_BAIC 0x00 +#define CFG_108_CSRT 0x01 +#define CFG_108_MCTL 0x02 + +/* Config registers for the '338 */ +#define CFG_338_FER 0x00 +#define CFG_338_FAR 0x01 +#define CFG_338_PTR 0x02 +#define CFG_338_PNP0 0x1b +#define CFG_338_PNP1 0x1c +#define CFG_338_PNP3 0x4f + +/* Config registers for the '39x (in the logical device bank) */ +#define CFG_39X_LDN 0x07 /* Logical device number (Super I/O bank) */ +#define CFG_39X_SIOCF1 0x21 /* SuperI/O Config */ +#define CFG_39X_ACT 0x30 /* Device activation */ +#define CFG_39X_BASEH 0x60 /* Device base address (high bits) */ +#define CFG_39X_BASEL 0x61 /* Device base address (low bits) */ +#define CFG_39X_IRQNUM 0x70 /* Interrupt number & wake up enable */ +#define CFG_39X_IRQSEL 0x71 /* Interrupt select (edge/level + polarity) */ +#define CFG_39X_DMA0 0x74 /* DMA 0 configuration */ +#define CFG_39X_DMA1 0x75 /* DMA 1 configuration */ +#define CFG_39X_SPC 0xF0 /* Serial port configuration register */ + +/* Flags for configuration register CRF0 */ +#define APEDCRC 0x02 +#define ENBNKSEL 0x01 + +/* Set 0 */ +#define TXD 0x00 /* Transmit data port */ +#define RXD 0x00 /* Receive data port */ + +/* Register 1 */ +#define IER 0x01 /* Interrupt Enable Register */ +#define IER_RXHDL_IE 0x01 /* Receiver high data level interrupt */ +#define IER_TXLDL_IE 0x02 /* Transmitter low data level interrupt */ +#define IER_LS_IE 0x04 /* Link Status Interrupt */ +#define IER_ETXURI 0x04 /* Tx underrun */ +#define IER_DMA_IE 0x10 /* DMA finished interrupt */ +#define IER_TXEMP_IE 0x20 +#define IER_SFIF_IE 0x40 /* Frame status FIFO intr */
+#define IER_TMR_IE 0x80 /* Timer event */ + +#define FCR 0x02 /* (write only) */ +#define FCR_FIFO_EN 0x01 /* Enable FIFOs */ +#define FCR_RXSR 0x02 /* Rx FIFO soft reset */ +#define FCR_TXSR 0x04 /* Tx FIFO soft reset */ +#define FCR_RXTH 0x40 /* Rx FIFO threshold (set to 16) */ +#define FCR_TXTH 0x20 /* Tx FIFO threshold (set to 17) */ + +#define EIR 0x02 /* (read only) */ +#define EIR_RXHDL_EV 0x01 +#define EIR_TXLDL_EV 0x02 +#define EIR_LS_EV 0x04 +#define EIR_DMA_EV 0x10 +#define EIR_TXEMP_EV 0x20 +#define EIR_SFIF_EV 0x40 +#define EIR_TMR_EV 0x80 + +#define LCR 0x03 /* Link control register */ +#define LCR_WLS_8 0x03 /* 8 bits */ + +#define BSR 0x03 /* Bank select register */ +#define BSR_BKSE 0x80 +#define BANK0 LCR_WLS_8 /* Must make sure that we set 8N1 */ +#define BANK1 0x80 +#define BANK2 0xe0 +#define BANK3 0xe4 +#define BANK4 0xe8 +#define BANK5 0xec +#define BANK6 0xf0 +#define BANK7 0xf4 + +#define MCR 0x04 /* Mode Control Register */ +#define MCR_MODE_MASK ~(0xd0) +#define MCR_UART 0x00 +#define MCR_RESERVED 0x20 +#define MCR_SHARP_IR 0x40 +#define MCR_SIR 0x60 +#define MCR_MIR 0x80 +#define MCR_FIR 0xa0 +#define MCR_CEIR 0xb0 +#define MCR_IR_PLS 0x10 +#define MCR_DMA_EN 0x04 +#define MCR_EN_IRQ 0x08 +#define MCR_TX_DFR 0x08 + +#define LSR 0x05 /* Link status register */ +#define LSR_RXDA 0x01 /* Receiver data available */ +#define LSR_TXRDY 0x20 /* Transmitter ready */ +#define LSR_TXEMP 0x40 /* Transmitter empty */ + +#define ASCR 0x07 /* Auxiliary Status and Control Register */ +#define ASCR_RXF_TOUT 0x01 /* Rx FIFO timeout */ +#define ASCR_FEND_INF 0x02 /* Frame end bytes in rx FIFO */ +#define ASCR_S_EOT 0x04 /* Set end of transmission */ +#define ASCT_RXBSY 0x20 /* Rx busy */ +#define ASCR_TXUR 0x40 /* Transmitter underrun */ +#define ASCR_CTE 0x80 /* Clear timer event */ + +/* Bank 2 */ +#define BGDL 0x00 /* Baud Generator Divisor Port (Low Byte) */ +#define BGDH 0x01 /* Baud Generator Divisor Port (High Byte) */ + +#define ECR1 0x02 /*
Extended Control Register 1 */ +#define ECR1_EXT_SL 0x01 /* Extended Mode Select */ +#define ECR1_DMANF 0x02 /* DMA Fairness */ +#define ECR1_DMATH 0x04 /* DMA Threshold */ +#define ECR1_DMASWP 0x08 /* DMA Swap */ + +#define EXCR2 0x04 +#define EXCR2_TFSIZ 0x01 /* Tx FIFO size = 32 */ +#define EXCR2_RFSIZ 0x04 /* Rx FIFO size = 32 */ + +#define TXFLV 0x06 /* Tx FIFO level */ +#define RXFLV 0x07 /* Rx FIFO level */ + +/* Bank 3 */ +#define MID 0x00 + +/* Bank 4 */ +#define TMRL 0x00 /* Timer low byte */ +#define TMRH 0x01 /* Timer high byte */ +#define IRCR1 0x02 /* Infrared control register 1 */ +#define IRCR1_TMR_EN 0x01 /* Timer enable */ + +#define TFRLL 0x04 +#define TFRLH 0x05 +#define RFRLL 0x06 +#define RFRLH 0x07 + +/* Bank 5 */ +#define IRCR2 0x04 /* Infrared control register 2 */ +#define IRCR2_MDRS 0x04 /* MIR data rate select */ +#define IRCR2_FEND_MD 0x20 /* */ + +#define FRM_ST 0x05 /* Frame status FIFO */ +#define FRM_ST_VLD 0x80 /* Frame status FIFO data valid */ +#define FRM_ST_ERR_MSK 0x5f +#define FRM_ST_LOST_FR 0x40 /* Frame lost */ +#define FRM_ST_MAX_LEN 0x10 /* Max frame len exceeded */ +#define FRM_ST_PHY_ERR 0x08 /* Physical layer error */ +#define FRM_ST_BAD_CRC 0x04 +#define FRM_ST_OVR1 0x02 /* Rx FIFO overrun */ +#define FRM_ST_OVR2 0x01 /* Frame status FIFO overrun */ + +#define RFLFL 0x06 +#define RFLFH 0x07 + +/* Bank 6 */ +#define IR_CFG2 0x00 +#define IR_CFG2_DIS_CRC 0x02 + +/* Bank 7 */ +#define IRM_CR 0x07 /* Infrared module control register */ +#define IRM_CR_IRX_MSL 0x40 +#define IRM_CR_AF_MNT 0x80 /* Automatic format */ + +/* NSC chip information */ +struct nsc_chip { + char *name; /* Name of chipset */ + int cfg[3]; /* Config registers */ + u_int8_t cid_index; /* Chip identification index reg */ + u_int8_t cid_value; /* Chip identification expected value */ + u_int8_t cid_mask; /* Chip identification revision mask */ + + /* Functions for probing and initializing the specific chip */ + int (*probe)(struct nsc_chip *chip,
chipio_t *info); + int (*init)(struct nsc_chip *chip, chipio_t *info); +}; +typedef struct nsc_chip nsc_chip_t; + +/* For storing entries in the status FIFO */ +struct st_fifo_entry { + int status; + int len; +}; + +#define MAX_TX_WINDOW 7 +#define MAX_RX_WINDOW 7 + +struct st_fifo { + struct st_fifo_entry entries[MAX_RX_WINDOW]; + int pending_bytes; + int head; + int tail; + int len; +}; + +struct frame_cb { + void *start; /* Start of frame in DMA mem */ + int len; /* Lenght of frame in DMA mem */ +}; + +struct tx_fifo { + struct frame_cb queue[MAX_TX_WINDOW]; /* Info about frames in queue */ + int ptr; /* Currently being sent */ + int len; /* Lenght of queue */ + int free; /* Next free slot */ + void *tail; /* Next free start in DMA mem */ +}; + +/* Private data for each instance */ +struct nsc_ircc_cb { + struct st_fifo st_fifo; /* Info about received frames */ + struct tx_fifo tx_fifo; /* Info about frames to be transmitted */ + + struct net_device *netdev; /* Yes! we are some kind of netdevice */ + struct net_device_stats stats; + + struct irlap_cb *irlap; /* The link layer we are binded to */ + struct qos_info qos; /* QoS capabilities for this device */ + + chipio_t io; /* IrDA controller information */ + iobuff_t tx_buff; /* Transmit buffer */ + iobuff_t rx_buff; /* Receive buffer */ + + __u8 ier; /* Interrupt enable register */ + + struct timeval stamp; + struct timeval now; + + spinlock_t lock; /* For serializing operations */ + + __u32 new_speed; + int index; /* Instance index */ + + struct pm_dev *dev; +}; + +static inline void switch_bank(int iobase, int bank) +{ + outb(bank, iobase+BSR); +} + +#endif /* NSC_IRCC_H */ --- linux-2.6.6-rc1/drivers/net/irda/sir_dev.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/sir_dev.c 2004-04-18 22:25:24.820057256 -0700 @@ -240,7 +240,8 @@ int sirdev_receive(struct sir_dev *dev, } if (!dev->irlap) { - WARNING("%s - too early: %p / %d!\n", __FUNCTION__, cp, count); + WARNING("%s - too early: %p / %zd!\n", 
+ __FUNCTION__, cp, count); return -1; } @@ -250,7 +251,7 @@ int sirdev_receive(struct sir_dev *dev, */ irda_device_set_media_busy(dev->netdev, TRUE); dev->stats.rx_dropped++; - IRDA_DEBUG(0, "%s; rx-drop: %d\n", __FUNCTION__, count); + IRDA_DEBUG(0, "%s; rx-drop: %zd\n", __FUNCTION__, count); return 0; } --- linux-2.6.6-rc1/drivers/net/irda/vlsi_ir.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/irda/vlsi_ir.c 2004-04-18 22:25:24.824056648 -0700 @@ -44,6 +44,7 @@ MODULE_LICENSE("GPL"); #include #include #include +#include #include #include #include @@ -53,7 +54,7 @@ MODULE_LICENSE("GPL"); #include #include -#include +#include "vlsi_ir.h" /********************************************************/ @@ -160,72 +161,64 @@ static struct proc_dir_entry *vlsi_proc_ #ifdef CONFIG_PROC_FS -static int vlsi_proc_pdev(struct pci_dev *pdev, char *buf, int len) +static void vlsi_proc_pdev(struct seq_file *seq, struct pci_dev *pdev) { unsigned iobase = pci_resource_start(pdev, 0); unsigned i; - char *out = buf; - if (len < 500) - return 0; - - out += sprintf(out, "\n%s (vid/did: %04x/%04x)\n", - PCIDEV_NAME(pdev), (int)pdev->vendor, (int)pdev->device); - out += sprintf(out, "pci-power-state: %u\n", (unsigned) pdev->current_state); - out += sprintf(out, "resources: irq=%u / io=0x%04x / dma_mask=0x%016Lx\n", - pdev->irq, (unsigned)pci_resource_start(pdev, 0), (unsigned long long)pdev->dma_mask); - out += sprintf(out, "hw registers: "); + seq_printf(seq, "\n%s (vid/did: %04x/%04x)\n", + PCIDEV_NAME(pdev), (int)pdev->vendor, (int)pdev->device); + seq_printf(seq, "pci-power-state: %u\n", (unsigned) pdev->current_state); + seq_printf(seq, "resources: irq=%u / io=0x%04x / dma_mask=0x%016Lx\n", + pdev->irq, (unsigned)pci_resource_start(pdev, 0), (unsigned long long)pdev->dma_mask); + seq_printf(seq, "hw registers: "); for (i = 0; i < 0x20; i++) - out += sprintf(out, "%02x", (unsigned)inb((iobase+i))); - out += sprintf(out, "\n"); - return out - buf; + seq_printf(seq, "%02x", 
(unsigned)inb((iobase+i))); + seq_printf(seq, "\n"); } -static int vlsi_proc_ndev(struct net_device *ndev, char *buf, int len) +static void vlsi_proc_ndev(struct seq_file *seq, struct net_device *ndev) { vlsi_irda_dev_t *idev = ndev->priv; - char *out = buf; u8 byte; u16 word; unsigned delta1, delta2; struct timeval now; unsigned iobase = ndev->base_addr; - if (len < 1000) - return 0; - - out += sprintf(out, "\n%s link state: %s / %s / %s / %s\n", ndev->name, + seq_printf(seq, "\n%s link state: %s / %s / %s / %s\n", ndev->name, netif_device_present(ndev) ? "attached" : "detached", netif_running(ndev) ? "running" : "not running", netif_carrier_ok(ndev) ? "carrier ok" : "no carrier", netif_queue_stopped(ndev) ? "queue stopped" : "queue running"); + if (!netif_running(ndev)) - return out - buf; + return; - out += sprintf(out, "\nhw-state:\n"); + seq_printf(seq, "\nhw-state:\n"); pci_read_config_byte(idev->pdev, VLSI_PCI_IRMISC, &byte); - out += sprintf(out, "IRMISC:%s%s%s uart%s", + seq_printf(seq, "IRMISC:%s%s%s uart%s", (byte&IRMISC_IRRAIL) ? " irrail" : "", (byte&IRMISC_IRPD) ? " irpd" : "", (byte&IRMISC_UARTTST) ? " uarttest" : "", (byte&IRMISC_UARTEN) ? "@" : " disabled\n"); if (byte&IRMISC_UARTEN) { - out += sprintf(out, "0x%s\n", + seq_printf(seq, "0x%s\n", (byte&2) ? ((byte&1) ? "3e8" : "2e8") : ((byte&1) ? "3f8" : "2f8")); } pci_read_config_byte(idev->pdev, VLSI_PCI_CLKCTL, &byte); - out += sprintf(out, "CLKCTL: PLL %s%s%s / clock %s / wakeup %s\n", + seq_printf(seq, "CLKCTL: PLL %s%s%s / clock %s / wakeup %s\n", (byte&CLKCTL_PD_INV) ? "powered" : "down", (byte&CLKCTL_LOCK) ? " locked" : "", (byte&CLKCTL_EXTCLK) ? ((byte&CLKCTL_XCKSEL)?" / 40 MHz XCLK":" / 48 MHz XCLK") : "", (byte&CLKCTL_CLKSTP) ? "stopped" : "running", (byte&CLKCTL_WAKE) ? 
"enabled" : "disabled"); pci_read_config_byte(idev->pdev, VLSI_PCI_MSTRPAGE, &byte); - out += sprintf(out, "MSTRPAGE: 0x%02x\n", (unsigned)byte); + seq_printf(seq, "MSTRPAGE: 0x%02x\n", (unsigned)byte); byte = inb(iobase+VLSI_PIO_IRINTR); - out += sprintf(out, "IRINTR:%s%s%s%s%s%s%s%s\n", + seq_printf(seq, "IRINTR:%s%s%s%s%s%s%s%s\n", (byte&IRINTR_ACTEN) ? " ACTEN" : "", (byte&IRINTR_RPKTEN) ? " RPKTEN" : "", (byte&IRINTR_TPKTEN) ? " TPKTEN" : "", @@ -235,16 +228,16 @@ static int vlsi_proc_ndev(struct net_dev (byte&IRINTR_TPKTINT) ? " TPKTINT" : "", (byte&IRINTR_OE_INT) ? " OE_INT" : ""); word = inw(iobase+VLSI_PIO_RINGPTR); - out += sprintf(out, "RINGPTR: rx=%u / tx=%u\n", RINGPTR_GET_RX(word), RINGPTR_GET_TX(word)); + seq_printf(seq, "RINGPTR: rx=%u / tx=%u\n", RINGPTR_GET_RX(word), RINGPTR_GET_TX(word)); word = inw(iobase+VLSI_PIO_RINGBASE); - out += sprintf(out, "RINGBASE: busmap=0x%08x\n", + seq_printf(seq, "RINGBASE: busmap=0x%08x\n", ((unsigned)word << 10)|(MSTRPAGE_VALUE<<24)); word = inw(iobase+VLSI_PIO_RINGSIZE); - out += sprintf(out, "RINGSIZE: rx=%u / tx=%u\n", RINGSIZE_TO_RXSIZE(word), + seq_printf(seq, "RINGSIZE: rx=%u / tx=%u\n", RINGSIZE_TO_RXSIZE(word), RINGSIZE_TO_TXSIZE(word)); word = inw(iobase+VLSI_PIO_IRCFG); - out += sprintf(out, "IRCFG:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + seq_printf(seq, "IRCFG:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", (word&IRCFG_LOOP) ? " LOOP" : "", (word&IRCFG_ENTX) ? " ENTX" : "", (word&IRCFG_ENRX) ? " ENRX" : "", @@ -259,7 +252,7 @@ static int vlsi_proc_ndev(struct net_dev (word&IRCFG_TXPOL) ? " TXPOL" : "", (word&IRCFG_RXPOL) ? " RXPOL" : ""); word = inw(iobase+VLSI_PIO_IRENABLE); - out += sprintf(out, "IRENABLE:%s%s%s%s%s%s%s%s\n", + seq_printf(seq, "IRENABLE:%s%s%s%s%s%s%s%s\n", (word&IRENABLE_PHYANDCLOCK) ? " PHYANDCLOCK" : "", (word&IRENABLE_CFGER) ? " CFGERR" : "", (word&IRENABLE_FIR_ON) ? " FIR_ON" : "", @@ -269,22 +262,22 @@ static int vlsi_proc_ndev(struct net_dev (word&IRENABLE_ENRXST) ? 
" ENRXST" : "", (word&IRENABLE_CRC16_ON) ? " CRC16_ON" : ""); word = inw(iobase+VLSI_PIO_PHYCTL); - out += sprintf(out, "PHYCTL: baud-divisor=%u / pulsewidth=%u / preamble=%u\n", + seq_printf(seq, "PHYCTL: baud-divisor=%u / pulsewidth=%u / preamble=%u\n", (unsigned)PHYCTL_TO_BAUD(word), (unsigned)PHYCTL_TO_PLSWID(word), (unsigned)PHYCTL_TO_PREAMB(word)); word = inw(iobase+VLSI_PIO_NPHYCTL); - out += sprintf(out, "NPHYCTL: baud-divisor=%u / pulsewidth=%u / preamble=%u\n", + seq_printf(seq, "NPHYCTL: baud-divisor=%u / pulsewidth=%u / preamble=%u\n", (unsigned)PHYCTL_TO_BAUD(word), (unsigned)PHYCTL_TO_PLSWID(word), (unsigned)PHYCTL_TO_PREAMB(word)); word = inw(iobase+VLSI_PIO_MAXPKT); - out += sprintf(out, "MAXPKT: max. rx packet size = %u\n", word); + seq_printf(seq, "MAXPKT: max. rx packet size = %u\n", word); word = inw(iobase+VLSI_PIO_RCVBCNT) & RCVBCNT_MASK; - out += sprintf(out, "RCVBCNT: rx-fifo filling level = %u\n", word); + seq_printf(seq, "RCVBCNT: rx-fifo filling level = %u\n", word); - out += sprintf(out, "\nsw-state:\n"); - out += sprintf(out, "IrPHY setup: %d baud - %s encoding\n", idev->baud, + seq_printf(seq, "\nsw-state:\n"); + seq_printf(seq, "IrPHY setup: %d baud - %s encoding\n", idev->baud, (idev->mode==IFF_SIR)?"SIR":((idev->mode==IFF_MIR)?"MIR":"FIR")); do_gettimeofday(&now); if (now.tv_usec >= idev->last_rx.tv_usec) { @@ -295,216 +288,110 @@ static int vlsi_proc_ndev(struct net_dev delta2 = 1000000 + now.tv_usec - idev->last_rx.tv_usec; delta1 = 1; } - out += sprintf(out, "last rx: %lu.%06u sec\n", + seq_printf(seq, "last rx: %lu.%06u sec\n", now.tv_sec - idev->last_rx.tv_sec - delta1, delta2); - out += sprintf(out, "RX: packets=%lu / bytes=%lu / errors=%lu / dropped=%lu", + seq_printf(seq, "RX: packets=%lu / bytes=%lu / errors=%lu / dropped=%lu", idev->stats.rx_packets, idev->stats.rx_bytes, idev->stats.rx_errors, idev->stats.rx_dropped); - out += sprintf(out, " / overrun=%lu / length=%lu / frame=%lu / crc=%lu\n", + seq_printf(seq, " / 
overrun=%lu / length=%lu / frame=%lu / crc=%lu\n", idev->stats.rx_over_errors, idev->stats.rx_length_errors, idev->stats.rx_frame_errors, idev->stats.rx_crc_errors); - out += sprintf(out, "TX: packets=%lu / bytes=%lu / errors=%lu / dropped=%lu / fifo=%lu\n", + seq_printf(seq, "TX: packets=%lu / bytes=%lu / errors=%lu / dropped=%lu / fifo=%lu\n", idev->stats.tx_packets, idev->stats.tx_bytes, idev->stats.tx_errors, idev->stats.tx_dropped, idev->stats.tx_fifo_errors); - return out - buf; } -static int vlsi_proc_ring(struct vlsi_ring *r, char *buf, int len) +static void vlsi_proc_ring(struct seq_file *seq, struct vlsi_ring *r) { struct ring_descr *rd; unsigned i, j; int h, t; - char *out = buf; - if (len < 3000) - return 0; - - out += sprintf(out, "size %u / mask 0x%04x / len %u / dir %d / hw %p\n", + seq_printf(seq, "size %u / mask 0x%04x / len %u / dir %d / hw %p\n", r->size, r->mask, r->len, r->dir, r->rd[0].hw); h = atomic_read(&r->head) & r->mask; t = atomic_read(&r->tail) & r->mask; - out += sprintf(out, "head = %d / tail = %d ", h, t); + seq_printf(seq, "head = %d / tail = %d ", h, t); if (h == t) - out += sprintf(out, "(empty)\n"); + seq_printf(seq, "(empty)\n"); else { if (((t+1)&r->mask) == h) - out += sprintf(out, "(full)\n"); + seq_printf(seq, "(full)\n"); else - out += sprintf(out, "(level = %d)\n", ((unsigned)(t-h) & r->mask)); + seq_printf(seq, "(level = %d)\n", ((unsigned)(t-h) & r->mask)); rd = &r->rd[h]; j = (unsigned) rd_get_count(rd); - out += sprintf(out, "current: rd = %d / status = %02x / len = %u\n", + seq_printf(seq, "current: rd = %d / status = %02x / len = %u\n", h, (unsigned)rd_get_status(rd), j); if (j > 0) { - out += sprintf(out, " data:"); + seq_printf(seq, " data:"); if (j > 20) j = 20; for (i = 0; i < j; i++) - out += sprintf(out, " %02x", (unsigned)((unsigned char *)rd->buf)[i]); - out += sprintf(out, "\n"); + seq_printf(seq, " %02x", (unsigned)((unsigned char *)rd->buf)[i]); + seq_printf(seq, "\n"); } } for (i = 0; i < r->size; i++) { 
rd = &r->rd[i]; - out += sprintf(out, "> ring descr %u: ", i); - out += sprintf(out, "skb=%p data=%p hw=%p\n", rd->skb, rd->buf, rd->hw); - out += sprintf(out, " hw: status=%02x count=%u busaddr=0x%08x\n", + seq_printf(seq, "> ring descr %u: ", i); + seq_printf(seq, "skb=%p data=%p hw=%p\n", rd->skb, rd->buf, rd->hw); + seq_printf(seq, " hw: status=%02x count=%u busaddr=0x%08x\n", (unsigned) rd_get_status(rd), (unsigned) rd_get_count(rd), (unsigned) rd_get_addr(rd)); } - return out - buf; } -static int vlsi_proc_print(struct net_device *ndev, char *buf, int len) +static int vlsi_seq_show(struct seq_file *seq, void *v) { - vlsi_irda_dev_t *idev; + struct net_device *ndev = seq->private; + vlsi_irda_dev_t *idev = ndev->priv; unsigned long flags; - char *out = buf; - - if (!ndev || !ndev->priv) { - ERROR("%s: invalid ptr!\n", __FUNCTION__); - return 0; - } - idev = ndev->priv; - - if (len < 8000) - return 0; - - out += sprintf(out, "\n%s %s\n\n", DRIVER_NAME, DRIVER_VERSION); - out += sprintf(out, "clksrc: %s\n", + seq_printf(seq, "\n%s %s\n\n", DRIVER_NAME, DRIVER_VERSION); + seq_printf(seq, "clksrc: %s\n", (clksrc>=2) ? 
((clksrc==3)?"40MHz XCLK":"48MHz XCLK") : ((clksrc==1)?"48MHz PLL":"autodetect")); - out += sprintf(out, "ringsize: tx=%d / rx=%d\n", + seq_printf(seq, "ringsize: tx=%d / rx=%d\n", ringsize[0], ringsize[1]); - out += sprintf(out, "sirpulse: %s\n", (sirpulse)?"3/16 bittime":"short"); - out += sprintf(out, "qos_mtt_bits: 0x%02x\n", (unsigned)qos_mtt_bits); + seq_printf(seq, "sirpulse: %s\n", (sirpulse)?"3/16 bittime":"short"); + seq_printf(seq, "qos_mtt_bits: 0x%02x\n", (unsigned)qos_mtt_bits); spin_lock_irqsave(&idev->lock, flags); if (idev->pdev != NULL) { - out += vlsi_proc_pdev(idev->pdev, out, len - (out-buf)); + vlsi_proc_pdev(seq, idev->pdev); + if (idev->pdev->current_state == 0) - out += vlsi_proc_ndev(ndev, out, len - (out-buf)); + vlsi_proc_ndev(seq, ndev); else - out += sprintf(out, "\nPCI controller down - resume_ok = %d\n", + seq_printf(seq, "\nPCI controller down - resume_ok = %d\n", idev->resume_ok); if (netif_running(ndev) && idev->rx_ring && idev->tx_ring) { - out += sprintf(out, "\n--------- RX ring -----------\n\n"); - out += vlsi_proc_ring(idev->rx_ring, out, len - (out-buf)); - out += sprintf(out, "\n--------- TX ring -----------\n\n"); - out += vlsi_proc_ring(idev->tx_ring, out, len - (out-buf)); + seq_printf(seq, "\n--------- RX ring -----------\n\n"); + vlsi_proc_ring(seq, idev->rx_ring); + seq_printf(seq, "\n--------- TX ring -----------\n\n"); + vlsi_proc_ring(seq, idev->tx_ring); } } - out += sprintf(out, "\n"); + seq_printf(seq, "\n"); spin_unlock_irqrestore(&idev->lock, flags); - return out - buf; -} - -struct vlsi_proc_data { - int size; - char *data; -}; - -/* most of the proc-fops code borrowed from usb/uhci */ - -static int vlsi_proc_open(struct inode *inode, struct file *file) -{ - const struct proc_dir_entry *pde = PDE(inode); - struct net_device *ndev = pde->data; - vlsi_irda_dev_t *idev = ndev->priv; - struct vlsi_proc_data *procdata; - const int maxdata = 8000; - - lock_kernel(); - procdata = kmalloc(sizeof(*procdata), 
GFP_KERNEL); - if (!procdata) { - unlock_kernel(); - return -ENOMEM; - } - procdata->data = kmalloc(maxdata, GFP_KERNEL); - if (!procdata->data) { - kfree(procdata); - unlock_kernel(); - return -ENOMEM; - } - - down(&idev->sem); - procdata->size = vlsi_proc_print(ndev, procdata->data, maxdata); - up(&idev->sem); - - file->private_data = procdata; - return 0; } -static loff_t vlsi_proc_lseek(struct file *file, loff_t off, int whence) -{ - struct vlsi_proc_data *procdata; - loff_t new = -1; - - lock_kernel(); - procdata = file->private_data; - - switch (whence) { - case 0: - new = off; - break; - case 1: - new = file->f_pos + off; - break; - } - if (new < 0 || new > procdata->size) { - unlock_kernel(); - return -EINVAL; - } - unlock_kernel(); - return (file->f_pos = new); -} - -static ssize_t vlsi_proc_read(struct file *file, char *buf, size_t nbytes, - loff_t *ppos) +static int vlsi_seq_open(struct inode *inode, struct file *file) { - struct vlsi_proc_data *procdata = file->private_data; - unsigned int pos; - unsigned int size; - - pos = *ppos; - size = procdata->size; - if (pos >= size) - return 0; - if (nbytes >= size) - nbytes = size; - if (pos + nbytes > size) - nbytes = size - pos; - - if (copy_to_user(buf, procdata->data + pos, nbytes)) - return -EFAULT; - - *ppos += nbytes; - - return nbytes; -} - -static int vlsi_proc_release(struct inode *inode, struct file *file) -{ - struct vlsi_proc_data *procdata = file->private_data; - - kfree(procdata->data); - kfree(procdata); - - return 0; + return single_open(file, vlsi_seq_show, PDE(inode)->data); } static struct file_operations vlsi_proc_fops = { - /* protect individual procdir file entry against rmmod */ - .owner = THIS_MODULE, - .open = vlsi_proc_open, - .llseek = vlsi_proc_lseek, - .read = vlsi_proc_read, - .release = vlsi_proc_release, + .owner = THIS_MODULE, + .open = vlsi_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; #define VLSI_PROC_FOPS (&vlsi_proc_fops) @@ -1787,13 
+1674,12 @@ vlsi_irda_probe(struct pci_dev *pdev, co ent = create_proc_entry(ndev->name, S_IFREG|S_IRUGO, vlsi_proc_root); if (!ent) { WARNING("%s: failed to create proc entry\n", __FUNCTION__); - } - else { + } else { ent->data = ndev; ent->proc_fops = VLSI_PROC_FOPS; ent->size = 0; - idev->proc_entry = ent; } + idev->proc_entry = ent; } MESSAGE("%s: registered device %s\n", drivername, ndev->name); --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/vlsi_ir.h 2004-04-18 22:25:24.828056040 -0700 @@ -0,0 +1,799 @@ + +/********************************************************************* + * + * vlsi_ir.h: VLSI82C147 PCI IrDA controller driver for Linux + * + * Version: 0.5 + * + * Copyright (c) 2001-2003 Martin Diehl + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + ********************************************************************/ + +#ifndef IRDA_VLSI_FIR_H +#define IRDA_VLSI_FIR_H + +/* ================================================================ + * compatibility stuff + */ + +/* definitions not present in pci_ids.h */ + +#ifndef PCI_CLASS_WIRELESS_IRDA +#define PCI_CLASS_WIRELESS_IRDA 0x0d00 +#endif + +#ifndef PCI_CLASS_SUBCLASS_MASK +#define PCI_CLASS_SUBCLASS_MASK 0xffff +#endif + +/* in recent 2.5 interrupt handlers have non-void return value */ +#ifndef IRQ_RETVAL +typedef void irqreturn_t; +#define IRQ_NONE +#define IRQ_HANDLED +#define IRQ_RETVAL(x) +#endif + +/* some stuff need to check kernelversion. Not all 2.5 stuff was present + * in early 2.5.x - the test is merely to separate 2.4 from 2.5 + */ +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) + +/* PDE() introduced in 2.5.4 */ +#ifdef CONFIG_PROC_FS +#define PDE(inode) ((inode)->u.generic_ip) +#endif + +/* irda crc16 calculation exported in 2.5.42 */ +#define irda_calc_crc16(fcs,buf,len) (GOOD_FCS) + +/* we use this for unified pci device name access */ +#define PCIDEV_NAME(pdev) ((pdev)->name) + +#else /* 2.5 or later */ + +/* recent 2.5/2.6 stores pci device names at varying places ;-) */ +#ifdef CONFIG_PCI_NAMES +/* human readable name */ +#define PCIDEV_NAME(pdev) ((pdev)->pretty_name) +#else +/* whatever we get from the associated struct device - bus:slot:dev.fn id */ +#define PCIDEV_NAME(pdev) (pci_name(pdev)) +#endif + +#endif + +/* ================================================================ */ + +/* non-standard PCI registers */ + +enum vlsi_pci_regs { + VLSI_PCI_CLKCTL = 0x40, /* chip clock input control */ + VLSI_PCI_MSTRPAGE = 0x41, /* addr [31:24] for all busmaster cycles */ + VLSI_PCI_IRMISC = 0x42 /* 
mainly legacy UART related */ +}; + +/* ------------------------------------------ */ + +/* VLSI_PCI_CLKCTL: Clock Control Register (u8, rw) */ + +/* Three possible clock sources: either on-chip 48MHz PLL or + * external clock applied to EXTCLK pin. External clock may + * be either 48MHz or 40MHz, which is indicated by XCKSEL. + * CLKSTP controls whether the selected clock source gets + * connected to the IrDA block. + * + * On my HP OB-800 the BIOS sets external 40MHz clock as source + * when IrDA enabled and I've never detected any PLL lock success. + * Apparently the 14.3...MHz OSC input required for the PLL to work + * is not connected and the 40MHz EXTCLK is provided externally. + * At least this is what makes the driver working for me. + */ + +enum vlsi_pci_clkctl { + + /* PLL control */ + + CLKCTL_PD_INV = 0x04, /* PD#: inverted power down signal, + * i.e. PLL is powered, if PD_INV set */ + CLKCTL_LOCK = 0x40, /* (ro) set, if PLL is locked */ + + /* clock source selection */ + + CLKCTL_EXTCLK = 0x20, /* set to select external clock input, not PLL */ + CLKCTL_XCKSEL = 0x10, /* set to indicate EXTCLK is 40MHz, not 48MHz */ + + /* IrDA block control */ + + CLKCTL_CLKSTP = 0x80, /* set to disconnect from selected clock source */ + CLKCTL_WAKE = 0x08 /* set to enable wakeup feature: whenever IR activity + * is detected, PD_INV gets set(?) and CLKSTP cleared */ +}; + +/* ------------------------------------------ */ + +/* VLSI_PCI_MSTRPAGE: Master Page Register (u8, rw) and busmastering stuff */ + +#define DMA_MASK_USED_BY_HW 0xffffffff +#define DMA_MASK_MSTRPAGE 0x00ffffff +#define MSTRPAGE_VALUE (DMA_MASK_MSTRPAGE >> 24) + + /* PCI busmastering is somewhat special for this guy - in short: + * + * We select to operate using fixed MSTRPAGE=0, use ISA DMA + * address restrictions to make the PCI BM api aware of this, + * but ensure the hardware is dealing with real 32bit access. + * + * In detail: + * The chip executes normal 32bit busmaster cycles, i.e. 
+ * drives all 32 address lines. These addresses however are + * composed of [0:23] taken from various busaddr-pointers + * and [24:31] taken from the MSTRPAGE register in the VLSI82C147 + * config space. Therefore _all_ busmastering must be + * targeted to/from one single 16MB (busaddr-) superpage! + * The point is to make sure all the allocations for memory + * locations with busmaster access (ring descriptors, buffers) + * are indeed bus-mappable to the same 16MB range (for x86 this + * means they must reside in the same 16MB physical memory address + * range). The only constraint we have which supports "several objects + * mappable to common 16MB range" paradigma, is the old ISA DMA + * restriction to the first 16MB of physical address range. + * Hence the approach here is to enable PCI busmaster support using + * the correct 32bit dma-mask used by the chip. Afterwards the device's + * dma-mask gets restricted to 24bit, which must be honoured somehow by + * all allocations for memory areas to be exposed to the chip ... + * + * Note: + * Don't be surprised to get "Setting latency timer..." messages every + * time when PCI busmastering is enabled for the chip. + * The chip has its PCI latency timer RO fixed at 0 - which is not a + * problem here, because it is never requesting _burst_ transactions. + */ + +/* ------------------------------------------ */ + +/* VLSI_PCIIRMISC: IR Miscellaneous Register (u8, rw) */ + +/* legacy UART emulation - not used by this driver - would require: + * (see below for some register-value definitions) + * + * - IRMISC_UARTEN must be set to enable UART address decoding + * - IRMISC_UARTSEL configured + * - IRCFG_MASTER must be cleared + * - IRCFG_SIR must be set + * - IRENABLE_PHYANDCLOCK must be asserted 0->1 (and hence IRENABLE_SIR_ON) + */ + +enum vlsi_pci_irmisc { + + /* IR transceiver control */ + + IRMISC_IRRAIL = 0x40, /* (ro?) IR rail power indication (and control?) + * 0=3.3V / 1=5V. Probably set during power-on? 
+ * unclear - not touched by driver */ + IRMISC_IRPD = 0x08, /* transceiver power down, if set */ + + /* legacy UART control */ + + IRMISC_UARTTST = 0x80, /* UART test mode - "always write 0" */ + IRMISC_UARTEN = 0x04, /* enable UART address decoding */ + + /* bits [1:0] IRMISC_UARTSEL to select legacy UART address */ + + IRMISC_UARTSEL_3f8 = 0x00, + IRMISC_UARTSEL_2f8 = 0x01, + IRMISC_UARTSEL_3e8 = 0x02, + IRMISC_UARTSEL_2e8 = 0x03 +}; + +/* ================================================================ */ + +/* registers mapped to 32 byte PCI IO space */ + +/* note: better access all registers at the indicated u8/u16 size + * although some of them contain only 1 byte of information. + * some of them (particularly PROMPT and IRCFG) ignore + * access when using the wrong addressing mode! + */ + +enum vlsi_pio_regs { + VLSI_PIO_IRINTR = 0x00, /* interrupt enable/request (u8, rw) */ + VLSI_PIO_RINGPTR = 0x02, /* rx/tx ring pointer (u16, ro) */ + VLSI_PIO_RINGBASE = 0x04, /* [23:10] of ring address (u16, rw) */ + VLSI_PIO_RINGSIZE = 0x06, /* rx/tx ring size (u16, rw) */ + VLSI_PIO_PROMPT = 0x08, /* triggers ring processing (u16, wo) */ + /* 0x0a-0x0f: reserved / duplicated UART regs */ + VLSI_PIO_IRCFG = 0x10, /* configuration select (u16, rw) */ + VLSI_PIO_SIRFLAG = 0x12, /* BOF/EOF for filtered SIR (u16, ro) */ + VLSI_PIO_IRENABLE = 0x14, /* enable and status register (u16, rw/ro) */ + VLSI_PIO_PHYCTL = 0x16, /* physical layer current status (u16, ro) */ + VLSI_PIO_NPHYCTL = 0x18, /* next physical layer select (u16, rw) */ + VLSI_PIO_MAXPKT = 0x1a, /* [11:0] max len for packet receive (u16, rw) */ + VLSI_PIO_RCVBCNT = 0x1c /* current receive-FIFO byte count (u16, ro) */ + /* 0x1e-0x1f: reserved / duplicated UART regs */ +}; + +/* ------------------------------------------ */ + +/* VLSI_PIO_IRINTR: Interrupt Register (u8, rw) */ + +/* enable-bits: + * 1 = enable / 0 = disable + * interrupt condition bits: + * set according to corresponding interrupt source + *
(regardless of the state of the enable bits) + * enable bit status indicates whether interrupt gets raised + * write-to-clear + * note: RPKTINT and TPKTINT behave differently in legacy UART mode (which we don't use :-) + */ + +enum vlsi_pio_irintr { + IRINTR_ACTEN = 0x80, /* activity interrupt enable */ + IRINTR_ACTIVITY = 0x40, /* activity monitor (traffic detected) */ + IRINTR_RPKTEN = 0x20, /* receive packet interrupt enable */ + IRINTR_RPKTINT = 0x10, /* rx-packet transferred from fifo to memory finished */ + IRINTR_TPKTEN = 0x08, /* transmit packet interrupt enable */ + IRINTR_TPKTINT = 0x04, /* last bit of tx-packet+crc shifted to ir-pulser */ + IRINTR_OE_EN = 0x02, /* UART rx fifo overrun error interrupt enable */ + IRINTR_OE_INT = 0x01 /* UART rx fifo overrun error (read LSR to clear) */ +}; + +/* we use this mask to check whether the (shared PCI) interrupt is ours */ + +#define IRINTR_INT_MASK (IRINTR_ACTIVITY|IRINTR_RPKTINT|IRINTR_TPKTINT) + +/* ------------------------------------------ */ + +/* VLSI_PIO_RINGPTR: Ring Pointer Read-Back Register (u16, ro) */ + +/* _both_ ring pointers are indices relative to the _entire_ rx,tx-ring! + * i.e. the referenced descriptor is located + * at RINGBASE + PTR * sizeof(descr) for rx and tx + * therefore, the tx-pointer has offset MAX_RING_DESCR + */ + +#define MAX_RING_DESCR 64 /* tx, rx rings may contain up to 64 descr each */ + +#define RINGPTR_RX_MASK (MAX_RING_DESCR-1) +#define RINGPTR_TX_MASK ((MAX_RING_DESCR-1)<<8) + +#define RINGPTR_GET_RX(p) ((p)&RINGPTR_RX_MASK) +#define RINGPTR_GET_TX(p) (((p)&RINGPTR_TX_MASK)>>8) + +/* ------------------------------------------ */ + +/* VLSI_PIO_RINGBASE: Ring Pointer Base Address Register (u16, rw) */ + +/* Contains [23:10] part of the ring base (bus-) address + * which must be 1k-aligned. [31:24] is taken from + * VLSI_PCI_MSTRPAGE above. + * The controller initiates non-burst PCI BM cycles to + * fetch and update the descriptors in the ring.
+ * Once fetched, the descriptor remains cached onchip + * until it gets closed and updated due to the ring + * processing state machine. + * The entire ring area is split in rx and tx areas with each + * area consisting of 64 descriptors of 8 bytes each. + * The rx(tx) ring is located at ringbase+0 (ringbase+64*8). + */ + +#define BUS_TO_RINGBASE(p) (((p)>>10)&0x3fff) + +/* ------------------------------------------ */ + +/* VLSI_PIO_RINGSIZE: Ring Size Register (u16, rw) */ + +/* bit mask to indicate the ring size to be used for rx and tx. + * possible values encoded bits + * 4 0000 + * 8 0001 + * 16 0011 + * 32 0111 + * 64 1111 + * located at [15:12] for tx and [11:8] for rx ([7:0] unused) + * + * note: probably a good idea to have IRCFG_MSTR cleared when writing + * this so the state machines are stopped and the RINGPTR is reset! + */ + +#define SIZE_TO_BITS(num) ((((num)-1)>>2)&0x0f) +#define TX_RX_TO_RINGSIZE(tx,rx) ((SIZE_TO_BITS(tx)<<12)|(SIZE_TO_BITS(rx)<<8)) +#define RINGSIZE_TO_RXSIZE(rs) ((((rs)&0x0f00)>>6)+4) +#define RINGSIZE_TO_TXSIZE(rs) ((((rs)&0xf000)>>10)+4) + + +/* ------------------------------------------ */ + +/* VLSI_PIO_PROMPT: Ring Prompting Register (u16, write-to-start) */ + +/* writing any value kicks the ring processing state machines + * for both tx, rx rings as follows: + * - active rings (currently owning an active descriptor) + * ignore the prompt and continue + * - idle rings fetch the next descr from the ring and start + * their processing + */ + +/* ------------------------------------------ */ + +/* VLSI_PIO_IRCFG: IR Config Register (u16, rw) */ + +/* notes: + * - not more than one SIR/MIR/FIR bit must be set at any time + * - SIR, MIR, FIR and CRC16 select the configuration which will + * be applied on next 0->1 transition of IRENABLE_PHYANDCLOCK (see below). 
+ * - besides allowing the PCI interface to execute busmaster cycles + * and therefore the ring SM to operate, the MSTR bit has side-effects: + * when MSTR is cleared, the RINGPTR's get reset and the legacy UART mode + * (in contrast to busmaster access mode) gets enabled. + * - clearing ENRX or setting ENTX while data is received may stall the + * receive fifo until ENRX is reenabled _and_ another packet arrives + * - SIRFILT means the chip performs the required unwrapping of hardware + * headers (XBOF's, BOF/EOF) and un-escaping in the _receive_ direction. + * Only the resulting IrLAP payload is copied to the receive buffers - + * but with the 16bit FCS still included. Question remains, whether it + * was already checked or we should do it before passing the packet to IrLAP? + */ + +enum vlsi_pio_ircfg { + IRCFG_LOOP = 0x4000, /* enable loopback test mode */ + IRCFG_ENTX = 0x1000, /* transmit enable */ + IRCFG_ENRX = 0x0800, /* receive enable */ + IRCFG_MSTR = 0x0400, /* master enable */ + IRCFG_RXANY = 0x0200, /* receive any packet */ + IRCFG_CRC16 = 0x0080, /* 16bit (not 32bit) CRC select for MIR/FIR */ + IRCFG_FIR = 0x0040, /* FIR 4PPM encoding mode enable */ + IRCFG_MIR = 0x0020, /* MIR HDLC encoding mode enable */ + IRCFG_SIR = 0x0010, /* SIR encoding mode enable */ + IRCFG_SIRFILT = 0x0008, /* enable SIR decode filter (receiver unwrapping) */ + IRCFG_SIRTEST = 0x0004, /* allow SIR decode filter when not in SIR mode */ + IRCFG_TXPOL = 0x0002, /* invert tx polarity when set */ + IRCFG_RXPOL = 0x0001 /* invert rx polarity when set */ +}; + +/* ------------------------------------------ */ + +/* VLSI_PIO_SIRFLAG: SIR Flag Register (u16, ro) */ + +/* register contains hardcoded BOF=0xc0 at [7:0] and EOF=0xc1 at [15:8] + * which is used for unwrapping received frames in SIR decode-filter mode + */ + +/* ------------------------------------------ */ + +/* VLSI_PIO_IRENABLE: IR Enable Register (u16, rw/ro) */ + +/* notes: + * - IREN acts as gate for latching the
configured IR mode information + * from IRCFG and IRPHYCTL when IREN=reset and applying them when + * IREN gets set afterwards. + * - ENTXST reflects IRCFG_ENTX + * - ENRXST = IRCFG_ENRX && (!IRCFG_ENTX || IRCFG_LOOP) + */ + +enum vlsi_pio_irenable { + IRENABLE_PHYANDCLOCK = 0x8000, /* enable IR phy and gate the mode config (rw) */ + IRENABLE_CFGER = 0x4000, /* mode configuration error (ro) */ + IRENABLE_FIR_ON = 0x2000, /* FIR on status (ro) */ + IRENABLE_MIR_ON = 0x1000, /* MIR on status (ro) */ + IRENABLE_SIR_ON = 0x0800, /* SIR on status (ro) */ + IRENABLE_ENTXST = 0x0400, /* transmit enable status (ro) */ + IRENABLE_ENRXST = 0x0200, /* Receive enable status (ro) */ + IRENABLE_CRC16_ON = 0x0100 /* 16bit (not 32bit) CRC enabled status (ro) */ +}; + +#define IRENABLE_MASK 0xff00 /* Read mask */ + +/* ------------------------------------------ */ + +/* VLSI_PIO_PHYCTL: IR Physical Layer Current Control Register (u16, ro) */ + +/* read-back of the currently applied physical layer status. + * applied from VLSI_PIO_NPHYCTL at rising edge of IRENABLE_PHYANDCLOCK + * contents identical to VLSI_PIO_NPHYCTL (see below) + */ + +/* ------------------------------------------ */ + +/* VLSI_PIO_NPHYCTL: IR Physical Layer Next Control Register (u16, rw) */ + +/* latched during IRENABLE_PHYANDCLOCK=0 and applied at 0-1 transition + * + * consists of BAUD[15:10], PLSWID[9:5] and PREAMB[4:0] bits defined as follows: + * + * SIR-mode: BAUD = (115.2kHz / baudrate) - 1 + * PLSWID = (pulsetime * freq / (BAUD+1)) - 1 + * where pulsetime is the requested IrPHY pulse width + * and freq is 8(16)MHz for 40(48)MHz primary input clock + * PREAMB: don't care for SIR + * + * The nominal SIR pulse width is 3/16 bit time so we have PLSWID=12 + * fixed for all SIR speeds at 40MHz input clock (PLSWID=24 at 48MHz). + * IrPHY also allows shorter pulses down to the nominal pulse duration + * at 115.2kbaud (minus some tolerance) which is 1.41 usec. 
+ * Using the expression PLSWID = 12/(BAUD+1)-1 (multiplied by two for 48MHz) + * we get the minimum acceptable PLSWID values according to the VLSI + * specification, which provides 1.5 usec pulse width for all speeds (except + * for 2.4kbaud getting 6usec). This is fine with IrPHY v1.3 specs and + * reduces the transceiver power which drains the battery. At 9.6kbaud for + * example this amounts to more than 90% battery power saving! + * + * MIR-mode: BAUD = 0 + * PLSWID = 9(10) for 40(48) MHz input clock + * to get nominal MIR pulse width + * PREAMB = 1 + * + * FIR-mode: BAUD = 0 + * PLSWID: don't care + * PREAMB = 15 + */ + +#define PHYCTL_BAUD_SHIFT 10 +#define PHYCTL_BAUD_MASK 0xfc00 +#define PHYCTL_PLSWID_SHIFT 5 +#define PHYCTL_PLSWID_MASK 0x03e0 +#define PHYCTL_PREAMB_SHIFT 0 +#define PHYCTL_PREAMB_MASK 0x001f + +#define PHYCTL_TO_BAUD(bwp) (((bwp)&PHYCTL_BAUD_MASK)>>PHYCTL_BAUD_SHIFT) +#define PHYCTL_TO_PLSWID(bwp) (((bwp)&PHYCTL_PLSWID_MASK)>>PHYCTL_PLSWID_SHIFT) +#define PHYCTL_TO_PREAMB(bwp) (((bwp)&PHYCTL_PREAMB_MASK)>>PHYCTL_PREAMB_SHIFT) + +#define BWP_TO_PHYCTL(b,w,p) ((((b)<0) ? (tmp-1) : 0; +} + +#define PHYCTL_SIR(br,ws,cs) BWP_TO_PHYCTL(BAUD_BITS(br),calc_width_bits((br),(ws),(cs)),0) +#define PHYCTL_MIR(cs) BWP_TO_PHYCTL(0,((cs)?9:10),1) +#define PHYCTL_FIR BWP_TO_PHYCTL(0,0,15) + +/* quite ugly, I know. But implementing these calculations here avoids + * having magic numbers in the code and allows some playing with pulsewidths + * without risk to violate the standards. 
+ * FWIW, here is the table for reference: + * + * baudrate BAUD min-PLSWID nom-PLSWID PREAMB + * 2400 47 0(0) 12(24) 0 + * 9600 11 0(0) 12(24) 0 + * 19200 5 1(2) 12(24) 0 + * 38400 2 3(6) 12(24) 0 + * 57600 1 5(10) 12(24) 0 + * 115200 0 11(22) 12(24) 0 + * MIR 0 - 9(10) 1 + * FIR 0 - 0 15 + * + * note: x(y) means x-value for 40MHz / y-value for 48MHz primary input clock + */ + +/* ------------------------------------------ */ + + +/* VLSI_PIO_MAXPKT: Maximum Packet Length register (u16, rw) */ + +/* maximum acceptable length for received packets */ + +/* hw imposed limitation - register uses only [11:0] */ +#define MAX_PACKET_LENGTH 0x0fff + +/* IrLAP I-field (apparently not defined elsewhere) */ +#define IRDA_MTU 2048 + +/* complete packet consists of A(1)+C(1)+I(<=IRDA_MTU) */ +#define IRLAP_SKB_ALLOCSIZE (1+1+IRDA_MTU) + +/* the buffers we use to exchange frames with the hardware need to be + * larger than IRLAP_SKB_ALLOCSIZE because we may have up to 4 bytes FCS + * appended and, in SIR mode, a lot of frame wrapping bytes. The worst + * case appears to be a SIR packet with I-size==IRDA_MTU and all bytes + * requiring to be escaped to provide transparency. Furthermore, the peer + * might ask for quite a number of additional XBOFs: + * up to 115+48 XBOFS 163 + * regular BOF 1 + * A-field 1 + * C-field 1 + * I-field, IRDA_MTU, all escaped 4096 + * FCS (16 bit at SIR, escaped) 4 + * EOF 1 + * AFAICS nothing in IrLAP guarantees A/C field not to need escaping + * (f.e. 0xc0/0xc1 - i.e. BOF/EOF - are legal values there) so in the + * worst case we have 4269 bytes total frame size. + * However, the VLSI uses 12 bits only for all buffer length values, + * which limits the maximum useable buffer size <= 4095. 
+ * Note this is not a limitation in the receive case because we use + * the SIR filtering mode where the hw unwraps the frame and only the + * bare packet+fcs is stored into the buffer - in contrast to the SIR + * tx case where we have to pass frame-wrapped packets to the hw. + * If this would ever become an issue in real life, the only workaround + * I see would be using the legacy UART emulation in SIR mode. + */ + +#define XFER_BUF_SIZE MAX_PACKET_LENGTH + +/* ------------------------------------------ */ + +/* VLSI_PIO_RCVBCNT: Receive Byte Count Register (u16, ro) */ + +/* receive packet counter gets incremented on every non-filtered + * byte which was put in the receive fifo and reset for each + * new packet. Used to decide whether we are just in the middle + * of receiving + */ + +/* better apply the [11:0] mask when reading, as some docs say the + * reserved [15:12] would return 1 when reading - which is wrong AFAICS + */ +#define RCVBCNT_MASK 0x0fff + +/******************************************************************/ + +/* descriptors for rx/tx ring + * + * accessed by hardware - don't change! + * + * the descriptor is owned by hardware, when the ACTIVE status bit + * is set and nothing (besides reading status to test the bit) + * shall be done. The bit gets cleared by hw, when the descriptor + * gets closed. Premature reaping of descriptors owned by the chip + * can be achieved by disabling IRCFG_MSTR + * + * Attention: Writing addr overwrites status!
+ * + * ### FIXME: depends on endianness (but there ain't no non-i586 ob800 ;-) + */ + +struct ring_descr_hw { + volatile u16 rd_count; /* tx/rx count [11:0] */ + u16 reserved; + union { + u32 addr; /* [23:0] of the buffer's busaddress */ + struct { + u8 addr_res[3]; + volatile u8 status; /* descriptor status */ + } rd_s __attribute__((packed)); + } rd_u __attribute((packed)); +} __attribute__ ((packed)); + +#define rd_addr rd_u.addr +#define rd_status rd_u.rd_s.status + +/* ring descriptor status bits */ + +#define RD_ACTIVE 0x80 /* descriptor owned by hw (both TX,RX) */ + +/* TX ring descriptor status */ + +#define RD_TX_DISCRC 0x40 /* do not send CRC (for SIR) */ +#define RD_TX_BADCRC 0x20 /* force a bad CRC */ +#define RD_TX_PULSE 0x10 /* send indication pulse after this frame (MIR/FIR) */ +#define RD_TX_FRCEUND 0x08 /* force underrun */ +#define RD_TX_CLRENTX 0x04 /* clear ENTX after this frame */ +#define RD_TX_UNDRN 0x01 /* TX fifo underrun (probably PCI problem) */ + +/* RX ring descriptor status */ + +#define RD_RX_PHYERR 0x40 /* physical encoding error */ +#define RD_RX_CRCERR 0x20 /* CRC error (MIR/FIR) */ +#define RD_RX_LENGTH 0x10 /* frame exceeds buffer length */ +#define RD_RX_OVER 0x08 /* RX fifo overrun (probably PCI problem) */ +#define RD_RX_SIRBAD 0x04 /* EOF missing: BOF follows BOF (SIR, filtered) */ + +#define RD_RX_ERROR 0x7c /* any error in received frame */ + +/* the memory required to hold the 2 descriptor rings */ +#define HW_RING_AREA_SIZE (2 * MAX_RING_DESCR * sizeof(struct ring_descr_hw)) + +/******************************************************************/ + +/* sw-ring descriptors consist of a bus-mapped transfer buffer with + * associated skb and a pointer to the hw entry descriptor + */ + +struct ring_descr { + struct ring_descr_hw *hw; + struct sk_buff *skb; + void *buf; +}; + +/* wrappers for operations on hw-exposed ring descriptors + * access to the hw-part of the descriptors must use these.
+ */ + +static inline int rd_is_active(struct ring_descr *rd) +{ + return ((rd->hw->rd_status & RD_ACTIVE) != 0); +} + +static inline void rd_activate(struct ring_descr *rd) +{ + rd->hw->rd_status |= RD_ACTIVE; +} + +static inline void rd_set_status(struct ring_descr *rd, u8 s) +{ + rd->hw->rd_status = s; /* may pass ownership to the hardware */ +} + +static inline void rd_set_addr_status(struct ring_descr *rd, dma_addr_t a, u8 s) +{ + /* order is important for two reasons: + * - overlayed: writing addr overwrites status + * - we want to write status last so we have valid address in + * case status has RD_ACTIVE set + */ + + if ((a & ~DMA_MASK_MSTRPAGE)>>24 != MSTRPAGE_VALUE) { + ERROR("%s: pci busaddr inconsistency!\n", __FUNCTION__); + dump_stack(); + return; + } + + a &= DMA_MASK_MSTRPAGE; /* clear highbyte to make sure we won't write + * to status - just in case MSTRPAGE_VALUE!=0 + */ + rd->hw->rd_addr = cpu_to_le32(a); + wmb(); + rd_set_status(rd, s); /* may pass ownership to the hardware */ +} + +static inline void rd_set_count(struct ring_descr *rd, u16 c) +{ + rd->hw->rd_count = cpu_to_le16(c); +} + +static inline u8 rd_get_status(struct ring_descr *rd) +{ + return rd->hw->rd_status; +} + +static inline dma_addr_t rd_get_addr(struct ring_descr *rd) +{ + dma_addr_t a; + + a = le32_to_cpu(rd->hw->rd_addr); + return (a & DMA_MASK_MSTRPAGE) | (MSTRPAGE_VALUE << 24); +} + +static inline u16 rd_get_count(struct ring_descr *rd) +{ + return le16_to_cpu(rd->hw->rd_count); +} + +/******************************************************************/ + +/* sw descriptor rings for rx, tx: + * + * operations follow producer-consumer paradigm, with the hw + * in the middle doing the processing. + * ring size must be power of two. 
+ * + * producer advances r->tail after inserting for processing + * consumer advances r->head after removing processed rd + * ring is empty if head==tail / full if (tail+1)==head + */ + +struct vlsi_ring { + struct pci_dev *pdev; + int dir; + unsigned len; + unsigned size; + unsigned mask; + atomic_t head, tail; + struct ring_descr *rd; +}; + +/* ring processing helpers */ + +static inline struct ring_descr *ring_last(struct vlsi_ring *r) +{ + int t; + + t = atomic_read(&r->tail) & r->mask; + return (((t+1) & r->mask) == (atomic_read(&r->head) & r->mask)) ? NULL : &r->rd[t]; +} + +static inline struct ring_descr *ring_put(struct vlsi_ring *r) +{ + atomic_inc(&r->tail); + return ring_last(r); +} + +static inline struct ring_descr *ring_first(struct vlsi_ring *r) +{ + int h; + + h = atomic_read(&r->head) & r->mask; + return (h == (atomic_read(&r->tail) & r->mask)) ? NULL : &r->rd[h]; +} + +static inline struct ring_descr *ring_get(struct vlsi_ring *r) +{ + atomic_inc(&r->head); + return ring_first(r); +} + +/******************************************************************/ + +/* our private compound VLSI-PCI-IRDA device information */ + +typedef struct vlsi_irda_dev { + struct pci_dev *pdev; + struct net_device_stats stats; + + struct irlap_cb *irlap; + + struct qos_info qos; + + unsigned mode; + int baud, new_baud; + + dma_addr_t busaddr; + void *virtaddr; + struct vlsi_ring *tx_ring, *rx_ring; + + struct timeval last_rx; + + spinlock_t lock; + struct semaphore sem; + + u32 cfg_space[64/sizeof(u32)]; + u8 resume_ok; + struct proc_dir_entry *proc_entry; + +} vlsi_irda_dev_t; + +/********************************************************/ + +/* the remapped error flags we use for returning from frame + * post-processing in vlsi_process_tx/rx() after it was completed + * by the hardware. These functions either return the >=0 number + * of transfered bytes in case of success or the negative (-) + * of the or'ed error flags. 
+ */ + +#define VLSI_TX_DROP 0x0001 +#define VLSI_TX_FIFO 0x0002 + +#define VLSI_RX_DROP 0x0100 +#define VLSI_RX_OVER 0x0200 +#define VLSI_RX_LENGTH 0x0400 +#define VLSI_RX_FRAME 0x0800 +#define VLSI_RX_CRC 0x1000 + +/********************************************************/ + +#endif /* IRDA_VLSI_FIR_H */ + --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/w83977af.h 2004-04-18 22:25:24.829055888 -0700 @@ -0,0 +1,53 @@ +#ifndef W83977AF_H +#define W83977AF_H + +#define W977_EFIO_BASE 0x370 +#define W977_EFIO2_BASE 0x3f0 +#define W977_DEVICE_IR 0x06 + + +/* + * Enter extended function mode + */ +static inline void w977_efm_enter(unsigned int efio) +{ + outb(0x87, efio); + outb(0x87, efio); +} + +/* + * Select a device to configure + */ + +static inline void w977_select_device(__u8 devnum, unsigned int efio) +{ + outb(0x07, efio); + outb(devnum, efio+1); +} + +/* + * Write a byte to a register + */ +static inline void w977_write_reg(__u8 reg, __u8 value, unsigned int efio) +{ + outb(reg, efio); + outb(value, efio+1); +} + +/* + * read a byte from a register + */ +static inline __u8 w977_read_reg(__u8 reg, unsigned int efio) +{ + outb(reg, efio); + return inb(efio+1); +} + +/* + * Exit extended function mode + */ +static inline void w977_efm_exit(unsigned int efio) +{ + outb(0xAA, efio); +} +#endif --- linux-2.6.6-rc1/drivers/net/irda/w83977af_ir.c 2004-03-10 20:41:28.000000000 -0800 +++ 25/drivers/net/irda/w83977af_ir.c 2004-04-18 22:25:24.830055736 -0700 @@ -58,8 +58,8 @@ #include #include #include -#include -#include +#include "w83977af.h" +#include "w83977af_ir.h" #ifdef CONFIG_ARCH_NETWINDER /* Adjust to NetWinder differences */ #undef CONFIG_NETWINDER_TX_DMA_PROBLEMS /* Not needed */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/irda/w83977af_ir.h 2004-04-18 22:25:24.831055584 -0700 @@ -0,0 +1,196 @@ +/********************************************************************* + * + * Filename: w83977af_ir.h + * Version: 
+ * Description: + * Status: Experimental. + * Author: Paul VanderSpek + * Created at: Thu Nov 19 13:55:34 1998 + * Modified at: Tue Jan 11 13:08:19 2000 + * Modified by: Dag Brattli + * + * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * Neither Dag Brattli nor University of Tromsø admit liability nor + * provide warranty for any of this software. This material is + * provided "AS-IS" and at no charge. + * + ********************************************************************/ + +#ifndef W83977AF_IR_H +#define W83977AF_IR_H + +#include + +/* Flags for configuration register CRF0 */ +#define ENBNKSEL 0x01 +#define APEDCRC 0x02 +#define TXW4C 0x04 +#define RXW4C 0x08 + +/* Bank 0 */ +#define RBR 0x00 /* Receiver buffer register */ +#define TBR 0x00 /* Transmitter buffer register */ + +#define ICR 0x01 /* Interrupt configuration register */ +#define ICR_ERBRI 0x01 /* Receiver buffer register interrupt */ +#define ICR_ETBREI 0x02 /* Transceiver empty interrupt */ +#define ICR_EUSRI 0x04 /* IR status interrupt */ +#define ICR_EHSRI 0x04 +#define ICR_ETXURI 0x04 /* Tx underrun */ +#define ICR_EDMAI 0x10 /* DMA interrupt */ +#define ICR_ETXTHI 0x20 /* Transmitter threshold interrupt */ +#define ICR_EFSFI 0x40 /* Frame status FIFO interrupt */ +#define ICR_ETMRI 0x80 /* Timer interrupt */ + +#define UFR 0x02 /* FIFO control register */ +#define UFR_EN_FIFO 0x01 /* Enable FIFO's */ +#define UFR_RXF_RST 0x02 /* Reset Rx FIFO */ +#define UFR_TXF_RST 0x04 /* Reset Tx FIFO */ +#define UFR_RXTL 0x80 /* Rx FIFO threshold (set to 16) */ +#define UFR_TXTL 0x20 /* Tx FIFO threshold (set to 17) */ + +#define ISR 0x02 /* Interrupt status register */ +#define ISR_RXTH_I 0x01 /* Receive threshold interrupt */
+#define ISR_TXEMP_I 0x02 /* Transmitter empty interrupt */ +#define ISR_FEND_I 0x04 +#define ISR_DMA_I 0x10 +#define ISR_TXTH_I 0x20 /* Transmitter threshold interrupt */ +#define ISR_FSF_I 0x40 +#define ISR_TMR_I 0x80 /* Timer interrupt */ + +#define UCR 0x03 /* Uart control register */ +#define UCR_DLS8 0x03 /* 8N1 */ + +#define SSR 0x03 /* Set select register */ +#define SET0 UCR_DLS8 /* Make sure we keep 8N1 */ +#define SET1 (0x80|UCR_DLS8) /* Make sure we keep 8N1 */ +#define SET2 0xE0 +#define SET3 0xE4 +#define SET4 0xE8 +#define SET5 0xEC +#define SET6 0xF0 +#define SET7 0xF4 + +#define HCR 0x04 +#define HCR_MODE_MASK ~(0xD0) +#define HCR_SIR 0x60 +#define HCR_MIR_576 0x20 +#define HCR_MIR_1152 0x80 +#define HCR_FIR 0xA0 +#define HCR_EN_DMA 0x04 +#define HCR_EN_IRQ 0x08 +#define HCR_TX_WT 0x08 + +#define USR 0x05 /* IR status register */ +#define USR_RDR 0x01 /* Receive data ready */ +#define USR_TSRE 0x40 /* Transmitter empty? */ + +#define AUDR 0x07 +#define AUDR_SFEND 0x08 /* Set a frame end */ +#define AUDR_RXBSY 0x20 /* Rx busy */ +#define AUDR_UNDR 0x40 /* Transceiver underrun */ + +/* Set 2 */ +#define ABLL 0x00 /* Advanced baud rate divisor latch (low byte) */ +#define ABHL 0x01 /* Advanced baud rate divisor latch (high byte) */ + +#define ADCR1 0x02 +#define ADCR1_ADV_SL 0x01 +#define ADCR1_D_CHSW 0x08 /* the specs are wrong.
its bit 3, not 4 */ +#define ADCR1_DMA_F 0x02 + +#define ADCR2 0x04 +#define ADCR2_TXFS32 0x01 +#define ADCR2_RXFS32 0x04 + +#define RXFDTH 0x07 + +/* Set 3 */ +#define AUID 0x00 + +/* Set 4 */ +#define TMRL 0x00 /* Timer value register (low byte) */ +#define TMRH 0x01 /* Timer value register (high byte) */ + +#define IR_MSL 0x02 /* Infrared mode select */ +#define IR_MSL_EN_TMR 0x01 /* Enable timer */ + +#define TFRLL 0x04 /* Transmitter frame length (low byte) */ +#define TFRLH 0x05 /* Transmitter frame length (high byte) */ +#define RFRLL 0x06 /* Receiver frame length (low byte) */ +#define RFRLH 0x07 /* Receiver frame length (high byte) */ + +/* Set 5 */ + +#define FS_FO 0x05 /* Frame status FIFO */ +#define FS_FO_FSFDR 0x80 /* Frame status FIFO data ready */ +#define FS_FO_LST_FR 0x40 /* Frame lost */ +#define FS_FO_MX_LEX 0x10 /* Max frame len exceeded */ +#define FS_FO_PHY_ERR 0x08 /* Physical layer error */ +#define FS_FO_CRC_ERR 0x04 +#define FS_FO_RX_OV 0x02 /* Receive overrun */ +#define FS_FO_FSF_OV 0x01 /* Frame status FIFO overrun */ +#define FS_FO_ERR_MSK 0x5f /* Error mask */ + +#define RFLFL 0x06 +#define RFLFH 0x07 + +/* Set 6 */ +#define IR_CFG2 0x00 +#define IR_CFG2_DIS_CRC 0x02 + +/* Set 7 */ +#define IRM_CR 0x07 /* Infrared module control register */ +#define IRM_CR_IRX_MSL 0x40 +#define IRM_CR_AF_MNT 0x80 /* Automatic format */ + +/* For storing entries in the status FIFO */ +struct st_fifo_entry { + int status; + int len; +}; + +struct st_fifo { + struct st_fifo_entry entries[10]; + int head; + int tail; + int len; +}; + +/* Private data for each instance */ +struct w83977af_ir { + struct st_fifo st_fifo; + + int tx_buff_offsets[10]; /* Offsets between frames in tx_buff */ + int tx_len; /* Number of frames in tx_buff */ + + struct net_device *netdev; /* Yes! 
we are some kind of netdevice */ + struct net_device_stats stats; + + struct irlap_cb *irlap; /* The link layer we are binded to */ + struct qos_info qos; /* QoS capabilities for this device */ + + chipio_t io; /* IrDA controller information */ + iobuff_t tx_buff; /* Transmit buffer */ + iobuff_t rx_buff; /* Receive buffer */ + + /* Note : currently locking is *very* incomplete, but this + * will get you started. Check in nsc-ircc.c for a proper + * locking strategy. - Jean II */ + spinlock_t lock; /* For serializing operations */ + + __u32 new_speed; +}; + +static inline void switch_bank( int iobase, int set) +{ + outb(set, iobase+SSR); +} + +#endif --- linux-2.6.6-rc1/drivers/net/ixgb/ixgb.h 2003-06-14 12:18:07.000000000 -0700 +++ 25/drivers/net/ixgb/ixgb.h 2004-04-18 22:25:24.831055584 -0700 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -65,8 +66,6 @@ struct ixgb_adapter; #define BAR_0 0 #define BAR_1 1 #define BAR_5 5 -#define PCI_DMA_64BIT 0xffffffffffffffffULL -#define PCI_DMA_32BIT 0x00000000ffffffffULL #include "ixgb_hw.h" #include "ixgb_ee.h" --- linux-2.6.6-rc1/drivers/net/ixgb/ixgb_main.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/net/ixgb/ixgb_main.c 2004-04-18 22:25:24.833055280 -0700 @@ -308,10 +308,10 @@ ixgb_probe(struct pci_dev *pdev, const s return i; } - if (!(i = pci_set_dma_mask(pdev, PCI_DMA_64BIT))) { + if (!(i = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) { pci_using_dac = 1; } else { - if ((i = pci_set_dma_mask(pdev, PCI_DMA_32BIT))) { + if ((i = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) { IXGB_ERR("No usable DMA configuration, aborting\n"); return i; } --- linux-2.6.6-rc1/drivers/net/Kconfig 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/Kconfig 2004-04-18 22:25:24.787062272 -0700 @@ -21,10 +21,6 @@ config NETDEVICES If unsure, say Y. 
-if NETDEVICES - source "drivers/net/arcnet/Kconfig" -endif - config DUMMY tristate "Dummy net driver support" depends on NETDEVICES @@ -155,6 +151,10 @@ config NET_SB1000 If you don't have this card, of course say N. +if NETDEVICES + source "drivers/net/arcnet/Kconfig" +endif + # # Ethernet # @@ -1178,6 +1178,17 @@ config IBMLANA boards with this driver should be possible, but has not been tested up to now due to lack of hardware. +config IBMVETH + tristate "IBM LAN Virtual Ethernet support" + depends on NETDEVICES && NET_ETHERNET && PPC_PSERIES + ---help--- + This driver supports virtual ethernet adapters on newer IBM iSeries + and pSeries systems. + + To compile this driver as a module, choose M here and read + . The module will + be called ibmveth. + config NET_PCI bool "EISA, VLB, PCI and on board controllers" depends on NET_ETHERNET && (ISA || EISA || PCI) @@ -1219,6 +1230,9 @@ config AMD8111_ETH To compile this driver as a module, choose M here and read . The module will be called amd8111e. +config AMD8111E_NAPI + bool "Enable NAPI support" + depends on AMD8111_ETH config ADAPTEC_STARFIRE tristate "Adaptec Starfire/DuraLAN support" @@ -2105,6 +2119,17 @@ config S2IO_NAPI endmenu +source "drivers/net/tokenring/Kconfig" + +source "drivers/net/wireless/Kconfig" + +source "drivers/net/pcmcia/Kconfig" + +source "drivers/net/wan/Kconfig" + +source "drivers/atm/Kconfig" + +source "drivers/s390/net/Kconfig" config ISERIES_VETH tristate "iSeries Virtual Ethernet driver support" @@ -2172,17 +2197,6 @@ config HIPPI under Linux, say Y here (you must also remember to enable the driver for your HIPPI card below). Most people will say N here. -config IBMVETH - tristate "IBM LAN Virtual Ethernet support" - depends on NETDEVICES && NET_ETHERNET && PPC_PSERIES - ---help--- - This driver supports virtual ethernet adapters on newer IBM iSeries - and pSeries systems. - - To compile this driver as a module, choose M here and read - . The module will - be called ibmveth. 
- config ROADRUNNER tristate "Essential RoadRunner HIPPI PCI adapter support (EXPERIMENTAL)" depends on HIPPI && PCI @@ -2440,10 +2454,6 @@ config SLIP_MODE_SLIP6 end of the link as well. It's good enough, for example, to run IP over the async ports of a Camtec JNT Pad. If unsure, say N. -source "drivers/net/wireless/Kconfig" - -source "drivers/net/tokenring/Kconfig" - config NET_FC bool "Fibre Channel driver support" depends on NETDEVICES && SCSI && PCI @@ -2503,11 +2513,3 @@ config NETCONSOLE ---help--- If you want to log kernel messages over the network, enable this. See Documentation/networking/netconsole.txt for details. - -source "drivers/net/wan/Kconfig" - -source "drivers/net/pcmcia/Kconfig" - -source "drivers/atm/Kconfig" - -source "drivers/s390/net/Kconfig" --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/drivers/net/kgdb_eth.c 2004-04-18 22:25:31.165092664 -0700 @@ -0,0 +1,132 @@ +/* + * Network interface GDB stub + * + * Written by San Mehat (nettwerk@biodome.org) + * Based upon 'gdbserial' by David Grothe (dave@gcom.com) + * and Scott Foehner (sfoehner@engr.sgi.com) + * + * Twiddled for 2.6 by Robert Walsh + * and wangdi . 
+ * + * Refactored for netpoll API by Matt Mackall + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define IN_BUF_SIZE 512 /* power of 2, please */ +#define OUT_BUF_SIZE 256 + +static char in_buf[IN_BUF_SIZE], out_buf[OUT_BUF_SIZE]; +static int in_head, in_tail, out_count; +static atomic_t in_count; +int kgdboe = 0; /* Default to tty mode */ + +extern void set_debug_traps(void); +extern void breakpoint(void); +static void rx_hook(struct netpoll *np, int port, char *msg, int len); + +static struct netpoll np = { + .name = "kgdboe", + .dev_name = "eth0", + .rx_hook = rx_hook, + .local_port = 6443, + .remote_port = 6442, + .remote_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, +}; +static int configured; + +int eth_getDebugChar(void) +{ + int chr; + + while (atomic_read(&in_count) == 0) + netpoll_poll(&np); + + chr = in_buf[in_tail++]; + in_tail &= (IN_BUF_SIZE - 1); + atomic_dec(&in_count); + return chr; +} + +void eth_flushDebugChar(void) +{ + if(out_count && np.dev) { + netpoll_send_udp(&np, out_buf, out_count); + out_count = 0; + } +} + +void eth_putDebugChar(int chr) +{ + out_buf[out_count++] = chr; + if(out_count == OUT_BUF_SIZE) + eth_flushDebugChar(); +} + +static void rx_hook(struct netpoll *np, int port, char *msg, int len) +{ + int i; + + np->remote_port = port; + + /* Is this gdb trying to attach? 
*/ + if (!netpoll_trap() && len == 8 && !strncmp(msg, "$Hc-1#09", 8)) + kgdb_schedule_breakpoint(); + + for (i = 0; i < len; i++) { + if (msg[i] == 3) + kgdb_schedule_breakpoint(); + + if (atomic_read(&in_count) >= IN_BUF_SIZE) { + /* buffer overflow, clear it */ + in_head = in_tail = 0; + atomic_set(&in_count, 0); + break; + } + in_buf[in_head++] = msg[i]; + in_head &= (IN_BUF_SIZE - 1); + atomic_inc(&in_count); + } +} + +static int option_setup(char *opt) +{ + configured = !netpoll_parse_options(&np, opt); + return 0; +} +__setup("kgdboe=", option_setup); + +static int init_kgdboe(void) +{ +#ifdef CONFIG_SMP + if (num_online_cpus() > CONFIG_NO_KGDB_CPUS) { + printk("kgdb: too many cpus. Cannot enable debugger with more than %d cpus\n", CONFIG_NO_KGDB_CPUS); + return -1; + } +#endif + + set_debug_traps(); + + if(!configured || netpoll_setup(&np)) + return 1; + + kgdboe = 1; + printk(KERN_INFO "kgdb: debugging over ethernet enabled\n"); + + return 0; +} + +module_init(init_kgdboe); --- linux-2.6.6-rc1/drivers/net/lasi_82596.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/lasi_82596.c 2004-04-18 22:26:02.380347224 -0700 @@ -87,7 +87,6 @@ #include #include #include -#include #include #include #include --- linux-2.6.6-rc1/drivers/net/macsonic.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/net/macsonic.c 2004-04-18 22:26:02.381347072 -0700 @@ -53,7 +53,6 @@ #include #include #include -#include #define SREGS_PAD(n) u16 n; --- linux-2.6.6-rc1/drivers/net/Makefile 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/Makefile 2004-04-18 22:25:30.863138568 -0700 @@ -189,4 +189,6 @@ obj-$(CONFIG_NET_TULIP) += tulip/ obj-$(CONFIG_HAMRADIO) += hamradio/ obj-$(CONFIG_IRDA) += irda/ +# Must come after all NICs that might use them obj-$(CONFIG_NETCONSOLE) += netconsole.o +obj-$(CONFIG_KGDB) += kgdb_eth.o --- linux-2.6.6-rc1/drivers/net/natsemi.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/natsemi.c 2004-04-18 22:25:24.835054976 -0700 @@ -387,7 +387,7
@@ enum register_offsets { IntrStatus = 0x10, IntrMask = 0x14, IntrEnable = 0x18, - IntrHoldoff = 0x16, /* DP83816 only */ + IntrHoldoff = 0x1C, /* DP83816 only */ TxRingPtr = 0x20, TxConfig = 0x24, RxRingPtr = 0x30, --- linux-2.6.6-rc1/drivers/net/pcmcia/3c574_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/3c574_cs.c 2004-04-18 22:25:44.164116512 -0700 @@ -384,6 +384,8 @@ static void tc574_detach(dev_link_t *lin #define CS_CHECK(fn, ret) \ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) +static char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; + static void tc574_config(dev_link_t *link) { client_handle_t handle = link->handle; @@ -396,6 +398,7 @@ static void tc574_config(dev_link_t *lin ioaddr_t ioaddr; u16 *phys_addr; char *cardname; + union wn3_config config; phys_addr = (u16 *)dev->dev_addr; @@ -431,15 +434,7 @@ static void tc574_config(dev_link_t *lin dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - if (register_netdev(dev) != 0) { - printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n"); - goto failed; - } - ioaddr = dev->base_addr; - strcpy(lp->node.dev_name, dev->name); - link->dev = &lp->node; - link->state &= ~DEV_CONFIG_PENDING; /* The 3c574 normally uses an EEPROM for configuration info, including the hardware address. The future products may include a modem chip @@ -467,24 +462,14 @@ static void tc574_config(dev_link_t *lin } else cardname = "3Com 3c574"; - printk(KERN_INFO "%s: %s at io %#3lx, irq %d, hw_addr ", - dev->name, cardname, dev->base_addr, dev->irq); - - for (i = 0; i < 6; i++) - printk("%02X%s", dev->dev_addr[i], ((i<5) ? 
":" : ".\n")); - { - u_char mcr, *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; - union wn3_config config; + u_char mcr; outw(2<<11, ioaddr + RunnerRdCtrl); mcr = inb(ioaddr + 2); outw(0<<11, ioaddr + RunnerRdCtrl); printk(KERN_INFO " ASIC rev %d,", mcr>>3); EL3WINDOW(3); config.i = inl(ioaddr + Wn3_Config); - printk(" %dK FIFO split %s Rx:Tx, %sMII interface.\n", - 8 << config.u.ram_size, ram_split[config.u.ram_split], - config.u.autoselect ? "autoselect " : ""); lp->default_media = config.u.xcvr; lp->autoselect = config.u.autoselect; } @@ -531,6 +516,25 @@ static void tc574_config(dev_link_t *lin } } + link->state &= ~DEV_CONFIG_PENDING; + link->dev = &lp->node; + + if (register_netdev(dev) != 0) { + printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n"); + link->dev = NULL; + goto failed; + } + + strcpy(lp->node.dev_name, dev->name); + + printk(KERN_INFO "%s: %s at io %#3lx, irq %d, hw_addr ", + dev->name, cardname, dev->base_addr, dev->irq); + for (i = 0; i < 6; i++) + printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : ".\n")); + printk(" %dK FIFO split %s Rx:Tx, %sMII interface.\n", + 8 << config.u.ram_size, ram_split[config.u.ram_split], + config.u.autoselect ? 
"autoselect " : ""); + return; cs_failed: --- linux-2.6.6-rc1/drivers/net/pcmcia/3c589_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/3c589_cs.c 2004-04-18 22:25:44.165116360 -0700 @@ -308,7 +308,7 @@ static void tc589_config(dev_link_t *lin tuple_t tuple; cisparse_t parse; u16 buf[32], *phys_addr; - int last_fn, last_ret, i, j, multi = 0; + int last_fn, last_ret, i, j, multi = 0, fifo; ioaddr_t ioaddr; char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; @@ -357,11 +357,6 @@ static void tc589_config(dev_link_t *lin dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - if (register_netdev(dev) != 0) { - printk(KERN_ERR "3c589_cs: register_netdev() failed\n"); - goto failed; - } - ioaddr = dev->base_addr; EL3WINDOW(0); @@ -382,13 +377,10 @@ static void tc589_config(dev_link_t *lin } } - strcpy(lp->node.dev_name, dev->name); - link->dev = &lp->node; - link->state &= ~DEV_CONFIG_PENDING; - /* The address and resource configuration register aren't loaded from the EEPROM and *must* be set to 0 and IRQ3 for the PCMCIA version. */ outw(0x3f00, ioaddr + 8); + fifo = inl(ioaddr); /* The if_port symbol can be set when the module is loaded */ if ((if_port >= 0) && (if_port <= 3)) @@ -396,14 +388,24 @@ static void tc589_config(dev_link_t *lin else printk(KERN_ERR "3c589_cs: invalid if_port requested\n"); + link->dev = &lp->node; + link->state &= ~DEV_CONFIG_PENDING; + + if (register_netdev(dev) != 0) { + printk(KERN_ERR "3c589_cs: register_netdev() failed\n"); + link->dev = NULL; + goto failed; + } + + strcpy(lp->node.dev_name, dev->name); + printk(KERN_INFO "%s: 3Com 3c%s, io %#3lx, irq %d, hw_addr ", dev->name, (multi ? "562" : "589"), dev->base_addr, dev->irq); for (i = 0; i < 6; i++) printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : "\n")); - i = inl(ioaddr); printk(KERN_INFO " %dK FIFO split %s Rx:Tx, %s xcvr\n", - (i & 7) ? 32 : 8, ram_split[(i >> 16) & 3], + (fifo & 7) ? 
32 : 8, ram_split[(fifo >> 16) & 3], if_names[dev->if_port]); return; --- linux-2.6.6-rc1/drivers/net/pcmcia/axnet_cs.c 2004-02-17 20:48:43.000000000 -0800 +++ 25/drivers/net/pcmcia/axnet_cs.c 2004-04-18 22:25:44.167116056 -0700 @@ -430,19 +430,11 @@ static void axnet_config(dev_link_t *lin ei_status.block_input = &block_input; ei_status.block_output = &block_output; - strcpy(info->node.dev_name, dev->name); - if (inb(dev->base_addr + AXNET_TEST) != 0) info->flags |= IS_AX88790; else info->flags |= IS_AX88190; - printk(KERN_INFO "%s: Asix AX88%d90: io %#3lx, irq %d, hw_addr ", - dev->name, ((info->flags & IS_AX88790) ? 7 : 1), - dev->base_addr, dev->irq); - for (i = 0; i < 6; i++) - printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : "\n")); - if (info->flags & IS_AX88790) outb(0x10, dev->base_addr + AXNET_GPIO); /* select Internal PHY */ @@ -463,19 +455,27 @@ static void axnet_config(dev_link_t *lin } info->phy_id = (i < 32) ? i : -1; - if (i < 32) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", i, j); - } else { - printk(KERN_NOTICE " No MII transceivers found!\n"); - } + link->dev = &info->node; + link->state &= ~DEV_CONFIG_PENDING; if (register_netdev(dev) != 0) { printk(KERN_NOTICE "axnet_cs: register_netdev() failed\n"); + link->dev = NULL; goto failed; } - link->dev = &info->node; - link->state &= ~DEV_CONFIG_PENDING; + strcpy(info->node.dev_name, dev->name); + + printk(KERN_INFO "%s: Asix AX88%d90: io %#3lx, irq %d, hw_addr ", + dev->name, ((info->flags & IS_AX88790) ? 7 : 1), + dev->base_addr, dev->irq); + for (i = 0; i < 6; i++) + printk("%02X%s", dev->dev_addr[i], ((i<5) ? 
":" : "\n")); + if (info->phy_id != -1) { + DEBUG(0, " MII transceiver at index %d, status %x.\n", info->phy_id, j); + } else { + printk(KERN_NOTICE " No MII transceivers found!\n"); + } return; cs_failed: --- linux-2.6.6-rc1/drivers/net/pcmcia/com20020_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/com20020_cs.c 2004-04-18 22:25:44.168115904 -0700 @@ -141,7 +141,6 @@ static dev_link_t *dev_list; typedef struct com20020_dev_t { struct net_device *dev; - int dev_configured; dev_node_t node; } com20020_dev_t; @@ -277,13 +276,10 @@ static void com20020_detach(dev_link_t * dev = info->dev; if (dev) { - if (info->dev_configured) + if (link->dev) { DEBUG(1,"unregister...\n"); - if (netif_running(dev)) - dev->stop(dev); - unregister_netdev(dev); /* @@ -398,17 +394,18 @@ static void com20020_config(dev_link_t * lp->card_name = "PCMCIA COM20020"; lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ + link->dev = &info->node; + link->state &= ~DEV_CONFIG_PENDING; + i = com20020_found(dev, 0); /* calls register_netdev */ if (i != 0) { DEBUG(1,KERN_NOTICE "com20020_cs: com20020_found() failed\n"); + link->dev = NULL; goto failed; } - info->dev_configured = 1; strcpy(info->node.dev_name, dev->name); - link->dev = &info->node; - link->state &= ~DEV_CONFIG_PENDING; DEBUG(1,KERN_INFO "%s: port %#3lx, irq %d\n", dev->name, dev->base_addr, dev->irq); --- linux-2.6.6-rc1/drivers/net/pcmcia/fmvj18x_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/fmvj18x_cs.c 2004-04-18 22:25:44.169115752 -0700 @@ -510,10 +510,6 @@ static void fmvj18x_config(dev_link_t *l CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link->handle, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - if (register_netdev(dev) != 0) { - printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n"); - goto failed; - } if (link->io.BasePort2 != 0) fmvj18x_setup_mfc(link); @@ -575,7 +571,6 @@ static void 
fmvj18x_config(dev_link_t *l /* Read MACID from Buggy CIS */ if (fmvj18x_get_hwinfo(link, tuple.TupleData) == -1) { printk(KERN_NOTICE "fmvj18x_cs: unable to read hardware net address.\n"); - unregister_netdev(dev); goto failed; } for (i = 0 ; i < 6; i++) { @@ -592,10 +587,18 @@ static void fmvj18x_config(dev_link_t *l break; } - strcpy(lp->node.dev_name, dev->name); + lp->cardtype = cardtype; link->dev = &lp->node; + link->state &= ~DEV_CONFIG_PENDING; + + if (register_netdev(dev) != 0) { + printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n"); + link->dev = NULL; + goto failed; + } + + strcpy(lp->node.dev_name, dev->name); - lp->cardtype = cardtype; /* print current configuration */ printk(KERN_INFO "%s: %s, sram %s, port %#3lx, irq %d, hw_addr ", dev->name, card_name, sram_config == 0 ? "4K TX*2" : "8K TX*2", @@ -603,7 +606,6 @@ static void fmvj18x_config(dev_link_t *l for (i = 0; i < 6; i++) printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : "\n")); - link->state &= ~DEV_CONFIG_PENDING; return; cs_failed: --- linux-2.6.6-rc1/drivers/net/pcmcia/ibmtr_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/ibmtr_cs.c 2004-04-18 22:25:44.170115600 -0700 @@ -317,13 +317,10 @@ static void ibmtr_config(dev_link_t *lin /* Try PRIMARY card at 0xA20-0xA23 */ link->io.BasePort1 = 0xA20; i = pcmcia_request_io(link->handle, &link->io); - if (i == CS_SUCCESS) { - memcpy(info->node.dev_name, "tr0\0", 4); - } else { + if (i != CS_SUCCESS) { /* Couldn't get 0xA20-0xA23. Try ALTERNATE at 0xA24-0xA27. */ link->io.BasePort1 = 0xA24; CS_CHECK(RequestIO, pcmcia_request_io(link->handle, &link->io)); - memcpy(info->node.dev_name, "tr1\0", 4); } dev->base_addr = link->io.BasePort1; @@ -367,15 +364,17 @@ static void ibmtr_config(dev_link_t *lin Adapters Technical Reference" SC30-3585 for this info. 
*/ ibmtr_hw_setup(dev, mmiobase); + link->dev = &info->node; + link->state &= ~DEV_CONFIG_PENDING; + i = ibmtr_probe_card(dev); - if (i != 0) { printk(KERN_NOTICE "ibmtr_cs: register_netdev() failed\n"); + link->dev = NULL; goto failed; } - link->dev = &info->node; - link->state &= ~DEV_CONFIG_PENDING; + strcpy(info->node.dev_name, dev->name); printk(KERN_INFO "%s: port %#3lx, irq %d,", dev->name, dev->base_addr, dev->irq); --- linux-2.6.6-rc1/drivers/net/pcmcia/nmclan_cs.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/pcmcia/nmclan_cs.c 2004-04-18 22:25:44.171115448 -0700 @@ -734,11 +734,6 @@ static void nmclan_config(dev_link_t *li CS_CHECK(RequestConfiguration, pcmcia_request_configuration(handle, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - i = register_netdev(dev); - if (i != 0) { - printk(KERN_NOTICE "nmclan_cs: register_netdev() failed\n"); - goto failed; - } ioaddr = dev->base_addr; @@ -777,10 +772,18 @@ static void nmclan_config(dev_link_t *li else printk(KERN_NOTICE "nmclan_cs: invalid if_port requested\n"); - strcpy(lp->node.dev_name, dev->name); link->dev = &lp->node; link->state &= ~DEV_CONFIG_PENDING; + i = register_netdev(dev); + if (i != 0) { + printk(KERN_NOTICE "nmclan_cs: register_netdev() failed\n"); + link->dev = NULL; + goto failed; + } + + strcpy(lp->node.dev_name, dev->name); + printk(KERN_INFO "%s: nmclan: port %#3lx, irq %d, %s port, hw_addr ", dev->name, dev->base_addr, dev->irq, if_names[dev->if_port]); for (i = 0; i < 6; i++) --- linux-2.6.6-rc1/drivers/net/pcmcia/smc91c92_cs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/pcmcia/smc91c92_cs.c 2004-04-18 22:25:44.173115144 -0700 @@ -901,6 +901,7 @@ static void smc91c92_config(dev_link_t * char *name; int i, j, rev; ioaddr_t ioaddr; + u_long mir; DEBUG(0, "smc91c92_config(0x%p)\n", link); @@ -952,11 +953,6 @@ static void smc91c92_config(dev_link_t * else printk(KERN_NOTICE "smc91c92_cs: invalid if_port requested\n"); - if 
(register_netdev(dev) != 0) { - printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n"); - goto config_undo; - } - switch (smc->manfid) { case MANFID_OSITECH: case MANFID_PSION: @@ -977,8 +973,6 @@ static void smc91c92_config(dev_link_t * goto config_undo; } - strcpy(smc->node.dev_name, dev->name); - link->dev = &smc->node; smc->duplex = 0; smc->rx_ovrn = 0; @@ -993,25 +987,16 @@ static void smc91c92_config(dev_link_t * case 8: name = "100-FD"; break; case 9: name = "110"; break; } - printk(KERN_INFO "%s: smc91c%s rev %d: io %#3lx, irq %d, " - "hw_addr ", dev->name, name, (rev & 0x0f), dev->base_addr, - dev->irq); - for (i = 0; i < 6; i++) - printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : "\n")); ioaddr = dev->base_addr; if (rev > 0) { - u_long mir, mcr; + u_long mcr; SMC_SELECT_BANK(0); mir = inw(ioaddr + MEMINFO) & 0xff; if (mir == 0xff) mir++; /* Get scale factor for memory size */ mcr = ((rev >> 4) > 3) ? inw(ioaddr + MEMCFG) : 0x0200; mir *= 128 * (1<<((mcr >> 9) & 7)); - if (mir & 0x3ff) - printk(KERN_INFO " %lu byte", mir); - else - printk(KERN_INFO " %lu kb", mir>>10); SMC_SELECT_BANK(1); smc->cfg = inw(ioaddr + CONFIG) & ~CFG_AUI_SELECT; smc->cfg |= CFG_NO_WAIT | CFG_16BIT | CFG_STATIC; @@ -1019,9 +1004,8 @@ static void smc91c92_config(dev_link_t * smc->cfg |= CFG_IRQ_SEL_1 | CFG_IRQ_SEL_0; if ((rev >> 4) >= 7) smc->cfg |= CFG_MII_SELECT; - printk(" buffer, %s xcvr\n", (smc->cfg & CFG_MII_SELECT) ? - "MII" : if_names[dev->if_port]); - } + } else + mir = 0; if (smc->cfg & CFG_MII_SELECT) { SMC_SELECT_BANK(3); @@ -1031,16 +1015,45 @@ static void smc91c92_config(dev_link_t * if ((j != 0) && (j != 0xffff)) break; } smc->mii_if.phy_id = (i < 32) ? 
i : -1; - if (i < 32) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", i, j); - } else { - printk(KERN_NOTICE " No MII transceivers found!\n"); - } SMC_SELECT_BANK(0); } + link->dev = &smc->node; link->state &= ~DEV_CONFIG_PENDING; + + if (register_netdev(dev) != 0) { + printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n"); + link->dev = NULL; + goto config_undo; + } + + strcpy(smc->node.dev_name, dev->name); + + printk(KERN_INFO "%s: smc91c%s rev %d: io %#3lx, irq %d, " + "hw_addr ", dev->name, name, (rev & 0x0f), dev->base_addr, + dev->irq); + for (i = 0; i < 6; i++) + printk("%02X%s", dev->dev_addr[i], ((i<5) ? ":" : "\n")); + + if (rev > 0) { + if (mir & 0x3ff) + printk(KERN_INFO " %lu byte", mir); + else + printk(KERN_INFO " %lu kb", mir>>10); + printk(" buffer, %s xcvr\n", (smc->cfg & CFG_MII_SELECT) ? + "MII" : if_names[dev->if_port]); + } + + if (smc->cfg & CFG_MII_SELECT) { + if (smc->mii_if.phy_id != -1) { + DEBUG(0, " MII transceiver at index %d, status %x.\n", + smc->mii_if.phy_id, j); + } else { + printk(KERN_NOTICE " No MII transceivers found!\n"); + } + } + return; config_undo: --- linux-2.6.6-rc1/drivers/net/pcmcia/xirc2ps_cs.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/pcmcia/xirc2ps_cs.c 2004-04-18 22:25:44.175114840 -0700 @@ -1114,17 +1114,20 @@ xirc2ps_config(dev_link_t * link) /* we can now register the device with the net subsystem */ dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; + + if (local->dingo) + do_reset(dev, 1); /* a kludge to make the cem56 work */ + + link->dev = &local->node; + link->state &= ~DEV_CONFIG_PENDING; + if ((err=register_netdev(dev))) { printk(KNOT_XIRC "register_netdev() failed\n"); + link->dev = NULL; goto config_error; } strcpy(local->node.dev_name, dev->name); - link->dev = &local->node; - link->state &= ~DEV_CONFIG_PENDING; - - if (local->dingo) - do_reset(dev, 1); /* a kludge to make the cem56 work */ /* give some infos about the hardware */ printk(KERN_INFO "%s: 
%s: port %#3lx, irq %d, hwaddr", --- linux-2.6.6-rc1/drivers/net/pcnet32.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/pcnet32.c 2004-04-18 22:25:24.837054672 -0700 @@ -1022,10 +1022,11 @@ pcnet32_probe1(unsigned long ioaddr, uns * starting until the packet is loaded. Strike one for reliability, lose * one for latency - although on PCI this isnt a big loss. Older chips * have FIFO's smaller than a packet, so you can't do this. + * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn. */ if (fset) { - a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0800)); + a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860)); a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00); dxsuflo = 1; ltint = 1; --- linux-2.6.6-rc1/drivers/net/ppp_generic.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/ppp_generic.c 2004-04-18 22:25:24.839054368 -0700 @@ -994,7 +994,11 @@ ppp_send_frame(struct ppp *ppp, struct s /* check if we should pass this packet */ /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ - *skb_push(skb, 2) = 1; + { + u_int16_t *p = (u_int16_t *) skb_push(skb, 2); + + *p = htons(4); /* indicate outbound in DLT_LINUX_SLL */; + } if (ppp->pass_filter.filter && sk_run_filter(skb, ppp->pass_filter.filter, ppp->pass_filter.len) == 0) { @@ -1537,7 +1541,11 @@ ppp_receive_nonmp_frame(struct ppp *ppp, /* check if the packet passes the pass and active filters */ /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ - *skb_push(skb, 2) = 0; + { + u_int16_t *p = (u_int16_t *) skb_push(skb, 2); + + *p = 0; /* indicate inbound in DLT_LINUX_SLL */ + } if (ppp->pass_filter.filter && sk_run_filter(skb, ppp->pass_filter.filter, ppp->pass_filter.len) == 0) { --- linux-2.6.6-rc1/drivers/net/r8169.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/r8169.c 2004-04-18 22:25:29.937279320 -0700 @@ -50,6 +50,9 @@ VERSION 1.2 <2002/11/30> #define DMA_32BIT_MASK 
0xffffffffULL #define DMA_64BIT_MASK 0xffffffffffffffffULL +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#define DMA_32BIT_MASK 0xffffffffULL + #define RTL8169_VERSION "1.2" #define MODULENAME "r8169" #define RTL8169_DRIVER_NAME MODULENAME " Gigabit Ethernet driver " RTL8169_VERSION --- linux-2.6.6-rc1/drivers/net/sk98lin/skvpd.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/net/sk98lin/skvpd.c 2004-04-18 22:25:50.945085648 -0700 @@ -468,6 +468,17 @@ SK_IOC IoC) /* IO Context */ pAC->vpd.vpd_size = vpd_size; + /* Asus K8V Se Deluxe bugfix. Correct VPD content */ + /* MBo April 2004 */ + if (((unsigned char)pAC->vpd.vpd_buf[0x3f] == 0x38) && + ((unsigned char)pAC->vpd.vpd_buf[0x40] == 0x3c) && + ((unsigned char)pAC->vpd.vpd_buf[0x41] == 0x45)) { + printk("sk98lin: Asus mainboard with buggy VPD? " + "Correcting data.\n"); + pAC->vpd.vpd_buf[0x40] = 0x38; + } + + /* find the end tag of the RO area */ if (!(r = vpd_find_para(pAC, VPD_RV, &rp))) { SK_DBG_MSG(pAC, SK_DBGMOD_VPD, SK_DBGCAT_ERR | SK_DBGCAT_FATAL, --- linux-2.6.6-rc1/drivers/net/sk_mca.c 2004-02-17 20:48:44.000000000 -0800 +++ 25/drivers/net/sk_mca.c 2004-04-18 22:25:24.840054216 -0700 @@ -997,13 +997,13 @@ static void skmca_set_multicast_list(str block.Mode &= ~LANCE_INIT_PROM; if (dev->flags & IFF_ALLMULTI) { /* get all multicasts */ - memset(block.LAdrF, 8, 0xff); + memset(block.LAdrF, 0xff, sizeof(block.LAdrF)); } else { /* get selected/no multicasts */ struct dev_mc_list *mptr; int code; - memset(block.LAdrF, 8, 0x00); + memset(block.LAdrF, 0, sizeof(block.LAdrF)); for (mptr = dev->mc_list; mptr != NULL; mptr = mptr->next) { code = GetHash(mptr->dmi_addr); block.LAdrF[(code >> 3) & 7] |= 1 << (code & 7); --- linux-2.6.6-rc1/drivers/net/sun3lance.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/sun3lance.c 2004-04-18 22:26:02.382346920 -0700 @@ -42,7 +42,6 @@ static char *version = "sun3lance.c: v1.
#include #include #include -#include #include #include #include --- linux-2.6.6-rc1/drivers/net/tg3.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/tg3.c 2004-04-18 22:25:24.848053000 -0700 @@ -56,8 +56,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "2.9" -#define DRV_MODULE_RELDATE "March 8, 2004" +#define DRV_MODULE_VERSION "3.1" +#define DRV_MODULE_RELDATE "April 3, 2004" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -643,7 +643,14 @@ static int tg3_phy_reset_5703_4_5(struct tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8200); tg3_writephy(tp, 0x16, 0x0000); - tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) { + /* Set Extended packet length bit for jumbo frames */ + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4400); + } + else { + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400); + } tg3_writephy(tp, MII_TG3_CTRL, phy9_orig); @@ -657,7 +664,7 @@ static int tg3_phy_reset_5703_4_5(struct /* This will reset the tigon3 PHY if there is no valid * link unless the FORCE argument is non-zero. */ -static int tg3_phy_reset(struct tg3 *tp, int force) +static int tg3_phy_reset(struct tg3 *tp) { u32 phy_status; int err; @@ -667,12 +674,6 @@ static int tg3_phy_reset(struct tg3 *tp, if (err != 0) return -EBUSY; - /* If we have link, and not forcing a reset, then nothing - * to do. 
- */ - if ((phy_status & BMSR_LSTATUS) != 0 && (force == 0)) - return 0; - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) { @@ -699,6 +700,15 @@ out: tg3_writephy(tp, 0x1c, 0x8d68); tg3_writephy(tp, 0x1c, 0x8d68); } + /* Set Extended packet length bit (bit 14) on all chips that */ + /* support jumbo frames */ + if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401 || + (tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5411) { + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20); + } + else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705) { + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4400); + } tg3_phy_set_wirespeed(tp); return 0; } @@ -1050,6 +1060,8 @@ static int tg3_phy_copper_begin(struct t u32 new_adv; int i; + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400); + if (tp->link_config.phy_is_low_power) { /* Entering low power mode. Disable gigabit and * 100baseT advertisements. @@ -1190,7 +1202,8 @@ static int tg3_init_5401phy_dsp(struct t int err; /* Turn off tap power management. 
*/ - err = tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c20); + /* Set Extended packet length bit */ + err = tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20); err |= tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x0012); err |= tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x1804); @@ -1212,6 +1225,27 @@ static int tg3_init_5401phy_dsp(struct t return err; } +static int tg3_copper_is_advertising_all(struct tg3 *tp) +{ + u32 adv_reg, all_mask; + + tg3_readphy(tp, MII_ADVERTISE, &adv_reg); + all_mask = (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL); + if ((adv_reg & all_mask) != all_mask) + return 0; + if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) { + u32 tg3_ctrl; + + tg3_readphy(tp, MII_TG3_CTRL, &tg3_ctrl); + all_mask = (MII_TG3_CTRL_ADV_1000_HALF | + MII_TG3_CTRL_ADV_1000_FULL); + if ((tg3_ctrl & all_mask) != all_mask) + return 0; + } + return 1; +} + static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset) { int current_link_up; @@ -1240,7 +1274,7 @@ static int tg3_setup_copper_phy(struct t */ if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 || - tp->pci_chip_rev_id == CHIPREV_ID_5705_A0) && + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) && netif_carrier_ok(tp->dev)) { tg3_readphy(tp, MII_BMSR, &bmsr); tg3_readphy(tp, MII_BMSR, &bmsr); @@ -1248,7 +1282,7 @@ static int tg3_setup_copper_phy(struct t force_reset = 1; } if (force_reset) - tg3_phy_reset(tp, 1); + tg3_phy_reset(tp); if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) { tg3_readphy(tp, MII_BMSR, &bmsr); @@ -1275,7 +1309,7 @@ static int tg3_setup_copper_phy(struct t if ((tp->phy_id & PHY_ID_REV_MASK) == PHY_REV_BCM5401_B0 && !(bmsr & BMSR_LSTATUS) && tp->link_config.active_speed == SPEED_1000) { - err = tg3_phy_reset(tp, 1); + err = tg3_phy_reset(tp); if (!err) err = tg3_init_5401phy_dsp(tp); if (err) @@ -1310,8 +1344,14 @@ static int tg3_setup_copper_phy(struct t current_speed = SPEED_INVALID; current_duplex = DUPLEX_INVALID; 
- tg3_readphy(tp, MII_BMSR, &bmsr); - tg3_readphy(tp, MII_BMSR, &bmsr); + bmsr = 0; + for (i = 0; i < 100; i++) { + tg3_readphy(tp, MII_BMSR, &bmsr); + tg3_readphy(tp, MII_BMSR, &bmsr); + if (bmsr & BMSR_LSTATUS) + break; + udelay(40); + } if (bmsr & BMSR_LSTATUS) { u32 aux_stat, bmcr; @@ -1327,22 +1367,25 @@ static int tg3_setup_copper_phy(struct t tg3_aux_stat_to_speed_duplex(tp, aux_stat, &current_speed, &current_duplex); - tg3_readphy(tp, MII_BMCR, &bmcr); - tg3_readphy(tp, MII_BMCR, &bmcr); + + bmcr = 0; + for (i = 0; i < 200; i++) { + tg3_readphy(tp, MII_BMCR, &bmcr); + tg3_readphy(tp, MII_BMCR, &bmcr); + if (bmcr && bmcr != 0x7fff) + break; + udelay(10); + } + if (tp->link_config.autoneg == AUTONEG_ENABLE) { if (bmcr & BMCR_ANENABLE) { - u32 gig_ctrl; - current_link_up = 1; /* Force autoneg restart if we are exiting * low power mode. */ - tg3_readphy(tp, MII_TG3_CTRL, &gig_ctrl); - if (!(gig_ctrl & (MII_TG3_CTRL_ADV_1000_HALF | - MII_TG3_CTRL_ADV_1000_FULL))) { + if (!tg3_copper_is_advertising_all(tp)) current_link_up = 0; - } } else { current_link_up = 0; } @@ -2004,6 +2047,13 @@ static int tg3_setup_phy(struct tg3 *tp, (6 << TX_LENGTHS_IPG_SHIFT) | (32 << TX_LENGTHS_SLOT_TIME_SHIFT))); + if (netif_carrier_ok(tp->dev)) { + tw32(HOSTCC_STAT_COAL_TICKS, + DEFAULT_STAT_COAL_TICKS); + } else { + tw32(HOSTCC_STAT_COAL_TICKS, 0); + } + return err; } @@ -3398,10 +3448,11 @@ out: } /* tp->lock is held. */ -static void tg3_chip_reset(struct tg3 *tp) +static int tg3_chip_reset(struct tg3 *tp) { u32 val; u32 flags_save; + int i; if (!(tp->tg3_flags2 & TG3_FLG2_SUN_5704)) { /* Force NVRAM to settle. 
@@ -3469,6 +3520,8 @@ static void tg3_chip_reset(struct tg3 *t tw32(MEMARB_MODE, MEMARB_MODE_ENABLE); + tw32(GRC_MODE, tp->grc_mode); + if ((tp->nic_sram_data_cfg & NIC_SRAM_DATA_CFG_MINI_PCI) != 0 && GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705) { tp->pci_clock_ctrl |= @@ -3476,7 +3529,45 @@ static void tg3_chip_reset(struct tg3 *t tw32(TG3PCI_CLOCK_CTRL, tp->pci_clock_ctrl); } - tw32(TG3PCI_MISC_HOST_CTRL, tp->misc_host_ctrl); + /* Prevent PXE from restarting. */ + tg3_write_mem(tp, + NIC_SRAM_FIRMWARE_MBOX, + NIC_SRAM_FIRMWARE_MBOX_MAGIC1); + + if (tp->phy_id == PHY_ID_SERDES) { + tp->mac_mode = MAC_MODE_PORT_MODE_TBI; + tw32_f(MAC_MODE, tp->mac_mode); + } else + tw32_f(MAC_MODE, 0); + udelay(40); + + /* Wait for firmware initialization to complete. */ + for (i = 0; i < 100000; i++) { + tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val); + if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1) + break; + udelay(10); + } + if (i >= 100000 && + !(tp->tg3_flags2 & TG3_FLG2_SUN_5704)) { + printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, " + "firmware will not restart magic=%08x\n", + tp->dev->name, val); + return -ENODEV; + } + + /* Reprobe ASF enable state. */ + tp->tg3_flags &= ~TG3_FLAG_ENABLE_ASF; + tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); + if (val == NIC_SRAM_DATA_SIG_MAGIC) { + u32 nic_cfg; + + tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg); + if (nic_cfg & NIC_SRAM_DATA_CFG_ASF_ENABLE) + tp->tg3_flags |= TG3_FLAG_ENABLE_ASF; + } + + return 0; } /* tp->lock is held. */ @@ -3503,40 +3594,17 @@ static void tg3_stop_fw(struct tg3 *tp) /* tp->lock is held. 
*/ static int tg3_halt(struct tg3 *tp) { - u32 val; - int i; + int err; tg3_stop_fw(tp); tg3_abort_hw(tp); - tg3_chip_reset(tp); - tg3_write_mem(tp, - NIC_SRAM_FIRMWARE_MBOX, - NIC_SRAM_FIRMWARE_MBOX_MAGIC1); - for (i = 0; i < 100000; i++) { - tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val); - if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1) - break; - udelay(10); - } - - if (i >= 100000 && - !(tp->tg3_flags2 & TG3_FLG2_SUN_5704)) { - printk(KERN_ERR PFX "tg3_halt timed out for %s, " - "firmware will not restart magic=%08x\n", - tp->dev->name, val); - return -ENODEV; - } + err = tg3_chip_reset(tp); + if (err) + return err; - if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) { - if (tp->tg3_flags & TG3_FLAG_WOL_ENABLE) - tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, - DRV_STATE_WOL); - else - tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, - DRV_STATE_UNLOAD); - } else + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, - DRV_STATE_SUSPEND); + DRV_STATE_UNLOAD); return 0; } @@ -4500,36 +4568,9 @@ static int tg3_reset_hw(struct tg3 *tp) return err; } - tg3_chip_reset(tp); - - val = tr32(GRC_MODE); - val &= GRC_MODE_HOST_STACKUP; - tw32(GRC_MODE, val | tp->grc_mode); - - tg3_write_mem(tp, - NIC_SRAM_FIRMWARE_MBOX, - NIC_SRAM_FIRMWARE_MBOX_MAGIC1); - if (tp->phy_id == PHY_ID_SERDES) { - tp->mac_mode = MAC_MODE_PORT_MODE_TBI; - tw32_f(MAC_MODE, tp->mac_mode); - } else - tw32_f(MAC_MODE, 0); - udelay(40); - - /* Wait for firmware initialization to complete. 
*/ - for (i = 0; i < 100000; i++) { - tg3_read_mem(tp, NIC_SRAM_FIRMWARE_MBOX, &val); - if (val == ~NIC_SRAM_FIRMWARE_MBOX_MAGIC1) - break; - udelay(10); - } - if (i >= 100000 && - !(tp->tg3_flags2 & TG3_FLG2_SUN_5704)) { - printk(KERN_ERR PFX "tg3_reset_hw timed out for %s, " - "firmware will not restart magic=%08x\n", - tp->dev->name, val); - return -ENODEV; - } + err = tg3_chip_reset(tp); + if (err) + return err; if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) tg3_write_mem(tp, NIC_SRAM_FW_DRV_STATE_MBOX, @@ -4552,6 +4593,13 @@ static int tg3_reset_hw(struct tg3 *tp) tw32(TG3PCI_PCISTATE, val); } + if (GET_CHIP_REV(tp->pci_chip_rev_id) == CHIPREV_5704_BX) { + /* Enable some hw fixes. */ + val = tr32(TG3PCI_MSI_DATA); + val |= (1 << 26) | (1 << 28) | (1 << 29); + tw32(TG3PCI_MSI_DATA, val); + } + /* Descriptor ring init may make accesses to the * NIC SRAM area to setup the TX descriptors, so we * can only do this after the hardware has been @@ -4582,8 +4630,10 @@ static int tg3_reset_hw(struct tg3 *tp) (GRC_MODE_IRQ_ON_MAC_ATTN | GRC_MODE_HOST_STACKUP)); /* Setup the timer prescalar register. Clock is always 66Mhz. */ - tw32(GRC_MISC_CFG, - (65 << GRC_MISC_CFG_PRESCALAR_SHIFT)); + val = tr32(GRC_MISC_CFG); + val &= ~0xff; + val |= (65 << GRC_MISC_CFG_PRESCALAR_SHIFT); + tw32(GRC_MISC_CFG, val); /* Initialize MBUF/DESC pool. */ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705) { @@ -4644,19 +4694,6 @@ static int tg3_reset_hw(struct tg3 *tp) return -ENODEV; } - tw32(FTQ_RESET, 0xffffffff); - tw32(FTQ_RESET, 0x00000000); - for (i = 0; i < 2000; i++) { - if (tr32(FTQ_RESET) == 0x00000000) - break; - udelay(10); - } - if (i >= 2000) { - printk(KERN_ERR PFX "tg3_reset_hw cannot reset FTQ for %s.\n", - tp->dev->name); - return -ENODEV; - } - /* Clear statistics/status block in chip, and status block in ram. 
*/ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705) { for (i = NIC_SRAM_STATS_BLK; @@ -4988,8 +5025,17 @@ static int tg3_reset_hw(struct tg3 *tp) tw32_f(MAC_RX_MODE, tp->rx_mode); udelay(10); - if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1) - tw32(MAC_SERDES_CFG, 0x616000); + if (tp->phy_id == PHY_ID_SERDES) { + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) { + /* Set drive transmission level to 1.2V */ + val = tr32(MAC_SERDES_CFG); + val &= 0xfffff000; + val |= 0x880; + tw32(MAC_SERDES_CFG, val); + } + if (tp->pci_chip_rev_id == CHIPREV_ID_5703_A1) + tw32(MAC_SERDES_CFG, 0x616000); + } /* Prevent chip from dropping frames when flow control * is enabled. @@ -5882,6 +5928,16 @@ static int tg3_set_settings(struct net_d tp->link_config.phy_is_low_power) return -EAGAIN; + if (tp->phy_id == PHY_ID_SERDES) { + /* These are the only valid advertisement bits allowed. */ + if (cmd->autoneg == AUTONEG_ENABLE && + (cmd->advertising & ~(ADVERTISED_1000baseT_Half | + ADVERTISED_1000baseT_Full | + ADVERTISED_Autoneg | + ADVERTISED_FIBRE))) + return -EINVAL; + } + spin_lock_irq(&tp->lock); spin_lock(&tp->tx_lock); @@ -5891,6 +5947,7 @@ static int tg3_set_settings(struct net_d tp->link_config.speed = SPEED_INVALID; tp->link_config.duplex = DUPLEX_INVALID; } else { + tp->link_config.advertising = 0; tp->link_config.speed = cmd->speed; tp->link_config.duplex = cmd->duplex; } @@ -6357,8 +6414,8 @@ static struct subsys_tbl_ent subsys_id_t { PCI_VENDOR_ID_BROADCOM, 0x0007, PHY_ID_SERDES }, /* BCM95701A7 */ { PCI_VENDOR_ID_BROADCOM, 0x0008, PHY_ID_BCM5701 }, /* BCM95701A10 */ { PCI_VENDOR_ID_BROADCOM, 0x8008, PHY_ID_BCM5701 }, /* BCM95701A12 */ - { PCI_VENDOR_ID_BROADCOM, 0x0009, PHY_ID_BCM5701 }, /* BCM95703Ax1 */ - { PCI_VENDOR_ID_BROADCOM, 0x8009, PHY_ID_BCM5701 }, /* BCM95703Ax2 */ + { PCI_VENDOR_ID_BROADCOM, 0x0009, PHY_ID_BCM5703 }, /* BCM95703Ax1 */ + { PCI_VENDOR_ID_BROADCOM, 0x8009, PHY_ID_BCM5703 }, /* BCM95703Ax2 */ /* 3com boards. 
*/ { PCI_VENDOR_ID_3COM, 0x1000, PHY_ID_BCM5401 }, /* 3C996T */ @@ -6458,19 +6515,27 @@ static int __devinit tg3_phy_probe(struc tp->tg3_flags |= TG3_FLAG_SERDES_WOL_CAP; } - /* Now read the physical PHY_ID from the chip and verify - * that it is sane. If it doesn't look good, we fall back - * to either the hard-coded table based PHY_ID and failing - * that the value found in the eeprom area. - */ - err = tg3_readphy(tp, MII_PHYSID1, &hw_phy_id_1); - err |= tg3_readphy(tp, MII_PHYSID2, &hw_phy_id_2); - - hw_phy_id = (hw_phy_id_1 & 0xffff) << 10; - hw_phy_id |= (hw_phy_id_2 & 0xfc00) << 16; - hw_phy_id |= (hw_phy_id_2 & 0x03ff) << 0; + /* Reading the PHY ID register can conflict with ASF + * firmware access to the PHY hardware. + */ + err = 0; + if (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) { + hw_phy_id = hw_phy_id_masked = PHY_ID_INVALID; + } else { + /* Now read the physical PHY_ID from the chip and verify + * that it is sane. If it doesn't look good, we fall back + * to either the hard-coded table based PHY_ID and failing + * that the value found in the eeprom area. + */ + err |= tg3_readphy(tp, MII_PHYSID1, &hw_phy_id_1); + err |= tg3_readphy(tp, MII_PHYSID2, &hw_phy_id_2); + + hw_phy_id = (hw_phy_id_1 & 0xffff) << 10; + hw_phy_id |= (hw_phy_id_2 & 0xfc00) << 16; + hw_phy_id |= (hw_phy_id_2 & 0x03ff) << 0; - hw_phy_id_masked = hw_phy_id & PHY_ID_MASK; + hw_phy_id_masked = hw_phy_id & PHY_ID_MASK; + } if (!err && KNOWN_PHY_ID(hw_phy_id_masked)) { tp->phy_id = hw_phy_id; @@ -6487,38 +6552,61 @@ static int __devinit tg3_phy_probe(struc } } - err = tg3_phy_reset(tp, 1); - if (err) - return err; + if (tp->phy_id != PHY_ID_SERDES && + !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) { + u32 bmsr, adv_reg, tg3_ctrl; - if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 || - tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) { - u32 mii_tg3_ctrl; - - /* These chips, when reset, only advertise 10Mb - * capabilities. Fix that. 
- */ - err = tg3_writephy(tp, MII_ADVERTISE, - (ADVERTISE_CSMA | - ADVERTISE_PAUSE_CAP | - ADVERTISE_10HALF | - ADVERTISE_10FULL | - ADVERTISE_100HALF | - ADVERTISE_100FULL)); - mii_tg3_ctrl = (MII_TG3_CTRL_ADV_1000_HALF | - MII_TG3_CTRL_ADV_1000_FULL | - MII_TG3_CTRL_AS_MASTER | - MII_TG3_CTRL_ENABLE_AS_MASTER); - if (tp->tg3_flags & TG3_FLAG_10_100_ONLY) - mii_tg3_ctrl = 0; + tg3_readphy(tp, MII_BMSR, &bmsr); + tg3_readphy(tp, MII_BMSR, &bmsr); + + if ((bmsr & BMSR_LSTATUS) && + !(GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5705)) + goto skip_phy_reset; + + err = tg3_phy_reset(tp); + if (err) + return err; + + adv_reg = (ADVERTISE_10HALF | ADVERTISE_10FULL | + ADVERTISE_100HALF | ADVERTISE_100FULL | + ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); + tg3_ctrl = 0; + if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) { + tg3_ctrl = (MII_TG3_CTRL_ADV_1000_HALF | + MII_TG3_CTRL_ADV_1000_FULL); + if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 || + tp->pci_chip_rev_id == CHIPREV_ID_5701_B0) + tg3_ctrl |= (MII_TG3_CTRL_AS_MASTER | + MII_TG3_CTRL_ENABLE_AS_MASTER); + } + + if (!tg3_copper_is_advertising_all(tp)) { + tg3_writephy(tp, MII_ADVERTISE, adv_reg); - err |= tg3_writephy(tp, MII_TG3_CTRL, mii_tg3_ctrl); - err |= tg3_writephy(tp, MII_BMCR, - (BMCR_ANRESTART | BMCR_ANENABLE)); + if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) + tg3_writephy(tp, MII_TG3_CTRL, tg3_ctrl); + + tg3_writephy(tp, MII_BMCR, + BMCR_ANENABLE | BMCR_ANRESTART); + } + tg3_phy_set_wirespeed(tp); + + tg3_writephy(tp, MII_ADVERTISE, adv_reg); + if (!(tp->tg3_flags & TG3_FLAG_10_100_ONLY)) + tg3_writephy(tp, MII_TG3_CTRL, tg3_ctrl); + } + +skip_phy_reset: + if ((tp->phy_id & PHY_ID_MASK) == PHY_ID_BCM5401) { + err = tg3_init_5401phy_dsp(tp); + if (err) + return err; } if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703) { - tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00); + tg3_writephy(tp, 
MII_TG3_AUX_CTRL, 0x4c00); tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x201f); tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x2aaa); } @@ -7739,6 +7827,19 @@ static int __devinit tg3_init_one(struct printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':'); + printk(KERN_INFO "%s: HostTXDS[%d] RXcsums[%d] LinkChgREG[%d] " + "MIirq[%d] ASF[%d] Split[%d] WireSpeed[%d] " + "TSOcap[%d] \n", + dev->name, + (tp->tg3_flags & TG3_FLAG_HOST_TXDS) != 0, + (tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) != 0, + (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) != 0, + (tp->tg3_flags & TG3_FLAG_USE_MI_INTERRUPT) != 0, + (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) != 0, + (tp->tg3_flags & TG3_FLAG_SPLIT_MODE) != 0, + (tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED) == 0, + (tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) != 0); + return 0; err_out_iounmap: --- linux-2.6.6-rc1/drivers/net/tulip/tulip_core.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/tulip/tulip_core.c 2004-04-18 22:25:42.342393456 -0700 @@ -1513,7 +1513,7 @@ static int __devinit tulip_init_one (str } } /* Lite-On boards have the address byte-swapped. 
*/ - if ((dev->dev_addr[0] == 0xA0 || dev->dev_addr[0] == 0xC0) + if ((dev->dev_addr[0] == 0xA0 || dev->dev_addr[0] == 0xC0 || dev->dev_addr[0] == 0x02) && dev->dev_addr[1] == 0x00) for (i = 0; i < 6; i+=2) { char tmp = dev->dev_addr[i]; --- linux-2.6.6-rc1/drivers/net/via-rhine.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/via-rhine.c 2004-04-18 22:25:24.850052696 -0700 @@ -28,10 +28,10 @@ Linux kernel version history: - + LK1.1.0: - Jeff Garzik: softnet 'n stuff - + LK1.1.1: - Justin Guyett: softnet and locking fixes - Jeff Garzik: use PCI interface @@ -58,7 +58,7 @@ LK1.1.6: - Urban Widmark: merges from Beckers 1.08b version (VT6102 + mdio) set netif_running_on/off on startup, del_timer_sync - + LK1.1.7: - Manfred Spraul: added reset into tx_timeout @@ -83,7 +83,7 @@ LK1.1.13 (jgarzik): - Add ethtool support - Replace some MII-related magic numbers with constants - + LK1.1.14 (Ivan G.): - fixes comments for Rhine-III - removes W_MAX_TIMEOUT (unused) @@ -92,7 +92,7 @@ - sends chip_id as a parameter to wait_for_reset since np is not initialized on first call - changes mmio "else if (chip_id==VT6102)" to "else" so it will work - for Rhine-III's (documentation says same bit is correct) + for Rhine-III's (documentation says same bit is correct) - transmit frame queue message is off by one - fixed - adds IntrNormalSummary to "Something Wicked" exclusion list so normal interrupts will not trigger the message (src: Donald Becker) @@ -316,10 +316,10 @@ IIId. Synchronization The driver runs as two independent, single-threaded flows of control. One is the send-packet routine, which enforces single-threaded use by the -dev->priv->lock spinlock. The other thread is the interrupt handler, which +dev->priv->lock spinlock. The other thread is the interrupt handler, which is single threaded by the hardware and interrupt handling software. -The send packet thread has partial control over the Tx ring. 
It locks the +The send packet thread has partial control over the Tx ring. It locks the dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring is not available it stops the transmit queue by calling netif_stop_queue. @@ -639,7 +639,7 @@ static int __devinit via_rhine_init_one #ifdef USE_MEM long ioaddr0; #endif - + /* when built into the kernel, we only print version if device is found */ #ifndef MODULE static int printed_version; @@ -660,7 +660,7 @@ static int __devinit via_rhine_init_one printk(KERN_ERR "32-bit PCI DMA addresses not supported by the card!?\n"); goto err_out; } - + /* sanity check */ if ((pci_resource_len (pdev, 0) < io_size) || (pci_resource_len (pdev, 1) < io_size)) { @@ -681,7 +681,7 @@ static int __devinit via_rhine_init_one } SET_MODULE_OWNER(dev); SET_NETDEV_DEV(dev, &pdev->dev); - + if (pci_request_regions(pdev, shortname)) goto err_out_free_netdev; @@ -847,6 +847,8 @@ static int __devinit via_rhine_init_one netif_carrier_on(dev); else netif_carrier_off(dev); + + break; } } np->mii_cnt = phy_idx; @@ -891,7 +893,7 @@ static int alloc_ring(struct net_device* void *ring; dma_addr_t ring_dma; - ring = pci_alloc_consistent(np->pdev, + ring = pci_alloc_consistent(np->pdev, RX_RING_SIZE * sizeof(struct rx_desc) + TX_RING_SIZE * sizeof(struct tx_desc), &ring_dma); @@ -903,7 +905,7 @@ static int alloc_ring(struct net_device* np->tx_bufs = pci_alloc_consistent(np->pdev, PKT_BUF_SZ * TX_RING_SIZE, &np->tx_bufs_dma); if (np->tx_bufs == NULL) { - pci_free_consistent(np->pdev, + pci_free_consistent(np->pdev, RX_RING_SIZE * sizeof(struct rx_desc) + TX_RING_SIZE * sizeof(struct tx_desc), ring, ring_dma); @@ -923,7 +925,7 @@ void free_ring(struct net_device* dev) { struct netdev_private *np = dev->priv; - pci_free_consistent(np->pdev, + pci_free_consistent(np->pdev, RX_RING_SIZE * sizeof(struct rx_desc) + TX_RING_SIZE * sizeof(struct tx_desc), np->rx_ring, np->rx_ring_dma); @@ -948,7 +950,7 @@ static void alloc_rbufs(struct 
net_devic np->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); np->rx_head_desc = &np->rx_ring[0]; next = np->rx_ring_dma; - + /* Init the ring entries */ for (i = 0; i < RX_RING_SIZE; i++) { np->rx_ring[i].rx_status = 0; @@ -1151,7 +1153,7 @@ static int via_rhine_open(struct net_dev if (debug > 1) printk(KERN_DEBUG "%s: via_rhine_open() irq %d.\n", dev->name, np->pdev->irq); - + i = alloc_ring(dev); if (i) return i; @@ -1266,7 +1268,7 @@ static void via_rhine_tx_timeout (struct /* Reinitialize the hardware. */ wait_for_reset(dev, np->chip_id, dev->name); init_registers(dev); - + spin_unlock(&np->lock); enable_irq(np->pdev->irq); @@ -1316,7 +1318,7 @@ static int via_rhine_start_tx(struct sk_ np->tx_ring[entry].addr = cpu_to_le32(np->tx_skbuff_dma[entry]); } - np->tx_ring[entry].desc_length = + np->tx_ring[entry].desc_length = cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); /* lock eth irq */ @@ -1364,7 +1366,7 @@ static irqreturn_t via_rhine_interrupt(i int handled = 0; ioaddr = dev->base_addr; - + while ((intr_status = get_intr_status(dev))) { handled = 1; @@ -1584,7 +1586,7 @@ static void via_rhine_rx(struct net_devi break; /* Better luck next round. */ skb->dev = dev; /* Mark as being used by this device. */ np->rx_skbuff_dma[entry] = - pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, + pci_map_single(np->pdev, skb->tail, np->rx_buf_sz, PCI_DMA_FROMDEVICE); np->rx_ring[entry].addr = cpu_to_le32(np->rx_skbuff_dma[entry]); } @@ -1892,7 +1894,7 @@ static int via_rhine_close(struct net_de static void __devexit via_rhine_remove_one (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); - + unregister_netdev(dev); pci_release_regions(pdev); --- linux-2.6.6-rc1/drivers/net/wan/lapbether.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/wan/lapbether.c 2004-04-18 22:25:24.851052544 -0700 @@ -392,6 +392,8 @@ static void lapbeth_free_device(struct l /* * Handle device status changes. 
+ * + * Called from notifier with RTNL held. */ static int lapbeth_device_event(struct notifier_block *this, unsigned long event, void *ptr) @@ -402,7 +404,6 @@ static int lapbeth_device_event(struct n if (!dev_is_ethdev(dev)) return NOTIFY_DONE; - rcu_read_lock(); switch (event) { case NETDEV_UP: /* New ethernet device -> new LAPB interface */ @@ -422,7 +423,6 @@ static int lapbeth_device_event(struct n lapbeth_free_device(lapbeth); break; } - rcu_read_unlock(); return NOTIFY_DONE; } --- linux-2.6.6-rc1/drivers/net/wan/pc300_drv.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/net/wan/pc300_drv.c 2004-04-18 22:25:52.537843512 -0700 @@ -370,7 +370,7 @@ static void tx_dma_buf_check(pc300_t * c ucshort first_bd = card->chan[ch].tx_first_bd; ucshort next_bd = card->chan[ch].tx_next_bd; - printk("#CH%d: f_bd = %d(0x%08x), n_bd = %d(0x%08x)\n", ch, + printk("#CH%d: f_bd = %d(0x%08zx), n_bd = %d(0x%08zx)\n", ch, first_bd, TX_BD_ADDR(ch, first_bd), next_bd, TX_BD_ADDR(ch, next_bd)); for (i = first_bd, --- linux-2.6.6-rc1/drivers/net/wireless/strip.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/net/wireless/strip.c 2004-04-18 22:25:24.853052240 -0700 @@ -1727,7 +1727,7 @@ static void get_radio_version(struct str sprintf(strip_info->serial_number.c, "%.*s", len, p); } else { printk(KERN_DEBUG - "STRIP: radio serial number shorter (%d) than expected (%d)\n", + "STRIP: radio serial number shorter (%zd) than expected (%d)\n", end - p, len); } } @@ -1745,7 +1745,7 @@ static void get_radio_voltage(struct str sprintf(strip_info->battery_voltage.c, "%.*s", len, ptr); } else { printk(KERN_DEBUG - "STRIP: radio voltage string shorter (%d) than expected (%d)\n", + "STRIP: radio voltage string shorter (%zd) than expected (%d)\n", end - ptr, len); } } @@ -2330,7 +2330,7 @@ static void strip_receive_buf(struct tty if (*cp == 0x0D) { /* If end of packet, decide what to do with it */ if (strip_info->sx_count > 3000) printk(KERN_INFO - "%s: Cut a %d byte packet (%d bytes 
remaining)%s\n", + "%s: Cut a %d byte packet (%zd bytes remaining)%s\n", strip_info->dev->name, strip_info->sx_count, end - cp - 1, --- linux-2.6.6-rc1/drivers/net/zorro8390.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/net/zorro8390.c 2004-04-18 22:25:24.854052088 -0700 @@ -64,7 +64,7 @@ static struct card_info { zorro_id id; const char *name; unsigned int offset; -} cards[] __initdata = { +} cards[] __devinitdata = { { ZORRO_PROD_VILLAGE_TRONIC_ARIADNE2, "Ariadne II", 0x0600 }, { ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF, "X-Surf", 0x8600 }, }; @@ -150,7 +150,7 @@ static int __devinit zorro8390_init(stru while ((z_readb(ioaddr + NE_EN0_ISR) & ENISR_RESET) == 0) if (jiffies - reset_start_time > 2*HZ/100) { - printk(" not found (no reset ack).\n"); + printk(KERN_WARNING " not found (no reset ack).\n"); return -ENODEV; } @@ -233,7 +233,7 @@ static int __devinit zorro8390_init(stru return err; } - printk("%s: %s at 0x%08lx, Ethernet Address " + printk(KERN_INFO "%s: %s at 0x%08lx, Ethernet Address " "%02x:%02x:%02x:%02x:%02x:%02x\n", dev->name, name, board, dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]); @@ -250,7 +250,7 @@ static int zorro8390_open(struct net_dev static int zorro8390_close(struct net_device *dev) { if (ei_debug > 1) - printk("%s: Shutting down ethercard.\n", dev->name); + printk(KERN_DEBUG "%s: Shutting down ethercard.\n", dev->name); ei_close(dev); return 0; } @@ -262,7 +262,7 @@ static void zorro8390_reset_8390(struct unsigned long reset_start_time = jiffies; if (ei_debug > 1) - printk("resetting the 8390 t=%ld...", jiffies); + printk(KERN_DEBUG "resetting the 8390 t=%ld...\n", jiffies); z_writeb(z_readb(NE_BASE + NE_RESET), NE_BASE + NE_RESET); @@ -272,7 +272,8 @@ static void zorro8390_reset_8390(struct /* This check _should_not_ be necessary, omit eventually. 
*/ while ((z_readb(NE_BASE+NE_EN0_ISR) & ENISR_RESET) == 0) if (jiffies - reset_start_time > 2*HZ/100) { - printk("%s: ne_reset_8390() did not complete.\n", dev->name); + printk(KERN_WARNING "%s: ne_reset_8390() did not complete.\n", + dev->name); break; } z_writeb(ENISR_RESET, NE_BASE + NE_EN0_ISR); /* Ack intr. */ @@ -291,7 +292,7 @@ static void zorro8390_get_8390_hdr(struc /* This *shouldn't* happen. If it does, it's the last thing you'll see */ if (ei_status.dmaing) { - printk("%s: DMAing conflict in ne_get_8390_hdr " + printk(KERN_ERR "%s: DMAing conflict in ne_get_8390_hdr " "[DMAstat:%d][irqlock:%d].\n", dev->name, ei_status.dmaing, ei_status.irqlock); return; @@ -332,7 +333,7 @@ static void zorro8390_block_input(struct /* This *shouldn't* happen. If it does, it's the last thing you'll see */ if (ei_status.dmaing) { - printk("%s: DMAing conflict in ne_block_input " + printk(KERN_ERR "%s: DMAing conflict in ne_block_input " "[DMAstat:%d][irqlock:%d].\n", dev->name, ei_status.dmaing, ei_status.irqlock); return; @@ -372,7 +373,7 @@ static void zorro8390_block_output(struc /* This *shouldn't* happen. If it does, it's the last thing you'll see */ if (ei_status.dmaing) { - printk("%s: DMAing conflict in ne_block_output." + printk(KERN_ERR "%s: DMAing conflict in ne_block_output." 
"[DMAstat:%d][irqlock:%d]\n", dev->name, ei_status.dmaing, ei_status.irqlock); return; @@ -398,7 +399,8 @@ static void zorro8390_block_output(struc while ((z_readb(NE_BASE + NE_EN0_ISR) & ENISR_RDC) == 0) if (jiffies - dma_start > 2*HZ/100) { /* 20ms */ - printk("%s: timeout waiting for Tx RDC.\n", dev->name); + printk(KERN_ERR "%s: timeout waiting for Tx RDC.\n", + dev->name); zorro8390_reset_8390(dev); NS8390_init(dev,1); break; --- linux-2.6.6-rc1/drivers/oprofile/oprofile_stats.c 2004-02-03 20:42:36.000000000 -0800 +++ 25/drivers/oprofile/oprofile_stats.c 2004-04-18 22:25:24.855051936 -0700 @@ -55,7 +55,7 @@ void oprofile_create_stats_files(struct continue; cpu_buf = &cpu_buffer[i]; - snprintf(buf, 6, "cpu%d", i); + snprintf(buf, 10, "cpu%d", i); cpudir = oprofilefs_mkdir(sb, dir, buf); /* Strictly speaking access to these ulongs is racy, --- linux-2.6.6-rc1/drivers/parisc/ccio-dma.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/parisc/ccio-dma.c 2004-04-18 22:26:02.383346768 -0700 @@ -44,7 +44,6 @@ #include #include /* for L1_CACHE_BYTES */ #include -#include #include #include #include --- linux-2.6.6-rc1/drivers/parisc/ccio-rm-dma.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/parisc/ccio-rm-dma.c 2004-04-18 22:26:02.383346768 -0700 @@ -40,7 +40,6 @@ #include #include -#include #include #include --- linux-2.6.6-rc1/drivers/pci/hotplug/acpiphp_glue.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/acpiphp_glue.c 2004-04-18 22:25:24.856051784 -0700 @@ -1243,40 +1243,38 @@ int acpiphp_disable_slot(struct acpiphp_ /** * acpiphp_check_bridge - re-enumerate devices + * + * Iterate over all slots under this bridge and make sure that if a + * card is present they are enabled, and if not they are disabled. 
*/ int acpiphp_check_bridge(struct acpiphp_bridge *bridge) { struct acpiphp_slot *slot; - unsigned int sta; int retval = 0; int enabled, disabled; enabled = disabled = 0; for (slot = bridge->slots; slot; slot = slot->next) { - sta = get_slot_status(slot); + unsigned int status = get_slot_status(slot); if (slot->flags & SLOT_ENABLED) { - /* if enabled but not present, disable */ - if (sta != ACPI_STA_ALL) { - retval = acpiphp_disable_slot(slot); - if (retval) { - err("Error occurred in enabling\n"); - up(&slot->crit_sect); - goto err_exit; - } - disabled++; + if (status == ACPI_STA_ALL) + continue; + retval = acpiphp_disable_slot(slot); + if (retval) { + err("Error occurred in disabling\n"); + goto err_exit; } + disabled++; } else { - /* if disabled but present, enable */ - if (sta == ACPI_STA_ALL) { - retval = acpiphp_enable_slot(slot); - if (retval) { - err("Error occurred in enabling\n"); - up(&slot->crit_sect); - goto err_exit; - } - enabled++; + if (status != ACPI_STA_ALL) + continue; + retval = acpiphp_enable_slot(slot); + if (retval) { + err("Error occurred in enabling\n"); + goto err_exit; } + enabled++; } } --- linux-2.6.6-rc1/drivers/pci/hotplug/acpiphp_pci.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/acpiphp_pci.c 2004-04-18 22:25:24.857051632 -0700 @@ -198,106 +198,42 @@ static int init_config_space (struct acp /* detect_used_resource - subtract resource under dev from bridge */ static int detect_used_resource (struct acpiphp_bridge *bridge, struct pci_dev *dev) { - u32 bar, len; - u64 base; - u32 address[] = { - PCI_BASE_ADDRESS_0, - PCI_BASE_ADDRESS_1, - PCI_BASE_ADDRESS_2, - PCI_BASE_ADDRESS_3, - PCI_BASE_ADDRESS_4, - PCI_BASE_ADDRESS_5, - 0 - }; int count; - struct pci_resource *res; dbg("Device %s\n", pci_name(dev)); - for (count = 0; address[count]; count++) { /* for 6 BARs */ - pci_read_config_dword(dev, address[count], &bar); + for (count = 0; count < DEVICE_COUNT_RESOURCE; count++) { + struct pci_resource *res; + struct 
pci_resource **head; + unsigned long base = dev->resource[count].start; + unsigned long len = dev->resource[count].end - base + 1; + unsigned long flags = dev->resource[count].flags; - if (!bar) /* This BAR is not implemented */ + if (!flags) continue; - pci_write_config_dword(dev, address[count], 0xFFFFFFFF); - pci_read_config_dword(dev, address[count], &len); - - if (len & PCI_BASE_ADDRESS_SPACE_IO) { - /* This is IO */ - base = bar & 0xFFFFFFFC; - len = len & (PCI_BASE_ADDRESS_IO_MASK & 0xFFFF); - len = len & ~(len - 1); - - dbg("BAR[%d] %08x - %08x (IO)\n", count, (u32)base, (u32)base + len - 1); + dbg("BAR[%d] 0x%lx - 0x%lx (0x%lx)\n", count, base, + base + len - 1, flags); - spin_lock(&bridge->res_lock); - res = acpiphp_get_resource_with_base(&bridge->io_head, base, len); - spin_unlock(&bridge->res_lock); - if (res) - kfree(res); + if (flags & IORESOURCE_IO) { + head = &bridge->io_head; + } else if (flags & IORESOURCE_PREFETCH) { + head = &bridge->p_mem_head; } else { - /* This is Memory */ - base = bar & 0xFFFFFFF0; - if (len & PCI_BASE_ADDRESS_MEM_PREFETCH) { - /* pfmem */ - - len &= 0xFFFFFFF0; - len = ~len + 1; - - if (len & PCI_BASE_ADDRESS_MEM_TYPE_64) { /* takes up another dword */ - dbg("prefetch mem 64\n"); - count += 1; - } - dbg("BAR[%d] %08x - %08x (PMEM)\n", count, (u32)base, (u32)base + len - 1); - spin_lock(&bridge->res_lock); - res = acpiphp_get_resource_with_base(&bridge->p_mem_head, base, len); - spin_unlock(&bridge->res_lock); - if (res) - kfree(res); - } else { - /* regular memory */ - - len &= 0xFFFFFFF0; - len = ~len + 1; - - if (len & PCI_BASE_ADDRESS_MEM_TYPE_64) { - /* takes up another dword */ - dbg("mem 64\n"); - count += 1; - } - dbg("BAR[%d] %08x - %08x (MEM)\n", count, (u32)base, (u32)base + len - 1); - spin_lock(&bridge->res_lock); - res = acpiphp_get_resource_with_base(&bridge->mem_head, base, len); - spin_unlock(&bridge->res_lock); - if (res) - kfree(res); - } + head = &bridge->mem_head; } - pci_write_config_dword(dev, 
address[count], bar); + spin_lock(&bridge->res_lock); + res = acpiphp_get_resource_with_base(head, base, len); + spin_unlock(&bridge->res_lock); + if (res) + kfree(res); } return 0; } -/* detect_pci_resource_bus - subtract resource under pci_bus */ -static void detect_used_resource_bus(struct acpiphp_bridge *bridge, struct pci_bus *bus) -{ - struct list_head *l; - struct pci_dev *dev; - - list_for_each (l, &bus->devices) { - dev = pci_dev_b(l); - detect_used_resource(bridge, dev); - /* XXX recursive call */ - if (dev->subordinate) - detect_used_resource_bus(bridge, dev->subordinate); - } -} - - /** * acpiphp_detect_pci_resource - detect resources under bridge * @bridge: detect all resources already used under this bridge @@ -306,7 +242,13 @@ static void detect_used_resource_bus(str */ int acpiphp_detect_pci_resource (struct acpiphp_bridge *bridge) { - detect_used_resource_bus(bridge, bridge->pci_bus); + struct list_head *l; + struct pci_dev *dev; + + list_for_each (l, &bridge->pci_bus->devices) { + dev = pci_dev_b(l); + detect_used_resource(bridge, dev); + } return 0; } --- linux-2.6.6-rc1/drivers/pci/hotplug/pciehp_ctrl.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/pciehp_ctrl.c 2004-04-18 22:25:24.859051328 -0700 @@ -135,7 +135,7 @@ u8 pciehp_handle_switch_change(u8 hp_slo p_slot->hpc_ops->get_adapter_status(p_slot, &(func->presence_save)); p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (!getstatus) { + if (getstatus) { /* * Switch opened */ @@ -1705,7 +1705,7 @@ int pciehp_enable_slot (struct slot *p_s } rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (rc || !getstatus) { + if (rc || getstatus) { info("%s: latch open on slot(%x)\n", __FUNCTION__, p_slot->number); up(&p_slot->ctrl->crit_sect); return (0); @@ -1792,7 +1792,7 @@ int pciehp_disable_slot (struct slot *p_ } ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (ret || !getstatus) { + if (ret || getstatus) { info("%s: latch open on 
slot(%x)\n", __FUNCTION__, p_slot->number); up(&p_slot->ctrl->crit_sect); return (0); --- linux-2.6.6-rc1/drivers/pci/hotplug/pciehp_hpc.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/pciehp_hpc.c 2004-04-18 22:25:24.862050872 -0700 @@ -37,6 +37,7 @@ #include #include #include +#include "../pci.h" #include "pciehp.h" #ifdef DEBUG @@ -315,12 +316,13 @@ static int pcie_write_cmd(struct slot *s dbg("%s : CMD_COMPLETED not clear after 1 sec.\n", __FUNCTION__); } - retval = hp_register_write_word(php_ctlr->pci_dev, SLOT_CTRL, cmd); + dbg("%s: Before hp_register_write_word SLOT_CTRL %x\n", __FUNCTION__, cmd); + retval = hp_register_write_word(php_ctlr->pci_dev, SLOT_CTRL, cmd | CMD_CMPL_INTR_ENABLE); if (retval) { err("%s : hp_register_write_word SLOT_CTRL failed\n", __FUNCTION__); return retval; } - dbg("%s : hp_register_write_word SLOT_CTRL %x\n", __FUNCTION__, cmd); + dbg("%s : hp_register_write_word SLOT_CTRL %x\n", __FUNCTION__, cmd | CMD_CMPL_INTR_ENABLE); dbg("%s : Exit\n", __FUNCTION__); DBG_LEAVE_ROUTINE @@ -918,13 +920,32 @@ static irqreturn_t pcie_isr(int IRQ, voi return IRQ_NONE; } - temp_word = (temp_word & ~HP_INTR_ENABLE) | 0x00; + dbg("%s: Set Mask Hot-plug Interrupt Enable\n", __FUNCTION__); + dbg("%s: hp_register_read_word SLOT_CTRL with value %x\n", __FUNCTION__, temp_word); + temp_word = (temp_word & ~HP_INTR_ENABLE & ~CMD_CMPL_INTR_ENABLE) | 0x00; rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_CTRL, temp_word); if (rc) { err("%s : hp_register_write_word SLOT_CTRL failed\n", __FUNCTION__); return IRQ_NONE; } + dbg("%s: hp_register_write_word SLOT_CTRL with value %x\n", __FUNCTION__, temp_word); + + rc = hp_register_read_word(php_ctlr->pci_dev, SLOT_STATUS, slot_status); + if (rc) { + err("%s : hp_register_read_word SLOT_STATUS failed\n", __FUNCTION__); + return IRQ_NONE; + } + dbg("%s: hp_register_read_word SLOT_STATUS with value %x\n", __FUNCTION__, slot_status); + + /* Clear command complete interrupt caused by this write */ + 
temp_word = 0x1f; + rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, temp_word); + if (rc) { + err("%s : hp_register_write_word SLOT_STATUS failed\n", __FUNCTION__); + return IRQ_NONE; + } + dbg("%s: hp_register_write_word SLOT_STATUS with value %x\n", __FUNCTION__, temp_word); } if (intr_loc & CMD_COMPLETED) { @@ -949,7 +970,7 @@ static irqreturn_t pcie_isr(int IRQ, voi hp_slot, php_ctlr->callback_instance_id); /* Clear all events after serving them */ - temp_word = slot_status | 0xff; + temp_word = 0x1F; rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, temp_word); if (rc) { err("%s : hp_register_write_word SLOT_STATUS failed\n", __FUNCTION__); @@ -963,6 +984,8 @@ static irqreturn_t pcie_isr(int IRQ, voi return IRQ_NONE; } + dbg("%s: Unmask Hot-plug Interrupt Enable\n", __FUNCTION__); + dbg("%s: hp_register_read_word SLOT_CTRL with value %x\n", __FUNCTION__, temp_word); temp_word = (temp_word & ~HP_INTR_ENABLE) | HP_INTR_ENABLE; rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_CTRL, temp_word); @@ -970,6 +993,23 @@ static irqreturn_t pcie_isr(int IRQ, voi err("%s : hp_register_write_word SLOT_CTRL failed\n", __FUNCTION__); return IRQ_NONE; } + dbg("%s: hp_register_write_word SLOT_CTRL with value %x\n", __FUNCTION__, temp_word); + + rc = hp_register_read_word(php_ctlr->pci_dev, SLOT_STATUS, slot_status); + if (rc) { + err("%s : hp_register_read_word SLOT_STATUS failed\n", __FUNCTION__); + return IRQ_NONE; + } + dbg("%s: hp_register_read_word SLOT_STATUS with value %x\n", __FUNCTION__, slot_status); + + /* Clear command complete interrupt caused by this write */ + temp_word = 0x1F; + rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, temp_word); + if (rc) { + err("%s : hp_register_write_word SLOT_STATUS failed\n", __FUNCTION__); + return IRQ_NONE; + } + dbg("%s: hp_register_write_word SLOT_STATUS with value %x\n", __FUNCTION__, temp_word); } return IRQ_HANDLED; @@ -1330,7 +1370,7 @@ int pcie_init(struct controller * ctrl, } dbg("%s: 
SLOT_CTRL %x value read %x\n", __FUNCTION__, SLOT_CTRL, temp_word); - temp_word = (temp_word & ~HP_INTR_ENABLE) | 0x00; + temp_word = (temp_word & ~HP_INTR_ENABLE & ~CMD_CMPL_INTR_ENABLE) | 0x00; rc = hp_register_write_word(pdev, SLOT_CTRL, temp_word); if (rc) { @@ -1346,12 +1386,13 @@ int pcie_init(struct controller * ctrl, } dbg("%s: Mask HPIE SLOT_STATUS offset %x reads slot_status %x\n", __FUNCTION__, SLOT_STATUS, slot_status); - rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, slot_status); + temp_word = 0x1F; /* Clear all events */ + rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, temp_word); if (rc) { err("%s : hp_register_write_word SLOT_STATUS failed\n", __FUNCTION__); goto abort_free_ctlr; } - dbg("%s: SLOT_STATUS offset %x writes slot_status %x\n", __FUNCTION__, SLOT_STATUS, slot_status); + dbg("%s: SLOT_STATUS offset %x writes slot_status %x\n", __FUNCTION__, SLOT_STATUS, temp_word); if (pciehp_poll_mode) {/* Install interrupt polling code */ /* Install and start the interrupt polling timer */ @@ -1359,15 +1400,16 @@ int pcie_init(struct controller * ctrl, start_int_poll_timer( php_ctlr, 10 ); /* start with 10 second delay */ } else { /* Installs the interrupt handler */ -#ifdef CONFIG_PCI_USE_VECTOR - rc = pci_enable_msi(pdev); - if (rc) { - err("Can't get msi for the hotplug controller\n"); - dbg("%s: rc = %x\n", __FUNCTION__, rc); - goto abort_free_ctlr; + dbg("%s: pciehp_msi_quirk = %x\n", __FUNCTION__, pciehp_msi_quirk); + if (!pciehp_msi_quirk) { + rc = pci_enable_msi(pdev); + if (rc) { + info("Can't get msi for the hotplug controller\n"); + info("Use INTx for the hotplug controller\n"); + dbg("%s: rc = %x\n", __FUNCTION__, rc); + } else + php_ctlr->irq = pdev->irq; } - php_ctlr->irq = pdev->irq; -#endif rc = request_irq(php_ctlr->irq, pcie_isr, SA_SHIRQ, MY_NAME, (void *) ctrl); dbg("%s: request_irq %d for hpc%d (returns %d)\n", __FUNCTION__, php_ctlr->irq, ctlr_seq_num, rc); if (rc) { @@ -1384,7 +1426,7 @@ int 
pcie_init(struct controller * ctrl, dbg("%s: SLOT_CTRL %x value read %x\n", __FUNCTION__, SLOT_CTRL, temp_word); intr_enable = ATTN_BUTTN_ENABLE | PWR_FAULT_DETECT_ENABLE | MRL_DETECT_ENABLE | - PRSN_DETECT_ENABLE | CMD_CMPL_INTR_ENABLE; + PRSN_DETECT_ENABLE; temp_word = (temp_word & ~intr_enable) | intr_enable; @@ -1402,6 +1444,21 @@ int pcie_init(struct controller * ctrl, goto abort_free_ctlr; } dbg("%s : Unmask HPIE hp_register_write_word SLOT_CTRL with %x\n", __FUNCTION__, temp_word); + rc = hp_register_read_word(php_ctlr->pci_dev, SLOT_STATUS, slot_status); + if (rc) { + err("%s : hp_register_read_word SLOT_STATUS failed\n", __FUNCTION__); + goto abort_free_ctlr; + } + dbg("%s: Unmask HPIE SLOT_STATUS offset %x reads slot_status %x\n", __FUNCTION__, + SLOT_STATUS, slot_status); + + temp_word = 0x1F; /* Clear all events */ + rc = hp_register_write_word(php_ctlr->pci_dev, SLOT_STATUS, temp_word); + if (rc) { + err("%s : hp_register_write_word SLOT_STATUS failed\n", __FUNCTION__); + goto abort_free_ctlr; + } + dbg("%s: SLOT_STATUS offset %x writes slot_status %x\n", __FUNCTION__, SLOT_STATUS, temp_word); /* Add this HPC instance into the HPC list */ spin_lock(&list_lock); --- linux-2.6.6-rc1/drivers/pci/hotplug/pciehp_pci.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/pciehp_pci.c 2004-04-18 22:25:24.863050720 -0700 @@ -192,7 +192,6 @@ int pciehp_save_config(struct controller for (device = FirstSupported; device <= LastSupported; device++) { ID = 0xFFFFFFFF; rc = pci_bus_read_config_dword(pci_bus, PCI_DEVFN(device, 0), PCI_VENDOR_ID, &ID); - dbg("%s: ID = %x\n", __FUNCTION__, ID); if (ID != 0xFFFFFFFF) { /* device in slot */ dbg("%s: ID = %x\n", __FUNCTION__, ID); @@ -325,7 +324,6 @@ int pciehp_save_config(struct controller new_slot->presence_save = 0; new_slot->switch_save = 0; } - dbg("%s: End of For loop\n", __FUNCTION__); } /* End of FOR loop */ dbg("%s: Exit\n", __FUNCTION__); --- linux-2.6.6-rc1/drivers/pci/hotplug/pci_hotplug.h 2004-03-10 
20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/pci_hotplug.h 2004-04-18 22:25:24.858051480 -0700 @@ -43,7 +43,7 @@ enum pci_bus_speed { PCI_SPEED_100MHz_PCIX_266 = 0x0a, PCI_SPEED_133MHz_PCIX_266 = 0x0b, PCI_SPEED_66MHz_PCIX_533 = 0x11, - PCI_SPEED_100MHz_PCIX_533 = 0X12, + PCI_SPEED_100MHz_PCIX_533 = 0x12, PCI_SPEED_133MHz_PCIX_533 = 0x13, PCI_SPEED_UNKNOWN = 0xff, }; --- linux-2.6.6-rc1/drivers/pci/hotplug/rpadlpar_core.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/rpadlpar_core.c 2004-04-18 22:25:24.863050720 -0700 @@ -79,25 +79,18 @@ static struct device_node *find_php_slot return np; } -static inline struct hotplug_slot *find_php_slot(char *drc_name) -{ - struct kobject *k; - - k = kset_find_obj(&pci_hotplug_slots_subsys.kset, drc_name); - if (!k) - return NULL; - - return to_hotplug_slot(k); -} - static struct slot *find_slot(char *drc_name) { - struct hotplug_slot *php_slot = find_php_slot(drc_name); + struct list_head *tmp, *n; + struct slot *slot; - if (!php_slot) - return NULL; + list_for_each_safe(tmp, n, &rpaphp_slot_head) { + slot = list_entry(tmp, struct slot, rpaphp_slot_list); + if (strcmp(slot->location, drc_name) == 0) + return slot; + } - return (struct slot *) php_slot->private; + return NULL; } static void rpadlpar_claim_one_bus(struct pci_bus *b) --- linux-2.6.6-rc1/drivers/pci/hotplug/rpaphp_core.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/rpaphp_core.c 2004-04-18 22:25:24.864050568 -0700 @@ -246,17 +246,14 @@ static int get_cur_bus_speed(struct hotp int rpaphp_remove_slot(struct slot *slot) { int retval = 0; - char *rm_link; + struct hotplug_slot *php_slot = slot->hotplug_slot; - dbg("%s - Entry: slot[%s]\n", __FUNCTION__, slot->name); - if (slot->dev_type == PCI_DEV) - rm_link = pci_name(slot->bridge); - else - rm_link = strstr(slot->dn->full_name, "@"); - - sysfs_remove_link(slot->hotplug_slot->kobj.parent, rm_link); list_del(&slot->rpaphp_slot_list); - retval = 
pci_hp_deregister(slot->hotplug_slot); + + /* remove "phy_location" file */ + rpaphp_sysfs_remove_attr_location(php_slot); + + retval = pci_hp_deregister(php_slot); if (retval) err("Problem unregistering a slot %s\n", slot->name); @@ -380,14 +377,7 @@ static void cleanup_slots(void) */ list_for_each_safe(tmp, n, &rpaphp_slot_head) { - char *rm_link; - slot = list_entry(tmp, struct slot, rpaphp_slot_list); - if (slot->dev_type == PCI_DEV) - rm_link = pci_name(slot->bridge); - else - rm_link = strstr(slot->dn->full_name, "@"); - sysfs_remove_link(slot->hotplug_slot->kobj.parent, rm_link); list_del(&slot->rpaphp_slot_list); pci_hp_deregister(slot->hotplug_slot); } @@ -478,3 +468,4 @@ module_exit(rpaphp_exit); EXPORT_SYMBOL_GPL(rpaphp_add_slot); EXPORT_SYMBOL_GPL(rpaphp_remove_slot); +EXPORT_SYMBOL_GPL(rpaphp_slot_head); --- linux-2.6.6-rc1/drivers/pci/hotplug/rpaphp.h 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/rpaphp.h 2004-04-18 22:25:24.864050568 -0700 @@ -85,6 +85,7 @@ struct slot { u32 type; u32 power_domain; char *name; + char *location; struct device_node *dn; /* slot's device_node in OFDT */ /* dn has phb info */ struct pci_dev *bridge; /* slot's pci_dev in pci_devices */ @@ -129,5 +130,6 @@ extern struct slot *alloc_slot_struct(st extern int register_slot(struct slot *slot); extern int rpaphp_get_power_status(struct slot *slot, u8 * value); extern int rpaphp_set_attention_status(struct slot *slot, u8 status); +extern void rpaphp_sysfs_remove_attr_location(struct hotplug_slot *slot); #endif /* _PPC64PHP_H */ --- linux-2.6.6-rc1/drivers/pci/hotplug/rpaphp_pci.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/rpaphp_pci.c 2004-04-18 22:25:24.865050416 -0700 @@ -304,7 +304,6 @@ static int setup_pci_hotplug_slot_info(s if (slot->hotplug_slot->info->adapter_status == NOT_VALID) { dbg("%s: NOT_VALID: skip dn->full_name=%s\n", __FUNCTION__, slot->dn->full_name); - dealloc_slot_struct(slot); return (-1); } return (0); ---
linux-2.6.6-rc1/drivers/pci/hotplug/rpaphp_slot.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/hotplug/rpaphp_slot.c 2004-04-18 22:25:24.866050264 -0700 @@ -29,8 +29,36 @@ #include #include "rpaphp.h" -/* free up the memory user by a slot */ +static ssize_t location_read_file (struct hotplug_slot *php_slot, char *buf) +{ + char *value; + int retval = -ENOENT; + struct slot *slot = (struct slot *)php_slot->private; + + if (!slot) + return retval; + + value = slot->location; + retval = sprintf (buf, "%s\n", value); + return retval; +} +static struct hotplug_slot_attribute hotplug_slot_attr_location = { + .attr = {.name = "phy_location", .mode = S_IFREG | S_IRUGO}, + .show = location_read_file, +}; + +static void rpaphp_sysfs_add_attr_location (struct hotplug_slot *slot) +{ + sysfs_create_file(&slot->kobj, &hotplug_slot_attr_location.attr); +} + +void rpaphp_sysfs_remove_attr_location (struct hotplug_slot *slot) +{ + sysfs_remove_file(&slot->kobj, &hotplug_slot_attr_location.attr); +} + +/* free up the memory user by a slot */ static void rpaphp_release_slot(struct hotplug_slot *hotplug_slot) { struct slot *slot = hotplug_slot? 
(struct slot *) hotplug_slot->private:NULL; @@ -76,17 +104,25 @@ struct slot *alloc_slot_struct(struct de return (NULL); } memset(slot->hotplug_slot->info, 0, sizeof (struct hotplug_slot_info)); - slot->hotplug_slot->name = kmalloc(strlen(drc_name) + 1, GFP_KERNEL); + slot->hotplug_slot->name = kmalloc(BUS_ID_SIZE + 1, GFP_KERNEL); if (!slot->hotplug_slot->name) { kfree(slot->hotplug_slot->info); kfree(slot->hotplug_slot); kfree(slot); return (NULL); } + slot->location = kmalloc(strlen(drc_name) + 1, GFP_KERNEL); + if (!slot->location) { + kfree(slot->hotplug_slot->info); + kfree(slot->hotplug_slot->name); + kfree(slot->hotplug_slot); + kfree(slot); + return (NULL); + } slot->name = slot->hotplug_slot->name; slot->dn = dn; slot->index = drc_index; - strcpy(slot->name, drc_name); + strcpy(slot->location, drc_name); slot->power_domain = power_domain; slot->magic = SLOT_MAGIC; slot->hotplug_slot->private = slot; @@ -110,41 +146,9 @@ int register_slot(struct slot *slot) rpaphp_release_slot(slot->hotplug_slot); return (retval); } - switch (slot->dev_type) { - case PCI_DEV: - /* create symlink between slot->name and it's bus_id */ - - dbg("%s: sysfs_create_link: %s --> %s\n", __FUNCTION__, - pci_name(slot->bridge), slot->name); - - retval = sysfs_create_link(slot->hotplug_slot->kobj.parent, - &slot->hotplug_slot->kobj, - pci_name(slot->bridge)); - if (retval) { - err("sysfs_create_link failed with error %d\n", retval); - rpaphp_release_slot(slot->hotplug_slot); - return (retval); - } - break; - case VIO_DEV: - /* create symlink between slot->name and it's uni-address */ - vio_uni_addr = strchr(slot->dn->full_name, '@'); - if (!vio_uni_addr) - return (1); - dbg("%s: sysfs_create_link: %s --> %s\n", __FUNCTION__, - vio_uni_addr, slot->name); - retval = sysfs_create_link(slot->hotplug_slot->kobj.parent, - &slot->hotplug_slot->kobj, - vio_uni_addr); - if (retval) { - err("sysfs_create_link failed with error %d\n", retval); - rpaphp_release_slot(slot->hotplug_slot); - return 
(retval); - } - break; - default: - return (1); - } + + /* create "phy_location" file */ + rpaphp_sysfs_add_attr_location(slot->hotplug_slot); /* add slot to our internal list */ dbg("%s adding slot[%s] to rpaphp_slot_list\n", --- linux-2.6.6-rc1/drivers/pci/hotplug/shpchp_ctrl.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/shpchp_ctrl.c 2004-04-18 22:25:24.868049960 -0700 @@ -138,7 +138,7 @@ u8 shpchp_handle_switch_change(u8 hp_slo p_slot->hpc_ops->get_adapter_status(p_slot, &(func->presence_save)); p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (!getstatus) { + if (getstatus) { /* * Switch opened */ @@ -1219,7 +1219,7 @@ static u32 board_added(struct pci_func * up(&ctrl->crit_sect); } } else { - if ((bus_speed > 0x4) || (max_bus_speed > 0x4)) { + if (bus_speed > 0x4) { err("%s: speed of bus %x and adapter %x mismatch\n", __FUNCTION__, bus_speed, adapter_speed); return WRONG_BUS_FREQUENCY; } @@ -1302,7 +1302,7 @@ static u32 board_added(struct pci_func * up(&ctrl->crit_sect); } } else { - if ((bus_speed > 0x2) || (max_bus_speed > 0x2)) { + if (bus_speed > 0x2) { err("%s: speed of bus %x and adapter %x mismatch\n", __FUNCTION__, bus_speed, adapter_speed); return WRONG_BUS_FREQUENCY; } @@ -2107,7 +2107,7 @@ int shpchp_enable_slot (struct slot *p_s return (0); } rc = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (rc || !getstatus) { + if (rc || getstatus) { info("%s: latch open on slot(%x)\n", __FUNCTION__, p_slot->number); up(&p_slot->ctrl->crit_sect); return (0); @@ -2192,7 +2192,7 @@ int shpchp_disable_slot (struct slot *p_ return (0); } ret = p_slot->hpc_ops->get_latch_status(p_slot, &getstatus); - if (ret || !getstatus) { + if (ret || getstatus) { info("%s: latch open on slot(%x)\n", __FUNCTION__, p_slot->number); up(&p_slot->ctrl->crit_sect); return (0); --- linux-2.6.6-rc1/drivers/pci/hotplug/shpchp_hpc.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/shpchp_hpc.c 2004-04-18 22:25:24.869049808 -0700
@@ -104,12 +104,12 @@ #define PCIX_66MHZ_ECC 0x5 #define PCIX_100MHZ_ECC 0x6 #define PCIX_133MHZ_ECC 0x7 -#define PCIX_66MHZ_266 0x8 -#define PCIX_100MHZ_266 0x9 -#define PCIX_133MHZ_266 0x0a -#define PCIX_66MHZ_533 0x0b -#define PCIX_100MHZ_533 0x0c -#define PCIX_133MHZ_533 0x0d +#define PCIX_66MHZ_266 0x9 +#define PCIX_100MHZ_266 0xa +#define PCIX_133MHZ_266 0xb +#define PCIX_66MHZ_533 0x11 +#define PCIX_100MHZ_533 0x12 +#define PCIX_133MHZ_533 0x13 /* Slot Configuration */ #define SLOT_NUM 0x0000001F @@ -464,7 +464,8 @@ static int hpc_get_latch_status(struct s slot_reg = readl(php_ctlr->creg + SLOT1 + 4*(slot->hp_slot)); slot_status = (u16)slot_reg; - *status = ((slot_status & 0x0100) == 0) ? 1 : 0; + *status = ((slot_status & 0x0100) == 0) ? 0 : 1; /* 0 -> close; 1 -> open */ + DBG_LEAVE_ROUTINE return 0; @@ -1441,6 +1442,7 @@ int shpc_init(struct controller * ctrl, err("%s : shpc_cap_offset == 0\n", __FUNCTION__); goto abort_free_ctlr; } + dbg("%s: shpc_cap_offset = %x\n", __FUNCTION__, shpc_cap_offset); rc = pci_write_config_byte(pdev, (u8)shpc_cap_offset + DWORD_SELECT , BASE_OFFSET); if (rc) { @@ -1547,15 +1549,13 @@ int shpc_init(struct controller * ctrl, start_int_poll_timer( php_ctlr, 10 ); /* start with 10 second delay */ } else { /* Installs the interrupt handler */ -#ifdef CONFIG_PCI_USE_VECTOR rc = pci_enable_msi(pdev); if (rc) { - err("Can't get msi for the hotplug controller\n"); + info("Can't get msi for the hotplug controller\n"); + info("Use INTx for the hotplug controller\n"); dbg("%s: rc = %x\n", __FUNCTION__, rc); - goto abort_free_ctlr; - } - php_ctlr->irq = pdev->irq; -#endif + } else + php_ctlr->irq = pdev->irq; rc = request_irq(php_ctlr->irq, shpc_isr, SA_SHIRQ, MY_NAME, (void *) ctrl); dbg("%s: request_irq %d for hpc%d (returns %d)\n", __FUNCTION__, php_ctlr->irq, ctlr_seq_num, rc); --- linux-2.6.6-rc1/drivers/pci/hotplug/shpchprm_acpi.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/shpchprm_acpi.c 2004-04-18 
22:25:24.870049656 -0700 @@ -1267,7 +1267,8 @@ static int print_acpi_resources (struct int shpchprm_print_pirt(void) { dbg("SHPCHPRM ACPI Slots\n"); - print_acpi_resources (acpi_bridges_head); + if (acpi_bridges_head) + print_acpi_resources (acpi_bridges_head); return 0; } --- linux-2.6.6-rc1/drivers/pci/hotplug/shpchprm_legacy.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/pci/hotplug/shpchprm_legacy.c 2004-04-18 22:25:24.871049504 -0700 @@ -96,23 +96,6 @@ static void *detect_HRT_floating_pointer return fp; } -#if link_available -/* - * Links available memory, IO, and IRQ resources for programming - * devices which may be added to the system - * - * Returns 0 if success - */ -static int -link_available_resources ( - struct controller *ctrl, - struct pci_func *func, - int index ) -{ - return shpchp_save_used_resources (ctrl, func, !DISABLE_CARD); -} -#endif - /* * shpchprm_find_available_resources * @@ -345,19 +328,6 @@ int shpchprm_find_available_resources(st } } -#if link_available - ++index; - - while (index < 8) { - if (((func = shpchp_slot_find(primary_bus, dev_func >> 3, index)) != NULL) && populated_slot) - rc = link_available_resources(ctrl, func, index); - - if (rc) - break; - - ++index; - } -#endif i--; one_slot += sizeof(struct slot_rt); } --- linux-2.6.6-rc1/drivers/pci/Kconfig 2003-06-14 12:18:05.000000000 -0700 +++ 25/drivers/pci/Kconfig 2004-04-18 22:25:24.855051936 -0700 @@ -1,6 +1,25 @@ # # PCI configuration # +config PCI_USE_VECTOR + bool "Vector-based interrupt indexing (MSI)" + depends on (X86_LOCAL_APIC && X86_IO_APIC && !X86_64) || IA64 + default n + help + This replaces the current existing IRQ-based index interrupt scheme + with the vector-base index scheme. The advantages of vector base + over IRQ base are listed below: + 1) Support MSI implementation. + 2) Support future IOxAPIC hotplug + + Note that this allows the device drivers to enable MSI, Message + Signaled Interrupt, on all MSI capable device functions detected. 
+ Message Signal Interrupt enables an MSI-capable hardware device to + send an inbound Memory Write on its PCI bus instead of asserting + IRQ signal on device IRQ pin. + + If you don't know what to do here, say N. + config PCI_LEGACY_PROC bool "Legacy /proc/pci interface" depends on PCI --- linux-2.6.6-rc1/drivers/pci/pci.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/pci.c 2004-04-18 22:25:35.140488312 -0700 @@ -111,21 +111,15 @@ static int __pci_bus_find_cap(struct pci * support it. Possible values for @cap: * * %PCI_CAP_ID_PM Power Management - * * %PCI_CAP_ID_AGP Accelerated Graphics Port - * * %PCI_CAP_ID_VPD Vital Product Data - * * %PCI_CAP_ID_SLOTID Slot Identification - * * %PCI_CAP_ID_MSI Message Signalled Interrupts - * * %PCI_CAP_ID_CHSWP CompactPCI HotSwap - * * %PCI_CAP_ID_PCIX PCI-X + * %PCI_CAP_ID_EXP PCI Express */ -int -pci_find_capability(struct pci_dev *dev, int cap) +int pci_find_capability(struct pci_dev *dev, int cap) { return __pci_bus_find_cap(dev->bus, dev->devfn, dev->hdr_type, cap); } @@ -153,6 +147,54 @@ int pci_bus_find_capability(struct pci_b } /** + * pci_find_ext_capability - Find an extended capability + * @dev: PCI device to query + * @cap: capability code + * + * Returns the address of the requested extended capability structure + * within the device's PCI configuration space or 0 if the device does + * not support it. Possible values for @cap: + * + * %PCI_EXT_CAP_ID_ERR Advanced Error Reporting + * %PCI_EXT_CAP_ID_VC Virtual Channel + * %PCI_EXT_CAP_ID_DSN Device Serial Number + * %PCI_EXT_CAP_ID_PWR Power Budgeting + */ +int pci_find_ext_capability(struct pci_dev *dev, int cap) +{ + u32 header; + int ttl = 480; /* 3840 bytes, minimum 8 bytes per capability */ + int pos = 0x100; + + if (dev->cfg_size <= 256) + return 0; + + if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL) + return 0; + + /* + * If we have no capabilities, this is indicated by cap ID, + * cap version and next pointer all being 0. 
+ */ + if (header == 0) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 0x100) + break; + + if (pci_read_config_dword(dev, pos, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + +/** * pci_find_parent_resource - return resource region of parent bus of given region * @dev: PCI device structure contains resources to be searched * @res: child resource record for which parent is sought @@ -205,6 +247,8 @@ pci_set_power_state(struct pci_dev *dev, int pm; u16 pmcsr; + might_sleep(); + /* bound the state we're entering */ if (state > 3) state = 3; @@ -658,6 +702,10 @@ pci_clear_mwi(struct pci_dev *dev) } } +#ifndef HAVE_ARCH_PCI_SET_DMA_MASK +/* + * These can be overridden by arch-specific implementations + */ int pci_set_dma_mask(struct pci_dev *dev, u64 mask) { @@ -690,6 +738,7 @@ pci_set_consistent_dma_mask(struct pci_d return 0; } +#endif static int __devinit pci_init(void) { --- linux-2.6.6-rc1/drivers/pci/pci.h 2003-07-10 18:50:31.000000000 -0700 +++ 25/drivers/pci/pci.h 2004-04-18 22:25:24.873049200 -0700 @@ -60,3 +60,5 @@ extern int pci_visit_dev(struct pci_visi /* Lock for read/write access to pci device and bus lists */ extern spinlock_t pci_bus_lock; + +extern int pciehp_msi_quirk; --- linux-2.6.6-rc1/drivers/pci/pci-sysfs.c 2004-02-17 20:48:44.000000000 -0800 +++ 25/drivers/pci/pci-sysfs.c 2004-04-18 22:25:24.872049352 -0700 @@ -1,8 +1,8 @@ /* * drivers/pci/pci-sysfs.c * - * (C) Copyright 2002 Greg Kroah-Hartman - * (C) Copyright 2002 IBM Corp. + * (C) Copyright 2002-2004 Greg Kroah-Hartman + * (C) Copyright 2002-2004 IBM Corp. 
* (C) Copyright 2003 Matthew Wilcox * (C) Copyright 2003 Hewlett-Packard * @@ -71,7 +71,7 @@ pci_read_config(struct kobject *kobj, ch /* Several chips lock up trying to read undefined config space */ if (capable(CAP_SYS_ADMIN)) { - size = 256; + size = dev->cfg_size; } else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) { size = 128; } @@ -123,10 +123,10 @@ pci_write_config(struct kobject *kobj, c unsigned int size = count; loff_t init_off = off; - if (off > 256) + if (off > dev->cfg_size) return 0; - if (off + count > 256) { - size = 256 - off; + if (off + count > dev->cfg_size) { + size = dev->cfg_size - off; count = size; } @@ -167,6 +167,17 @@ static struct bin_attribute pci_config_a .write = pci_write_config, }; +static struct bin_attribute pcie_config_attr = { + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, + .owner = THIS_MODULE, + }, + .size = 4096, + .read = pci_read_config, + .write = pci_write_config, +}; + void pci_create_sysfs_dev_files (struct pci_dev *pdev) { struct device *dev = &pdev->dev; @@ -179,7 +190,11 @@ void pci_create_sysfs_dev_files (struct device_create_file (dev, &dev_attr_class); device_create_file (dev, &dev_attr_irq); device_create_file (dev, &dev_attr_resource); - sysfs_create_bin_file(&dev->kobj, &pci_config_attr); + + if (pdev->cfg_size < 4096) + sysfs_create_bin_file(&dev->kobj, &pci_config_attr); + else + sysfs_create_bin_file(&dev->kobj, &pcie_config_attr); /* add platform-specific attributes */ pcibios_add_platform_entries(pdev); --- linux-2.6.6-rc1/drivers/pci/probe.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/probe.c 2004-04-18 22:25:24.874049048 -0700 @@ -18,6 +18,8 @@ #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ #define CARDBUS_RESERVE_BUSNR 3 +#define PCI_CFG_SPACE_SIZE 256 +#define PCI_CFG_SPACE_EXP_SIZE 4096 /* Ugh. Need to stop exporting this to modules. 
*/ LIST_HEAD(pci_root_buses); @@ -530,6 +532,43 @@ static void pci_release_dev(struct devic kfree(pci_dev); } +/** + * pci_cfg_space_size - get the configuration space size of the PCI device. + * + * Regular PCI devices have 256 bytes, but PCI-X 2 and PCI Express devices + * have 4096 bytes. Even if the device is capable, that doesn't mean we can + * access it. Maybe we don't have a way to generate extended config space + * accesses, or the device is behind a reverse Express bridge. So we try + * reading the dword at 0x100 which must either be 0 or a valid extended + * capability header. + */ +static int pci_cfg_space_size(struct pci_dev *dev) +{ + int pos; + u32 status; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) { + pos = pci_find_capability(dev, PCI_CAP_ID_PCIX); + if (!pos) + goto fail; + + pci_read_config_dword(dev, pos + PCI_X_STATUS, &status); + if (!(status & (PCI_X_STATUS_266MHZ | PCI_X_STATUS_533MHZ))) + goto fail; + } + + if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL) + goto fail; + if (status == 0xffffffff) + goto fail; + + return PCI_CFG_SPACE_EXP_SIZE; + + fail: + return PCI_CFG_SPACE_SIZE; +} + /* * Read the config data for a PCI device, sanity-check it * and fill in the dev structure... @@ -566,6 +605,7 @@ pci_scan_device(struct pci_bus *bus, int dev->multifunction = !!(hdr_type & 0x80); dev->vendor = l & 0xffff; dev->device = (l >> 16) & 0xffff; + dev->cfg_size = pci_cfg_space_size(dev); /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) set this higher, assuming the system even supports it. 
*/ --- linux-2.6.6-rc1/drivers/pci/proc.c 2004-01-09 00:04:32.000000000 -0800 +++ 25/drivers/pci/proc.c 2004-04-18 22:25:24.875048896 -0700 @@ -16,16 +16,15 @@ #include #include -#define PCI_CFG_SPACE_SIZE 256 - static int proc_initialized; /* = 0 */ static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence) { loff_t new = -1; + struct inode *inode = file->f_dentry->d_inode; - down(&file->f_dentry->d_inode->i_sem); + down(&inode->i_sem); switch (whence) { case 0: new = off; @@ -34,14 +33,14 @@ proc_bus_pci_lseek(struct file *file, lo new = file->f_pos + off; break; case 2: - new = PCI_CFG_SPACE_SIZE + off; + new = inode->i_size + off; break; } - if (new < 0 || new > PCI_CFG_SPACE_SIZE) + if (new < 0 || new > inode->i_size) new = -EINVAL; else file->f_pos = new; - up(&file->f_dentry->d_inode->i_sem); + up(&inode->i_sem); return new; } @@ -61,7 +60,7 @@ proc_bus_pci_read(struct file *file, cha */ if (capable(CAP_SYS_ADMIN)) - size = PCI_CFG_SPACE_SIZE; + size = dev->cfg_size; else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) size = 128; else @@ -134,14 +133,15 @@ proc_bus_pci_write(struct file *file, co const struct proc_dir_entry *dp = PDE(ino); struct pci_dev *dev = dp->data; int pos = *ppos; + int size = dev->cfg_size; int cnt; - if (pos >= PCI_CFG_SPACE_SIZE) + if (pos >= size) return 0; - if (nbytes >= PCI_CFG_SPACE_SIZE) - nbytes = PCI_CFG_SPACE_SIZE; - if (pos + nbytes > PCI_CFG_SPACE_SIZE) - nbytes = PCI_CFG_SPACE_SIZE - pos; + if (nbytes >= size) + nbytes = size; + if (pos + nbytes > size) + nbytes = size - pos; cnt = nbytes; if (!access_ok(VERIFY_READ, buf, cnt)) @@ -403,7 +403,7 @@ int pci_proc_attach_device(struct pci_de return -ENOMEM; e->proc_fops = &proc_bus_pci_operations; e->data = dev; - e->size = PCI_CFG_SPACE_SIZE; + e->size = dev->cfg_size; return 0; } --- linux-2.6.6-rc1/drivers/pci/quirks.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pci/quirks.c 2004-04-18 22:25:24.875048896 -0700 @@ -868,6 +868,13 @@ static void 
__init quirk_intel_ide_combi } #endif /* CONFIG_SCSI_SATA */ +int pciehp_msi_quirk; + +static void __devinit quirk_pciehp_msi(struct pci_dev *pdev) +{ + pciehp_msi_quirk = 1; +} + /* * The main table of quirks. * @@ -984,6 +991,8 @@ static struct pci_fixup pci_fixups[] __d quirk_intel_ide_combined }, #endif /* CONFIG_SCSI_SATA */ + { PCI_FIXUP_FINAL, PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SMCH, quirk_pciehp_msi }, + { 0 } }; @@ -1008,3 +1017,5 @@ void pci_fixup_device(int pass, struct p pci_do_fixups(dev, pass, pcibios_fixups); pci_do_fixups(dev, pass, pci_fixups); } + +EXPORT_SYMBOL(pciehp_msi_quirk); --- linux-2.6.6-rc1/drivers/pcmcia/cistpl.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pcmcia/cistpl.c 2004-04-18 22:25:24.877048592 -0700 @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -78,7 +79,7 @@ static const u_int exponent[] = { /* Parameters that can be set with 'insmod' */ -#define INT_MODULE_PARM(n, v) static int n = v; MODULE_PARM(n, "i") +#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0444) INT_MODULE_PARM(cis_width, 0); /* 16-bit CIS? */ --- linux-2.6.6-rc1/drivers/pcmcia/cs.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/pcmcia/cs.c 2004-04-18 22:25:24.878048440 -0700 @@ -94,7 +94,7 @@ MODULE_AUTHOR("David Hinds 1) ? "s" : "")); /* Set host options, build basic interrupt mask */ - if (irq_list[0] == -1) + if (irq_list_count == 0) mask = irq_mask; else - for (i = mask = 0; i < 16; i++) + for (i = mask = 0; i < irq_list_count; i++) mask |= (1< #include +#include #include #include #include @@ -56,7 +57,7 @@ /* Parameters that can be set with 'insmod' */ -#define INT_MODULE_PARM(n, v) static int n = v; MODULE_PARM(n, "i") +#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0444) INT_MODULE_PARM(probe_mem, 1); /* memory probe? 
*/ #ifdef CONFIG_PCMCIA_PROBE --- linux-2.6.6-rc1/drivers/pcmcia/tcic.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/drivers/pcmcia/tcic.c 2004-04-18 22:25:24.881047984 -0700 @@ -60,7 +60,6 @@ static int pc_debug; module_param(pc_debug, int, 0644); -MODULE_PARM(pc_debug, "i"); static const char *version = "tcic.c 1.111 2000/02/15 04:13:12 (David Hinds)"; @@ -91,7 +90,8 @@ static int do_scan = 1; /* Bit map of interrupts to choose from */ static u_int irq_mask = 0xffff; -static int irq_list[16] = { -1 }; +static int irq_list[16]; +static int irq_list_count; /* The card status change interrupt -- 0 means autoselect */ static int cs_irq; @@ -105,15 +105,15 @@ static int poll_quick = HZ/20; /* CCLK external clock time, in nanoseconds. 70 ns = 14.31818 MHz */ static int cycle_time = 70; -MODULE_PARM(tcic_base, "i"); -MODULE_PARM(ignore, "i"); -MODULE_PARM(do_scan, "i"); -MODULE_PARM(irq_mask, "i"); -MODULE_PARM(irq_list, "1-16i"); -MODULE_PARM(cs_irq, "i"); -MODULE_PARM(poll_interval, "i"); -MODULE_PARM(poll_quick, "i"); -MODULE_PARM(cycle_time, "i"); +module_param(tcic_base, int, 0444); +module_param(ignore, int, 0444); +module_param(do_scan, int, 0444); +module_param(irq_mask, int, 0444); +module_param_array(irq_list, int, irq_list_count, 0444); +module_param(cs_irq, int, 0444); +module_param(poll_interval, int, 0444); +module_param(poll_quick, int, 0444); +module_param(cycle_time, int, 0444); /*====================================================================*/ @@ -481,10 +481,10 @@ static int __init init_tcic(void) /* Build interrupt mask */ printk(", %d sockets\n" KERN_INFO " irq list (", sockets); - if (irq_list[0] == -1) + if (irq_list_count == 0) mask = irq_mask; else - for (i = mask = 0; i < 16; i++) + for (i = mask = 0; i < irq_list_count; i++) mask |= (1< Brad Strand - Copyright (C) 1999-2003 3ware Inc. + Copyright (C) 1999-2004 3ware Inc. 
Kernel compatiblity By: Andre Hedrick Non-Copyright (C) 2000 Andre Hedrick @@ -179,6 +179,8 @@ 1.02.00.036 - Increase character ioctl timeout to 60 seconds. 1.02.00.037 - Fix tw_ioctl() to handle all non-data ATA passthru cmds for 'smartmontools' support. + 1.26.00.038 - Roll driver minor version to 26 to denote kernel 2.6. + Add support for cmds_per_lun module parameter. */ #include @@ -205,6 +207,7 @@ MODULE_LICENSE("GPL"); #include #include #include +#include #include #include @@ -242,10 +245,15 @@ static struct file_operations tw_fops = }; /* Globals */ -char *tw_driver_version="1.02.00.037"; +char *tw_driver_version="1.26.00.038"; TW_Device_Extension *tw_device_extension_list[TW_MAX_SLOT]; int tw_device_extension_count = 0; static int twe_major = -1; +static int cmds_per_lun; + +/* Module parameters */ +module_param(cmds_per_lun, int, 0); +MODULE_PARM_DESC(cmds_per_lun, "Maximum commands per LUN"); /* Functions */ @@ -1141,14 +1149,6 @@ int tw_findcards(Scsi_Host_Template *tw_ /* Set card status as online */ tw_dev->online = 1; -#ifdef CONFIG_3W_XXXX_CMD_PER_LUN - tw_host->cmd_per_lun = CONFIG_3W_XXXX_CMD_PER_LUN; - if (tw_host->cmd_per_lun > TW_MAX_CMDS_PER_LUN) - tw_host->cmd_per_lun = TW_MAX_CMDS_PER_LUN; -#else - /* Use SHT cmd_per_lun here */ - tw_host->cmd_per_lun = TW_MAX_CMDS_PER_LUN; -#endif tw_dev->free_head = TW_Q_START; tw_dev->free_tail = TW_Q_START; tw_dev->free_wrap = TW_Q_LENGTH - 1; @@ -3386,13 +3386,13 @@ int tw_slave_configure(Scsi_Device *SDpt dprintk(KERN_WARNING "3w-xxxx: tw_slave_configure()\n"); -#ifdef CONFIG_3W_XXXX_CMD_PER_LUN - max_cmds = CONFIG_3W_XXXX_CMD_PER_LUN; - if (max_cmds > TW_MAX_CMDS_PER_LUN) + if (cmds_per_lun) { + max_cmds = cmds_per_lun; + if (max_cmds > TW_MAX_CMDS_PER_LUN) + max_cmds = TW_MAX_CMDS_PER_LUN; + } else { max_cmds = TW_MAX_CMDS_PER_LUN; -#else - max_cmds = TW_MAX_CMDS_PER_LUN; -#endif + } scsi_adjust_queue_depth(SDptr, MSG_ORDERED_TAG, max_cmds); return 0; @@ -3488,6 +3488,7 @@ static Scsi_Host_Template 
driver_templat .eh_abort_handler = tw_scsi_eh_abort, .eh_host_reset_handler = tw_scsi_eh_reset, .bios_param = tw_scsi_biosparam, + .slave_configure = tw_slave_configure, .can_queue = TW_Q_LENGTH-2, .this_id = -1, .sg_tablesize = TW_MAX_SGL_LENGTH, --- linux-2.6.6-rc1/drivers/scsi/3w-xxxx.h 2003-09-27 18:57:45.000000000 -0700 +++ 25/drivers/scsi/3w-xxxx.h 2004-04-18 22:25:44.355087480 -0700 @@ -6,7 +6,7 @@ Arnaldo Carvalho de Melo Brad Strand - Copyright (C) 1999-2003 3ware Inc. + Copyright (C) 1999-2004 3ware Inc. Kernel compatiblity By: Andre Hedrick Non-Copyright (C) 2000 Andre Hedrick --- linux-2.6.6-rc1/drivers/scsi/advansys.c 2004-02-17 20:48:44.000000000 -0800 +++ 25/drivers/scsi/advansys.c 2004-04-18 22:25:46.443769952 -0700 @@ -4619,7 +4619,7 @@ advansys_detect(Scsi_Host_Template *tpnt ASC_DBG1(1, "advansys_detect: probing I/O port 0x%x...\n", iop); - if (check_region(iop, ASC_IOADR_GAP) != 0) { + if (!request_region(iop, ASC_IOADR_GAP,"advansys")) { printk( "AdvanSys SCSI: specified I/O Port 0x%X is busy\n", iop); /* Don't try this I/O port twice. */ @@ -4630,6 +4630,7 @@ advansys_detect(Scsi_Host_Template *tpnt "AdvanSys SCSI: specified I/O Port 0x%X has no adapter\n", iop); /* Don't try this I/O port twice. */ asc_ioport[ioport] = 0; + release_region(iop, ASC_IOADR_GAP); goto ioport_try_again; } else { /* @@ -4647,6 +4648,7 @@ advansys_detect(Scsi_Host_Template *tpnt * 'ioport' past this board. 
*/ ioport++; + release_region(iop,ASC_IOADR_GAP); goto ioport_try_again; } } @@ -10003,9 +10005,9 @@ AscSearchIOPortAddr11( } for (; i < ASC_IOADR_TABLE_MAX_IX; i++) { iop_base = _asc_def_iop_base[i]; - if (check_region(iop_base, ASC_IOADR_GAP) != 0) { + if (!request_region(iop_base, ASC_IOADR_GAP, "advansys")) { ASC_DBG1(1, - "AscSearchIOPortAddr11: check_region() failed I/O port 0x%x\n", + "AscSearchIOPortAddr11: request_region() failed I/O port 0x%x\n", iop_base); continue; } --- linux-2.6.6-rc1/drivers/scsi/aic7xxx/aic7770_osm.c 2004-01-09 00:04:32.000000000 -0800 +++ 25/drivers/scsi/aic7xxx/aic7770_osm.c 2004-04-18 22:25:46.576749736 -0700 @@ -73,7 +73,7 @@ typedef void *aic7770_dev_t; static int aic7770_linux_config(struct aic7770_identity *entry, aic7770_dev_t dev, u_int eisaBase); -void +int ahc_linux_eisa_init(void) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) @@ -82,7 +82,7 @@ ahc_linux_eisa_init(void) int i; if (aic7xxx_probe_eisa_vl == 0) - return; + return -ENODEV; /* * Linux requires the EISA IDs to be specified in @@ -93,7 +93,7 @@ ahc_linux_eisa_init(void) (ahc_num_aic7770_devs + 1), M_DEVBUF, M_NOWAIT); if (aic7770_driver.id_table == NULL) - return; + return -ENOMEM; for (eid = (struct eisa_device_id *)aic7770_driver.id_table, id = aic7770_ident_table, i = 0; @@ -109,15 +109,16 @@ ahc_linux_eisa_init(void) } eid->sig[0] = 0; - eisa_driver_register(&aic7770_driver); + return eisa_driver_register(&aic7770_driver); #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ struct aic7770_identity *entry; u_int slot; u_int eisaBase; u_int i; + int ret = -ENODEV; if (aic7xxx_probe_eisa_vl == 0) - return; + return ret; eisaBase = 0x1000 + AHC_EISA_SLOT_OFFSET; for (slot = 1; slot < NUMSLOTS; eisaBase+=0x1000, slot++) { @@ -146,9 +147,12 @@ ahc_linux_eisa_init(void) continue; /* no EISA card in slot */ entry = aic7770_find_device(eisa_id); - if (entry != NULL) + if (entry != NULL) { aic7770_linux_config(entry, NULL, eisaBase); + ret = 0; + } } + return 
ret; #endif } @@ -156,13 +160,8 @@ void ahc_linux_eisa_exit(void) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) - if (aic7xxx_probe_eisa_vl == 0) - return; - - if (aic7770_driver.id_table != NULL) { - eisa_driver_unregister(&aic7770_driver); - free(aic7770_driver.id_table, M_DEVBUF); - } + eisa_driver_unregister(&aic7770_driver); + free(aic7770_driver.id_table, M_DEVBUF); #endif } --- linux-2.6.6-rc1/drivers/scsi/aic7xxx/aic79xx_osm.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/scsi/aic7xxx/aic79xx_osm.c 2004-04-18 22:25:47.149662640 -0700 @@ -2591,6 +2591,7 @@ ahd_linux_dv_thread(void *data) sprintf(current->comm, "ahd_dv_%d", ahd->unit); #else daemonize("ahd_dv_%d", ahd->unit); + current->flags |= PF_IOTHREAD; #endif unlock_kernel(); --- linux-2.6.6-rc1/drivers/scsi/aic7xxx/aic7xxx_osm.c 2004-03-10 20:41:29.000000000 -0800 +++ 25/drivers/scsi/aic7xxx/aic7xxx_osm.c 2004-04-18 22:25:47.152662184 -0700 @@ -892,18 +892,25 @@ ahc_linux_detect(Scsi_Host_Template *tem ahc_list_lockinit(); #ifdef CONFIG_PCI - ahc_linux_pci_init(); + found = ahc_linux_pci_init(); + if (found) + goto out; #endif #ifdef CONFIG_EISA - ahc_linux_eisa_init(); + found = ahc_linux_eisa_init(); + if (found) { +#ifdef CONFIG_PCI + ahc_linux_pci_exit(); +#endif + goto out; + } #endif /* * Register with the SCSI layer all * controllers we've found. 
*/ - found = 0; TAILQ_FOREACH(ahc, &ahc_tailq, links) { if (ahc_linux_register_host(ahc, template) == 0) @@ -913,6 +920,8 @@ ahc_linux_detect(Scsi_Host_Template *tem spin_lock_irq(&io_request_lock); #endif aic7xxx_detect_complete++; + +out: return (found); } @@ -2296,6 +2305,7 @@ ahc_linux_dv_thread(void *data) sprintf(current->comm, "ahc_dv_%d", ahc->unit); #else daemonize("ahc_dv_%d", ahc->unit); + current->flags |= PF_IOTHREAD; #endif unlock_kernel(); @@ -3969,11 +3979,10 @@ ahc_linux_alloc_device(struct ahc_softc } static void -ahc_linux_free_device(struct ahc_softc *ahc, struct ahc_linux_device *dev) +__ahc_linux_free_device(struct ahc_softc *ahc, struct ahc_linux_device *dev) { struct ahc_linux_target *targ; - del_timer_sync(&dev->timer); targ = dev->target; targ->devices[dev->lun] = NULL; free(dev, M_DEVBUF); @@ -3983,6 +3992,13 @@ ahc_linux_free_device(struct ahc_softc * ahc_linux_free_target(ahc, targ); } +static void +ahc_linux_free_device(struct ahc_softc *ahc, struct ahc_linux_device *dev) +{ + del_timer_sync(&dev->timer); + __ahc_linux_free_device(ahc, dev); +} + void ahc_send_async(struct ahc_softc *ahc, char channel, u_int target, u_int lun, ac_code code, void *arg) @@ -4693,7 +4709,7 @@ ahc_linux_dev_timed_unfreeze(u_long arg) ahc_linux_run_device_queue(ahc, dev); if (TAILQ_EMPTY(&dev->busyq) && dev->active == 0) - ahc_linux_free_device(ahc, dev); + __ahc_linux_free_device(ahc, dev); ahc_unlock(ahc, &s); } @@ -5067,11 +5083,17 @@ ahc_platform_dump_card_state(struct ahc_ } } +static void __exit ahc_linux_exit(void); + static int __init ahc_linux_init(void) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) - return (ahc_linux_detect(&aic7xxx_driver_template) ? 
0 : -ENODEV); + int rc = ahc_linux_detect(&aic7xxx_driver_template); + if (rc) + return rc; + ahc_linux_exit(); + return -ENODEV; #else scsi_register_module(MODULE_SCSI_HA, &aic7xxx_driver_template); if (aic7xxx_driver_template.present == 0) { --- linux-2.6.6-rc1/drivers/scsi/aic7xxx/aic7xxx_osm.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/drivers/scsi/aic7xxx/aic7xxx_osm.h 2004-04-18 22:25:46.580749128 -0700 @@ -840,7 +840,7 @@ typedef enum #ifdef CONFIG_EISA extern uint32_t aic7xxx_probe_eisa_vl; -void ahc_linux_eisa_init(void); +int ahc_linux_eisa_init(void); void ahc_linux_eisa_exit(void); int aic7770_map_registers(struct ahc_softc *ahc, u_int port); --- linux-2.6.6-rc1/drivers/scsi/libata-core.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/scsi/libata-core.c 2004-04-18 22:25:24.884047528 -0700 @@ -2245,6 +2245,7 @@ struct ata_queued_cmd *ata_qc_new_init(s qc->scsicmd = NULL; qc->ap = ap; qc->dev = dev; + qc->cursect = qc->cursg = qc->cursg_ofs = 0; INIT_LIST_HEAD(&qc->node); init_MUTEX_LOCKED(&qc->sem); --- linux-2.6.6-rc1/drivers/scsi/libata-scsi.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/scsi/libata-scsi.c 2004-04-18 22:25:46.290793208 -0700 @@ -32,6 +32,7 @@ #include "libata.h" +typedef unsigned int (*ata_xlat_func_t)(struct ata_queued_cmd *qc, u8 *scsicmd); static void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev, struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); @@ -168,6 +169,23 @@ int ata_scsi_slave_config(struct scsi_de sdev->use_10_for_ms = 1; blk_queue_max_phys_segments(sdev->request_queue, LIBATA_MAX_PRD); + if (sdev->id < ATA_MAX_DEVICES) { + struct ata_port *ap; + struct ata_device *dev; + + ap = (struct ata_port *) &sdev->host->hostdata[0]; + dev = &ap->device[sdev->id]; + + if (dev->flags & ATA_DFLAG_LBA48) { + sdev->host->max_sectors = 65534; + blk_queue_max_sectors(sdev->request_queue, 65534); + printk(KERN_INFO "ata%u: dev %u max request 32MB (lba48)\n", + ap->id, sdev->id); + } else + printk(KERN_INFO 
"ata%u: dev %u max request 128K\n", + ap->id, sdev->id); + } + return 0; /* scsi layer doesn't check return value, sigh */ } @@ -222,7 +240,6 @@ static unsigned int ata_scsi_rw_xlat(str struct ata_taskfile *tf = &qc->tf; unsigned int lba48 = tf->flags & ATA_TFLAG_LBA48; - qc->cursect = qc->cursg = qc->cursg_ofs = 0; tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf->hob_nsect = 0; tf->hob_lbal = 0; @@ -335,7 +352,8 @@ static unsigned int ata_scsi_rw_xlat(str static void ata_scsi_translate(struct ata_port *ap, struct ata_device *dev, struct scsi_cmnd *cmd, - void (*done)(struct scsi_cmnd *)) + void (*done)(struct scsi_cmnd *), + ata_xlat_func_t xlat_func) { struct ata_queued_cmd *qc; u8 *scsicmd = cmd->cmnd; @@ -352,9 +370,11 @@ static void ata_scsi_translate(struct at if (!qc) return; - qc->flags |= ATA_QCFLAG_SG; /* data is present; dma-map it */ + if (cmd->sc_data_direction == SCSI_DATA_READ || + cmd->sc_data_direction == SCSI_DATA_WRITE) + qc->flags |= ATA_QCFLAG_SG; /* data is present; dma-map it */ - if (ata_scsi_rw_xlat(qc, scsicmd)) + if (xlat_func(qc, scsicmd)) goto err_out; /* select device, send command to hardware */ @@ -1014,17 +1034,17 @@ ata_scsi_find_dev(struct ata_port *ap, s } /** - * ata_scsi_xlat_possible - check if SCSI to ATA translation is possible + * ata_get_xlat_func - check if SCSI to ATA translation is possible * @cmd: SCSI command opcode to consider * * Look up the SCSI command given, and determine whether the * SCSI command is to be translated or simulated. * * RETURNS: - * Non-zero if possible, zero if not. + * Pointer to translation function if possible, %NULL if not. 
*/ -static inline int ata_scsi_xlat_possible(u8 cmd) +static inline ata_xlat_func_t ata_get_xlat_func(u8 cmd) { switch (cmd) { case READ_6: @@ -1034,10 +1054,10 @@ static inline int ata_scsi_xlat_possible case WRITE_6: case WRITE_10: case WRITE_16: - return 1; + return ata_scsi_rw_xlat; } - return 0; + return NULL; } /** @@ -1099,8 +1119,10 @@ int ata_scsi_queuecmd(struct scsi_cmnd * } if (dev->class == ATA_DEV_ATA) { - if (ata_scsi_xlat_possible(cmd->cmnd[0])) - ata_scsi_translate(ap, dev, cmd, done); + ata_xlat_func_t xlat_func = ata_get_xlat_func(cmd->cmnd[0]); + + if (xlat_func) + ata_scsi_translate(ap, dev, cmd, done, xlat_func); else ata_scsi_simulate(ap, dev, cmd, done); } else --- linux-2.6.6-rc1/drivers/scsi/sata_promise.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/scsi/sata_promise.c 2004-04-18 22:25:45.026985336 -0700 @@ -1180,14 +1180,14 @@ static void pdc_dma_start(struct ata_que static void pdc_tf_load_mmio(struct ata_port *ap, struct ata_taskfile *tf) { - if (tf->protocol != ATA_PROT_DMA) + if (tf->protocol == ATA_PROT_PIO) ata_tf_load_mmio(ap, tf); } static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf) { - if (tf->protocol != ATA_PROT_DMA) + if (tf->protocol == ATA_PROT_PIO) ata_exec_command_mmio(ap, tf); } @@ -1289,7 +1289,7 @@ static void pdc20621_put_to_dimm(struct readl(mmio + PDC_DIMM_WINDOW_CTLR); offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : + dist = ((s32)(window_size - (offset + size))) >= 0 ? 
size : (long) (window_size - offset); memcpy_toio((char *) (dimm_mmio + offset / 4), (char *) psource, dist); writel(0x01, mmio + PDC_GENERAL_CTLR); --- linux-2.6.6-rc1/drivers/serial/8250.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/serial/8250.c 2004-04-18 22:25:30.337218520 -0700 @@ -21,6 +21,7 @@ */ #include #include +#include #include #include #include @@ -117,11 +118,11 @@ static struct old_serial_port old_serial #define UART_NR (ARRAY_SIZE(old_serial_port) + CONFIG_SERIAL_8250_NR_UARTS) -#if defined(CONFIG_SERIAL_8250_RSA) && defined(MODULE) +#ifdef CONFIG_SERIAL_8250_RSA #define PORT_RSA_MAX 4 -static int probe_rsa[PORT_RSA_MAX]; -static int force_rsa[PORT_RSA_MAX]; +static unsigned long probe_rsa[PORT_RSA_MAX]; +static unsigned int probe_rsa_count; #endif /* CONFIG_SERIAL_8250_RSA */ struct uart_8250_port { @@ -678,21 +679,16 @@ static void autoconfig(struct uart_8250_ break; } -#if defined(CONFIG_SERIAL_8250_RSA) && defined(MODULE) +#ifdef CONFIG_SERIAL_8250_RSA /* * Only probe for RSA ports if we got the region. 
*/ if (up->port.type == PORT_16550A && probeflags & PROBE_RSA) { int i; - for (i = 0 ; i < PORT_RSA_MAX ; ++i) { - if (!probe_rsa[i] && !force_rsa[i]) - break; - if (((probe_rsa[i] != up->port.iobase) || - check_region(up->port.iobase + UART_RSA_BASE, 16)) && - (force_rsa[i] != up->port.iobase)) - continue; - if (__enable_rsa(up)) { + for (i = 0 ; i < probe_rsa_count; ++i) { + if (probe_rsa[i] == up->port.iobase && + __enable_rsa(up)) { up->port.type = PORT_RSA; break; } @@ -836,7 +832,7 @@ receive_chars(struct uart_8250_port *up, if (unlikely(tty->flip.count >= TTY_FLIPBUF_SIZE)) { tty->flip.work.func((void *)tty); if (tty->flip.count >= TTY_FLIPBUF_SIZE) - return; // if TTY_DONT_FLIP is set + return; /* if TTY_DONT_FLIP is set */ } ch = serial_inp(up, UART_RX); *tty->flip.char_buf_ptr = ch; @@ -1197,12 +1193,21 @@ static void serial8250_break_ctl(struct spin_unlock_irqrestore(&up->port.lock, flags); } +#ifdef CONFIG_KGDB +static int kgdb_irq = -1; +#endif + static int serial8250_startup(struct uart_port *port) { struct uart_8250_port *up = (struct uart_8250_port *)port; unsigned long flags; int retval; +#ifdef CONFIG_KGDB + if (up->port.irq == kgdb_irq) + return -EBUSY; +#endif + up->capabilities = uart_config[up->port.type].flags; up->mcr = 0; @@ -1892,6 +1897,10 @@ static void __init serial8250_register_p for (i = 0; i < UART_NR; i++) { struct uart_8250_port *up = &serial8250_ports[i]; +#ifdef CONFIG_KGDB + if (up->port.irq == kgdb_irq) + up->port.kgdb = 1; +#endif up->port.line = i; up->port.ops = &serial8250_pops; init_timer(&up->timer); @@ -2175,6 +2184,31 @@ void serial8250_resume_port(int line) uart_resume_port(&serial8250_reg, &serial8250_ports[line].port); } +#ifdef CONFIG_KGDB +/* + * Find all the ports using the given irq and shut them down. + * Result should be that the irq will be released. 
+ */ +void shutdown_for_kgdb(struct async_struct * info) +{ + int irq = info->state->irq; + struct uart_8250_port *up; + int ttyS; + + kgdb_irq = irq; /* save for later init */ + for (ttyS = 0; ttyS < UART_NR; ttyS++){ + up = &serial8250_ports[ttyS]; + if (up->port.irq == irq && (irq_lists + irq)->head) { +#ifdef CONFIG_DEBUG_SPINLOCK /* ugly business... */ + if(up->port.lock.magic != SPINLOCK_MAGIC) + spin_lock_init(&up->port.lock); +#endif + serial8250_shutdown(&up->port); + } + } +} +#endif /* CONFIG_KGDB */ + static int __init serial8250_init(void) { int ret, i; @@ -2215,14 +2249,12 @@ EXPORT_SYMBOL(serial8250_resume_port); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic 8250/16x50 serial driver $Revision: 1.90 $"); -MODULE_PARM(share_irqs, "i"); +module_param(share_irqs, uint, 0644); MODULE_PARM_DESC(share_irqs, "Share IRQs with other non-8250/16x50 devices" " (unsafe)"); -#if defined(CONFIG_SERIAL_8250_RSA) && defined(MODULE) -MODULE_PARM(probe_rsa, "1-" __MODULE_STRING(PORT_RSA_MAX) "i"); +#ifdef CONFIG_SERIAL_8250_RSA +module_param_array(probe_rsa, ulong, probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); -MODULE_PARM(force_rsa, "1-" __MODULE_STRING(PORT_RSA_MAX) "i"); -MODULE_PARM_DESC(force_rsa, "Force I/O ports for RSA"); #endif MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR); --- linux-2.6.6-rc1/drivers/serial/serial_core.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/serial/serial_core.c 2004-04-18 22:25:30.339218216 -0700 @@ -1985,6 +1985,11 @@ uart_configure_port(struct uart_driver * { unsigned int flags; +#ifdef CONFIG_KGDB + if (port->kgdb) + return; +#endif + /* * If there isn't a port here, don't do anything further. 
*/ --- linux-2.6.6-rc1/drivers/serial/serial_cs.c 2004-04-14 23:14:48.000000000 -0700 +++ 25/drivers/serial/serial_cs.c 2004-04-18 22:25:24.890046616 -0700 @@ -32,6 +32,7 @@ ======================================================================*/ #include +#include #include #include #include @@ -71,17 +72,18 @@ static char *version = "serial_cs.c 1.13 /* Bit map of interrupts to choose from */ static u_int irq_mask = 0xdeb8; -static int irq_list[4] = { -1 }; +static int irq_list[4]; +static unsigned int irq_list_count; /* Enable the speaker? */ static int do_sound = 1; /* Skip strict UART tests? */ static int buggy_uart; -MODULE_PARM(irq_mask, "i"); -MODULE_PARM(irq_list, "1-4i"); -MODULE_PARM(do_sound, "i"); -MODULE_PARM(buggy_uart, "i"); +module_param(irq_mask, uint, 0444); +module_param_array(irq_list, int, irq_list_count, 0444); +module_param(do_sound, int, 0444); +module_param(buggy_uart, int, 0444); /*====================================================================*/ @@ -221,10 +223,10 @@ static dev_link_t *serial_attach(void) link->io.NumPorts1 = 8; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; link->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_LEVEL_ID; - if (irq_list[0] == -1) + if (irq_list_count == 0) link->irq.IRQInfo2 = irq_mask; else - for (i = 0; i < 4; i++) + for (i = 0; i < irq_list_count; i++) link->irq.IRQInfo2 |= 1 << irq_list[i]; link->conf.Attributes = CONF_ENABLE_IRQ; if (do_sound) { --- linux-2.6.6-rc1/drivers/usb/gadget/ether.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/usb/gadget/ether.c 2004-04-18 22:25:24.892046312 -0700 @@ -2374,9 +2374,7 @@ autoconf_fail: /* one random address for the gadget device ... both of these could * reasonably come from an id prom or a module parameter. */ - get_random_bytes (net->dev_addr, ETH_ALEN); - net->dev_addr [0] &= 0xfe; // clear multicast bit - net->dev_addr [0] |= 0x02; // set local assignment bit (IEEE802) + random_ether_addr(net->dev_addr); #ifdef DEV_CONFIG_CDC /* ... 
another address for the host, on the other end of the @@ -2385,9 +2383,7 @@ autoconf_fail: if (cdc) { u8 node_id [ETH_ALEN]; - get_random_bytes (node_id, sizeof node_id); - node_id [0] &= 0xfe; // clear multicast bit - node_id [0] |= 0x02; // set local assignment bit (IEEE802) + random_ether_addr(node_id); snprintf (ethaddr, sizeof ethaddr, "%02X%02X%02X%02X%02X%02X", node_id [0], node_id [1], node_id [2], node_id [3], node_id [4], node_id [5]); --- linux-2.6.6-rc1/drivers/usb/gadget/rndis.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/usb/gadget/rndis.c 2004-04-18 22:25:52.361870264 -0700 @@ -746,19 +746,23 @@ static int gen_ndis_set_resp (u8 configN rndis_set_cmplt_type *resp; int i, retval = -ENOTSUPP; struct rndis_config_parameter *param; - - if (!r) return -ENOMEM; + u8 *cp; + + if (!r) + return -ENOMEM; resp = (rndis_set_cmplt_type *) r->buf; - - if (!resp) return -ENOMEM; - + if (!resp) + return -ENOMEM; + + cp = (u8 *)resp; + switch (OID) { case OID_GEN_CURRENT_PACKET_FILTER: DEBUG("%s: OID_GEN_CURRENT_PACKET_FILTER\n", __FUNCTION__); - currentFilter2devFlags ((u32) ((u8 *) resp + 28), + currentFilter2devFlags(cp[28], rndis_per_dev_params [configNr].dev); retval = 0; - if ((u32) ((u8 *) resp + 28)) + if (cp[28]) rndis_per_dev_params [configNr].state = RNDIS_INITIALIZED; else rndis_per_dev_params [configNr].state = RNDIS_UNINITIALIZED; --- linux-2.6.6-rc1/drivers/usb/host/ehci-hcd.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/usb/host/ehci-hcd.c 2004-04-18 22:25:59.511783312 -0700 @@ -577,7 +577,8 @@ static void ehci_stop (struct usb_hcd *h /* root hub is shut down separately (first, when possible) */ spin_lock_irq (&ehci->lock); - ehci_work (ehci, NULL); + if (ehci->async) + ehci_work (ehci, NULL); spin_unlock_irq (&ehci->lock); ehci_mem_cleanup (ehci); --- linux-2.6.6-rc1/drivers/usb/media/vicam.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/usb/media/vicam.c 2004-04-18 22:25:24.893046160 -0700 @@ -612,15 +612,20 @@ vicam_ioctl(struct 
inode *inode, struct case VIDIOCSPICT: { - struct video_picture *vp = (struct video_picture *) arg; - - DBG("VIDIOCSPICT depth = %d, pal = %d\n", vp->depth, - vp->palette); + struct video_picture vp; + + if (copy_from_user(&vp, arg, sizeof(vp))) { + retval = -EFAULT; + break; + } + + DBG("VIDIOCSPICT depth = %d, pal = %d\n", vp.depth, + vp.palette); - cam->gain = vp->brightness >> 8; + cam->gain = vp.brightness >> 8; - if (vp->depth != 24 - || vp->palette != VIDEO_PALETTE_RGB24) + if (vp.depth != 24 + || vp.palette != VIDEO_PALETTE_RGB24) retval = -EINVAL; break; @@ -660,7 +665,7 @@ vicam_ioctl(struct inode *inode, struct break; } - DBG("VIDIOCSWIN %d x %d\n", vw->width, vw->height); + DBG("VIDIOCSWIN %d x %d\n", vw.width, vw.height); if ( vw.width != 320 || vw.height != 240 ) retval = -EFAULT; --- linux-2.6.6-rc1/drivers/usb/net/usbnet.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/usb/net/usbnet.c 2004-04-18 22:25:24.895045856 -0700 @@ -3413,9 +3413,7 @@ static int __init usbnet_init (void) < sizeof (struct cdc_state))); #endif - get_random_bytes (node_id, sizeof node_id); - node_id [0] &= 0xfe; // clear multicast bit - node_id [0] |= 0x02; // set local assignment bit (IEEE802) + random_ether_addr(node_id); return usb_register(&usbnet_driver); } --- linux-2.6.6-rc1/drivers/video/aty/aty128fb.c 2004-02-17 20:48:45.000000000 -0800 +++ 25/drivers/video/aty/aty128fb.c 2004-04-18 22:25:24.897045552 -0700 @@ -1998,11 +1998,13 @@ err_free_fb: static void __devexit aty128_remove(struct pci_dev *pdev) { struct fb_info *info = pci_get_drvdata(pdev); - struct aty128fb_par *par = info->par; + struct aty128fb_par *par; if (!info) return; + par = info->par; + unregister_framebuffer(info); #ifdef CONFIG_MTRR if (par->mtrr.vram_valid) --- linux-2.6.6-rc1/drivers/video/bw2.c 2003-06-14 12:18:23.000000000 -0700 +++ 25/drivers/video/bw2.c 2004-04-18 22:25:24.897045552 -0700 @@ -162,8 +162,10 @@ bw2_blank(int blank, struct fb_info *inf } static struct sbus_mmap_map 
bw2_mmap_map[] = { - { 0, 0, SBUS_MMAP_FBSIZE(1) }, - { 0, 0, 0 } + { + .size = SBUS_MMAP_FBSIZE(1) + }, + { .size = 0 } }; static int bw2_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/drivers/video/cg14.c 2003-08-22 19:23:42.000000000 -0700 +++ 25/drivers/video/cg14.c 2004-04-18 22:25:24.898045400 -0700 @@ -367,23 +367,82 @@ static void cg14_init_fix(struct fb_info } static struct sbus_mmap_map __cg14_mmap_map[CG14_MMAP_ENTRIES] __initdata = { - { CG14_REGS, 0x80000000, 0x1000 }, - { CG14_XLUT, 0x80003000, 0x1000 }, - { CG14_CLUT1, 0x80004000, 0x1000 }, - { CG14_CLUT2, 0x80005000, 0x1000 }, - { CG14_CLUT3, 0x80006000, 0x1000 }, - { CG3_MMAP_OFFSET - - 0x7000, 0x80000000, 0x7000 }, - { CG3_MMAP_OFFSET, 0x00000000, SBUS_MMAP_FBSIZE(1) }, - { MDI_CURSOR_MAP, 0x80001000, 0x1000 }, - { MDI_CHUNKY_BGR_MAP, 0x01000000, 0x400000 }, - { MDI_PLANAR_X16_MAP, 0x02000000, 0x200000 }, - { MDI_PLANAR_C16_MAP, 0x02800000, 0x200000 }, - { MDI_PLANAR_X32_MAP, 0x03000000, 0x100000 }, - { MDI_PLANAR_B32_MAP, 0x03400000, 0x100000 }, - { MDI_PLANAR_G32_MAP, 0x03800000, 0x100000 }, - { MDI_PLANAR_R32_MAP, 0x03c00000, 0x100000 }, - { 0, 0, 0 } + { + .voff = CG14_REGS, + .poff = 0x80000000, + .size = 0x1000 + }, + { + .voff = CG14_XLUT, + .poff = 0x80003000, + .size = 0x1000 + }, + { + .voff = CG14_CLUT1, + .poff = 0x80004000, + .size = 0x1000 + }, + { + .voff = CG14_CLUT2, + .poff = 0x80005000, + .size = 0x1000 + }, + { + .voff = CG14_CLUT3, + .poff = 0x80006000, + .size = 0x1000 + }, + { + .voff = CG3_MMAP_OFFSET - 0x7000, + .poff = 0x80000000, + .size = 0x7000 + }, + { + .voff = CG3_MMAP_OFFSET, + .poff = 0x00000000, + .size = SBUS_MMAP_FBSIZE(1) + }, + { + .voff = MDI_CURSOR_MAP, + .poff = 0x80001000, + .size = 0x1000 + }, + { + .voff = MDI_CHUNKY_BGR_MAP, + .poff = 0x01000000, + .size = 0x400000 + }, + { + .voff = MDI_PLANAR_X16_MAP, + .poff = 0x02000000, + .size = 0x200000 + }, + { + .voff = MDI_PLANAR_C16_MAP, + .poff = 0x02800000, + 
.size = 0x200000 + }, + { + .voff = MDI_PLANAR_X32_MAP, + .poff = 0x03000000, + .size = 0x100000 + }, + { + .voff = MDI_PLANAR_B32_MAP, + .poff = 0x03400000, + .size = 0x100000 + }, + { + .voff = MDI_PLANAR_G32_MAP, + .poff = 0x03800000, + .size = 0x100000 + }, + { + .voff = MDI_PLANAR_R32_MAP, + .poff = 0x03c00000, + .size = 0x100000 + }, + { .size = 0 } }; struct all_info { --- linux-2.6.6-rc1/drivers/video/cg3.c 2003-06-14 12:18:24.000000000 -0700 +++ 25/drivers/video/cg3.c 2004-04-18 22:25:24.899045248 -0700 @@ -221,8 +221,12 @@ cg3_blank(int blank, struct fb_info *inf } static struct sbus_mmap_map cg3_mmap_map[] = { - { CG3_MMAP_OFFSET, CG3_RAM_OFFSET, SBUS_MMAP_FBSIZE(1) }, - { 0, 0, 0 } + { + .voff = CG3_MMAP_OFFSET, + .poff = CG3_RAM_OFFSET, + .size = SBUS_MMAP_FBSIZE(1) + }, + { .size = 0 } }; static int cg3_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/drivers/video/cg6.c 2003-06-14 12:18:05.000000000 -0700 +++ 25/drivers/video/cg6.c 2004-04-18 22:25:24.899045248 -0700 @@ -480,15 +480,47 @@ cg6_blank(int blank, struct fb_info *inf } static struct sbus_mmap_map cg6_mmap_map[] = { - { CG6_FBC, CG6_FBC_OFFSET, PAGE_SIZE }, - { CG6_TEC, CG6_TEC_OFFSET, PAGE_SIZE }, - { CG6_BTREGS, CG6_BROOKTREE_OFFSET, PAGE_SIZE }, - { CG6_FHC, CG6_FHC_OFFSET, PAGE_SIZE }, - { CG6_THC, CG6_THC_OFFSET, PAGE_SIZE }, - { CG6_ROM, CG6_ROM_OFFSET, 0x10000 }, - { CG6_RAM, CG6_RAM_OFFSET, SBUS_MMAP_FBSIZE(1) }, - { CG6_DHC, CG6_DHC_OFFSET, 0x40000 }, - { 0, 0, 0 } + { + .voff = CG6_FBC, + .poff = CG6_FBC_OFFSET, + .size = PAGE_SIZE + }, + { + .voff = CG6_TEC, + .poff = CG6_TEC_OFFSET, + .size = PAGE_SIZE + }, + { + .voff = CG6_BTREGS, + .poff = CG6_BROOKTREE_OFFSET, + .size = PAGE_SIZE + }, + { + .voff = CG6_FHC, + .poff = CG6_FHC_OFFSET, + .size = PAGE_SIZE + }, + { + .voff = CG6_THC, + .poff = CG6_THC_OFFSET, + .size = PAGE_SIZE + }, + { + .voff = CG6_ROM, + .poff = CG6_ROM_OFFSET, + .size = 0x10000 + }, + { + .voff = CG6_RAM, + 
.poff = CG6_RAM_OFFSET, + .size = SBUS_MMAP_FBSIZE(1) + }, + { + .voff = CG6_DHC, + .poff = CG6_DHC_OFFSET, + .size = 0x40000 + }, + { .size = 0 } }; static int cg6_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/drivers/video/console/sticore.c 2004-02-17 20:48:45.000000000 -0800 +++ 25/drivers/video/console/sticore.c 2004-04-18 22:26:02.384346616 -0700 @@ -22,7 +22,6 @@ #include #include -#include #include #include #include --- linux-2.6.6-rc1/drivers/video/fbmem.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/drivers/video/fbmem.c 2004-04-18 22:25:24.901044944 -0700 @@ -32,6 +32,9 @@ #include #endif #include +#include +#include +#include #if defined(__mc68000__) || defined(CONFIG_APUS) #include @@ -1245,6 +1248,8 @@ static struct file_operations fb_fops = #endif }; +static struct class_simple *fb_class; + /** * register_framebuffer - registers a frame buffer device * @fb_info: frame buffer info structure @@ -1259,6 +1264,7 @@ int register_framebuffer(struct fb_info *fb_info) { int i; + struct class_device *c; if (num_registered_fb == FB_MAX) return -ENXIO; @@ -1267,6 +1273,12 @@ register_framebuffer(struct fb_info *fb_ if (!registered_fb[i]) break; fb_info->node = i; + + c = class_simple_device_add(fb_class, MKDEV(FB_MAJOR, i), NULL, "fb%d", i); + if (IS_ERR(c)) { + /* Not fatal */ + printk(KERN_WARNING "Unable to create class_device for framebuffer %d; errno = %ld\n", i, PTR_ERR(c)); + } if (fb_info->pixmap.addr == NULL) { fb_info->pixmap.addr = kmalloc(FBPIXMAPSIZE, GFP_KERNEL); @@ -1332,6 +1344,7 @@ unregister_framebuffer(struct fb_info *f kfree(fb_info->sprite.addr); registered_fb[i]=NULL; num_registered_fb--; + class_simple_device_remove(MKDEV(FB_MAJOR, i)); return 0; } @@ -1393,6 +1406,12 @@ fbmem_init(void) if (register_chrdev(FB_MAJOR,"fb",&fb_fops)) printk("unable to get major %d for fb devs\n", FB_MAJOR); + fb_class = class_simple_create(THIS_MODULE, "graphics"); + if (IS_ERR(fb_class)) { + printk(KERN_WARNING 
"Unable to create fb class; errno = %ld\n", PTR_ERR(fb_class)); + fb_class = NULL; + } + #ifdef CONFIG_FB_OF if (ofonly) { offb_init(); --- linux-2.6.6-rc1/drivers/video/ffb.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/drivers/video/ffb.c 2004-04-18 22:25:24.902044792 -0700 @@ -769,34 +769,142 @@ ffb_blank(int blank, struct fb_info *inf } static struct sbus_mmap_map ffb_mmap_map[] = { - { FFB_SFB8R_VOFF, FFB_SFB8R_POFF, 0x0400000 }, - { FFB_SFB8G_VOFF, FFB_SFB8G_POFF, 0x0400000 }, - { FFB_SFB8B_VOFF, FFB_SFB8B_POFF, 0x0400000 }, - { FFB_SFB8X_VOFF, FFB_SFB8X_POFF, 0x0400000 }, - { FFB_SFB32_VOFF, FFB_SFB32_POFF, 0x1000000 }, - { FFB_SFB64_VOFF, FFB_SFB64_POFF, 0x2000000 }, - { FFB_FBC_REGS_VOFF, FFB_FBC_REGS_POFF, 0x0002000 }, - { FFB_BM_FBC_REGS_VOFF, FFB_BM_FBC_REGS_POFF, 0x0002000 }, - { FFB_DFB8R_VOFF, FFB_DFB8R_POFF, 0x0400000 }, - { FFB_DFB8G_VOFF, FFB_DFB8G_POFF, 0x0400000 }, - { FFB_DFB8B_VOFF, FFB_DFB8B_POFF, 0x0400000 }, - { FFB_DFB8X_VOFF, FFB_DFB8X_POFF, 0x0400000 }, - { FFB_DFB24_VOFF, FFB_DFB24_POFF, 0x1000000 }, - { FFB_DFB32_VOFF, FFB_DFB32_POFF, 0x1000000 }, - { FFB_FBC_KREGS_VOFF, FFB_FBC_KREGS_POFF, 0x0002000 }, - { FFB_DAC_VOFF, FFB_DAC_POFF, 0x0002000 }, - { FFB_PROM_VOFF, FFB_PROM_POFF, 0x0010000 }, - { FFB_EXP_VOFF, FFB_EXP_POFF, 0x0002000 }, - { FFB_DFB422A_VOFF, FFB_DFB422A_POFF, 0x0800000 }, - { FFB_DFB422AD_VOFF, FFB_DFB422AD_POFF, 0x0800000 }, - { FFB_DFB24B_VOFF, FFB_DFB24B_POFF, 0x1000000 }, - { FFB_DFB422B_VOFF, FFB_DFB422B_POFF, 0x0800000 }, - { FFB_DFB422BD_VOFF, FFB_DFB422BD_POFF, 0x0800000 }, - { FFB_SFB16Z_VOFF, FFB_SFB16Z_POFF, 0x0800000 }, - { FFB_SFB8Z_VOFF, FFB_SFB8Z_POFF, 0x0800000 }, - { FFB_SFB422_VOFF, FFB_SFB422_POFF, 0x0800000 }, - { FFB_SFB422D_VOFF, FFB_SFB422D_POFF, 0x0800000 }, - { 0, 0, 0 } + { + .voff = FFB_SFB8R_VOFF, + .poff = FFB_SFB8R_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_SFB8G_VOFF, + .poff = FFB_SFB8G_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_SFB8B_VOFF, + .poff = FFB_SFB8B_POFF, + .size = 
0x0400000 + }, + { + .voff = FFB_SFB8X_VOFF, + .poff = FFB_SFB8X_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_SFB32_VOFF, + .poff = FFB_SFB32_POFF, + .size = 0x1000000 + }, + { + .voff = FFB_SFB64_VOFF, + .poff = FFB_SFB64_POFF, + .size = 0x2000000 + }, + { + .voff = FFB_FBC_REGS_VOFF, + .poff = FFB_FBC_REGS_POFF, + .size = 0x0002000 + }, + { + .voff = FFB_BM_FBC_REGS_VOFF, + .poff = FFB_BM_FBC_REGS_POFF, + .size = 0x0002000 + }, + { + .voff = FFB_DFB8R_VOFF, + .poff = FFB_DFB8R_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_DFB8G_VOFF, + .poff = FFB_DFB8G_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_DFB8B_VOFF, + .poff = FFB_DFB8B_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_DFB8X_VOFF, + .poff = FFB_DFB8X_POFF, + .size = 0x0400000 + }, + { + .voff = FFB_DFB24_VOFF, + .poff = FFB_DFB24_POFF, + .size = 0x1000000 + }, + { + .voff = FFB_DFB32_VOFF, + .poff = FFB_DFB32_POFF, + .size = 0x1000000 + }, + { + .voff = FFB_FBC_KREGS_VOFF, + .poff = FFB_FBC_KREGS_POFF, + .size = 0x0002000 + }, + { + .voff = FFB_DAC_VOFF, + .poff = FFB_DAC_POFF, + .size = 0x0002000 + }, + { + .voff = FFB_PROM_VOFF, + .poff = FFB_PROM_POFF, + .size = 0x0010000 + }, + { + .voff = FFB_EXP_VOFF, + .poff = FFB_EXP_POFF, + .size = 0x0002000 + }, + { + .voff = FFB_DFB422A_VOFF, + .poff = FFB_DFB422A_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_DFB422AD_VOFF, + .poff = FFB_DFB422AD_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_DFB24B_VOFF, + .poff = FFB_DFB24B_POFF, + .size = 0x1000000 + }, + { + .voff = FFB_DFB422B_VOFF, + .poff = FFB_DFB422B_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_DFB422BD_VOFF, + .poff = FFB_DFB422BD_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_SFB16Z_VOFF, + .poff = FFB_SFB16Z_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_SFB8Z_VOFF, + .poff = FFB_SFB8Z_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_SFB422_VOFF, + .poff = FFB_SFB422_POFF, + .size = 0x0800000 + }, + { + .voff = FFB_SFB422D_VOFF, + .poff = FFB_SFB422D_POFF, + .size = 0x0800000 
+ }, + { .size = 0 } }; static int ffb_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/drivers/video/leo.c 2003-08-22 19:23:42.000000000 -0700 +++ 25/drivers/video/leo.c 2004-04-18 22:25:24.903044640 -0700 @@ -294,20 +294,72 @@ static int leo_blank(int blank, struct f } static struct sbus_mmap_map leo_mmap_map[] = { - { LEO_SS0_MAP, LEO_OFF_SS0, 0x800000 }, - { LEO_LC_SS0_USR_MAP, LEO_OFF_LC_SS0_USR, 0x1000 }, - { LEO_LD_SS0_MAP, LEO_OFF_LD_SS0, 0x1000 }, - { LEO_LX_CURSOR_MAP, LEO_OFF_LX_CURSOR, 0x1000 }, - { LEO_SS1_MAP, LEO_OFF_SS1, 0x800000 }, - { LEO_LC_SS1_USR_MAP, LEO_OFF_LC_SS1_USR, 0x1000 }, - { LEO_LD_SS1_MAP, LEO_OFF_LD_SS1, 0x1000 }, - { LEO_UNK_MAP, LEO_OFF_UNK, 0x1000 }, - { LEO_LX_KRN_MAP, LEO_OFF_LX_KRN, 0x1000 }, - { LEO_LC_SS0_KRN_MAP, LEO_OFF_LC_SS0_KRN, 0x1000 }, - { LEO_LC_SS1_KRN_MAP, LEO_OFF_LC_SS1_KRN, 0x1000 }, - { LEO_LD_GBL_MAP, LEO_OFF_LD_GBL, 0x1000 }, - { LEO_UNK2_MAP, LEO_OFF_UNK2, 0x100000 }, - { 0, 0, 0 } + { + .voff = LEO_SS0_MAP, + .poff = LEO_OFF_SS0, + .size = 0x800000 + }, + { + .voff = LEO_LC_SS0_USR_MAP, + .poff = LEO_OFF_LC_SS0_USR, + .size = 0x1000 + }, + { + .voff = LEO_LD_SS0_MAP, + .poff = LEO_OFF_LD_SS0, + .size = 0x1000 + }, + { + .voff = LEO_LX_CURSOR_MAP, + .poff = LEO_OFF_LX_CURSOR, + .size = 0x1000 + }, + { + .voff = LEO_SS1_MAP, + .poff = LEO_OFF_SS1, + .size = 0x800000 + }, + { + .voff = LEO_LC_SS1_USR_MAP, + .poff = LEO_OFF_LC_SS1_USR, + .size = 0x1000 + }, + { + .voff = LEO_LD_SS1_MAP, + .poff = LEO_OFF_LD_SS1, + .size = 0x1000 + }, + { + .voff = LEO_UNK_MAP, + .poff = LEO_OFF_UNK, + .size = 0x1000 + }, + { + .voff = LEO_LX_KRN_MAP, + .poff = LEO_OFF_LX_KRN, + .size = 0x1000 + }, + { + .voff = LEO_LC_SS0_KRN_MAP, + .poff = LEO_OFF_LC_SS0_KRN, + .size = 0x1000 + }, + { + .voff = LEO_LC_SS1_KRN_MAP, + .poff = LEO_OFF_LC_SS1_KRN, + .size = 0x1000 + }, + { + .voff = LEO_LD_GBL_MAP, + .poff = LEO_OFF_LD_GBL, + .size = 0x1000 + }, + { + .voff = LEO_UNK2_MAP, + .poff = 
LEO_OFF_UNK2, + .size = 0x100000 + }, + { .size = 0 } }; static int leo_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/drivers/video/tcx.c 2003-06-14 12:17:55.000000000 -0700 +++ 25/drivers/video/tcx.c 2004-04-18 22:25:24.904044488 -0700 @@ -238,20 +238,59 @@ tcx_blank(int blank, struct fb_info *inf } static struct sbus_mmap_map __tcx_mmap_map[TCX_MMAP_ENTRIES] = { - { TCX_RAM8BIT, 0, SBUS_MMAP_FBSIZE(1) }, - { TCX_RAM24BIT, 0, SBUS_MMAP_FBSIZE(4) }, - { TCX_UNK3, 0, SBUS_MMAP_FBSIZE(8) }, - { TCX_UNK4, 0, SBUS_MMAP_FBSIZE(8) }, - { TCX_CONTROLPLANE, 0, SBUS_MMAP_FBSIZE(4) }, - { TCX_UNK6, 0, SBUS_MMAP_FBSIZE(8) }, - { TCX_UNK7, 0, SBUS_MMAP_FBSIZE(8) }, - { TCX_TEC, 0, PAGE_SIZE }, - { TCX_BTREGS, 0, PAGE_SIZE }, - { TCX_THC, 0, PAGE_SIZE }, - { TCX_DHC, 0, PAGE_SIZE }, - { TCX_ALT, 0, PAGE_SIZE }, - { TCX_UNK2, 0, 0x20000 }, - { 0, 0, 0 } + { + .voff = TCX_RAM8BIT, + .size = SBUS_MMAP_FBSIZE(1) + }, + { + .voff = TCX_RAM24BIT, + .size = SBUS_MMAP_FBSIZE(4) + }, + { + .voff = TCX_UNK3, + .size = SBUS_MMAP_FBSIZE(8) + }, + { + .voff = TCX_UNK4, + .size = SBUS_MMAP_FBSIZE(8) + }, + { + .voff = TCX_CONTROLPLANE, + .size = SBUS_MMAP_FBSIZE(4) + }, + { + .voff = TCX_UNK6, + .size = SBUS_MMAP_FBSIZE(8) + }, + { + .voff = TCX_UNK7, + .size = SBUS_MMAP_FBSIZE(8) + }, + { + .voff = TCX_TEC, + .size = PAGE_SIZE + }, + { + .voff = TCX_BTREGS, + .size = PAGE_SIZE + }, + { + .voff = TCX_THC, + .size = PAGE_SIZE + }, + { + .voff = TCX_DHC, + .size = PAGE_SIZE + }, + { + .voff = TCX_ALT, + .size = PAGE_SIZE + }, + { + .voff = TCX_UNK2, + .size = 0x20000 + }, + { .size = 0 } }; static int tcx_mmap(struct fb_info *info, struct file *file, struct vm_area_struct *vma) --- linux-2.6.6-rc1/fs/adfs/super.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/adfs/super.c 2004-04-18 22:25:24.906044184 -0700 @@ -192,6 +192,7 @@ static int parse_options(struct super_bl static int adfs_remount(struct super_block *sb, int *flags, char *data) { + 
*flags |= MS_NODIRATIME; return parse_options(sb, data); } --- linux-2.6.6-rc1/fs/affs/super.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/affs/super.c 2004-04-18 22:25:24.906044184 -0700 @@ -502,6 +502,8 @@ affs_remount(struct super_block *sb, int pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); + *flags |= MS_NODIRATIME; + if (!parse_options(data,&uid,&gid,&mode,&reserved,&root_block, &blocksize,&sbi->s_prefix,sbi->s_volume,&mount_flags)) return -EINVAL; --- linux-2.6.6-rc1/fs/aio.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/aio.c 2004-04-18 22:25:32.683861776 -0700 @@ -64,14 +64,9 @@ static void aio_kick_handler(void *); static int __init aio_setup(void) { kiocb_cachep = kmem_cache_create("kiocb", sizeof(struct kiocb), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!kiocb_cachep) - panic("unable to create kiocb cache\n"); - + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); kioctx_cachep = kmem_cache_create("kioctx", sizeof(struct kioctx), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!kioctx_cachep) - panic("unable to create kioctx cache"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); aio_wq = create_workqueue("aio"); --- linux-2.6.6-rc1/fs/autofs4/autofs_i.h 2003-09-27 18:57:46.000000000 -0700 +++ 25/fs/autofs4/autofs_i.h 2004-04-18 22:25:56.215284456 -0700 @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -82,7 +84,7 @@ struct autofs_wait_queue { char *name; /* This is for status reporting upon return */ int status; - int wait_ctr; + atomic_t wait_ctr; }; #define AUTOFS_SBI_MAGIC 0x6d4a556d @@ -93,7 +95,10 @@ struct autofs_sb_info { pid_t oz_pgrp; int catatonic; int version; + int sub_version; unsigned long exp_timeout; + int reghost_enabled; + int needs_reghost; struct super_block *sb; struct autofs_wait_queue *queues; /* Wait queue pointer */ }; @@ -125,6 +130,12 @@ static inline int autofs4_ispending(stru (inf != NULL && inf->flags & AUTOFS_INF_EXPIRING); } +static inline void autofs4_copy_atime(struct 
file *src, struct file *dst) +{ + dst->f_dentry->d_inode->i_atime = src->f_dentry->d_inode->i_atime; + return; +} + struct inode *autofs4_get_inode(struct super_block *, struct autofs_info *); struct autofs_info *autofs4_init_inf(struct autofs_sb_info *, mode_t mode); void autofs4_free_ino(struct autofs_info *); @@ -141,6 +152,7 @@ int autofs4_expire_multi(struct super_bl extern struct inode_operations autofs4_symlink_inode_operations; extern struct inode_operations autofs4_dir_inode_operations; extern struct inode_operations autofs4_root_inode_operations; +extern struct file_operations autofs4_dir_operations; extern struct file_operations autofs4_root_operations; /* Initializing function */ @@ -157,6 +169,24 @@ enum autofs_notify NFY_EXPIRE }; -int autofs4_wait(struct autofs_sb_info *,struct qstr *, enum autofs_notify); +int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int); void autofs4_catatonic_mode(struct autofs_sb_info *); + +static inline int simple_positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static inline int simple_empty_nolock(struct dentry *dentry) +{ + struct dentry *child; + int ret = 0; + + list_for_each_entry(child, &dentry->d_subdirs, d_child) + if (simple_positive(child)) + goto out; + ret = 1; +out: + return ret; +} --- linux-2.6.6-rc1/fs/autofs4/expire.c 2003-08-08 22:55:13.000000000 -0700 +++ 25/fs/autofs4/expire.c 2004-04-18 22:25:55.490394656 -0700 @@ -4,6 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge + * Copyright 2001-2003 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -12,139 +13,240 @@ * ------------------------------------------------------------------------- */ #include "autofs_i.h" -#include -/* - * Determine if a subtree of 
the namespace is busy. - * - * mnt is the mount tree under the autofs mountpoint - */ -static inline int is_vfsmnt_tree_busy(struct vfsmount *mnt) +static unsigned long now; + +/* Check if a dentry can be expired return 1 if it can else return 0 */ +static inline int autofs4_can_expire(struct dentry *dentry, + unsigned long timeout, int do_now) +{ + struct autofs_info *ino = autofs4_dentry_ino(dentry); + + /* dentry in the process of being deleted */ + if (ino == NULL) + return 0; + + /* No point expiring a pending mount */ + if (dentry->d_flags & DCACHE_AUTOFS_PENDING) + return 0; + + if (!do_now) { + /* Too young to die */ + if (time_after(ino->last_used + timeout, now)) + return 0; + + /* update last_used here :- + - obviously makes sense if it is in use now + - less obviously, prevents rapid-fire expire + attempts if expire fails the first time */ + ino->last_used = now; + } + + return 1; +} + +/* Sorry I can't solve the problem without using counts either */ +static int autofs4_may_umount(struct vfsmount *mnt) { - struct vfsmount *this_parent = mnt; struct list_head *next; - int count; + struct vfsmount *this_parent = mnt; + int actual_refs; + int minimum_refs; - count = atomic_read(&mnt->mnt_count) - 1; + actual_refs = atomic_read(&mnt->mnt_count); + minimum_refs = 2; + spin_lock(&vfsmount_lock); repeat: next = this_parent->mnt_mounts.next; - DPRINTK(("is_vfsmnt_tree_busy: mnt=%p, this_parent=%p, next=%p\n", - mnt, this_parent, next)); resume: - for( ; next != &this_parent->mnt_mounts; next = next->next) { - struct vfsmount *p = list_entry(next, struct vfsmount, - mnt_child); - - /* -1 for struct vfs_mount's normal count, - -1 to compensate for child's reference to parent */ - count += atomic_read(&p->mnt_count) - 1 - 1; + while (next != &this_parent->mnt_mounts) { + struct vfsmount *p = list_entry(next, struct vfsmount, mnt_child); + + next = next->next; - DPRINTK(("is_vfsmnt_tree_busy: p=%p, count now %d\n", - p, count)); + actual_refs += 
atomic_read(&p->mnt_count); + minimum_refs += 2; - if (!list_empty(&p->mnt_mounts)) { + if ( !list_empty(&p->mnt_mounts) ) { this_parent = p; goto repeat; } - /* root is busy if any leaf is busy */ - if (atomic_read(&p->mnt_count) > 1) - return 1; } - /* All done at this level ... ascend and resume the search. */ if (this_parent != mnt) { - next = this_parent->mnt_child.next; + next = this_parent->mnt_child.next; this_parent = this_parent->mnt_parent; goto resume; } + spin_unlock(&vfsmount_lock); - DPRINTK(("is_vfsmnt_tree_busy: count=%d\n", count)); - return count != 0; /* remaining users? */ + DPRINTK(("autofs4_may_umount: done actual = %d, minimum = %d\n", + actual_refs, minimum_refs)); + + return actual_refs > minimum_refs; } -/* Traverse a dentry's list of vfsmounts and return the number of - non-busy mounts */ -static int check_vfsmnt(struct vfsmount *mnt, struct dentry *dentry) +/* Check a mount point for busyness: return 1 if not busy, otherwise 0 */ +static int autofs4_check_mount(struct vfsmount *mnt, struct dentry *dentry) { - int ret = dentry->d_mounted; - struct vfsmount *vfs = lookup_mnt(mnt, dentry); + int status = 0; - if (vfs) { - mntput(vfs); - if (is_vfsmnt_tree_busy(vfs)) - ret--; - } - DPRINTK(("check_vfsmnt: ret=%d\n", ret)); - return ret; + DPRINTK(("autofs4_check_mount: dentry %p %.*s\n", + dentry, (int)dentry->d_name.len, dentry->d_name.name)); + + mntget(mnt); + dget(dentry); + + if (!follow_down(&mnt, &dentry)) + goto done; + + while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) + ; + + /* This is an autofs submount, we can't expire it */ + if (is_autofs4_dentry(dentry)) + goto done; + + /* The big question */ + if (autofs4_may_umount(mnt) == 0) + status = 1; +done: + DPRINTK(("autofs4_check_mount: returning = %d\n", status)); + mntput(mnt); + dput(dentry); + return status; } -/* Check dentry tree for busyness. If a dentry appears to be busy - because it is a mountpoint, check to see if the mounted - filesystem is busy. 
*/ -static int is_tree_busy(struct vfsmount *topmnt, struct dentry *top) +/* Check a directory tree of mount points for busyness + * The tree is not busy iff no mountpoints are busy + * Return 1 if the tree is not busy or 0 otherwise + */ +static int autofs4_check_tree(struct vfsmount *mnt, + struct dentry *top, + unsigned long timeout, + int do_now) { - struct dentry *this_parent; + struct dentry *this_parent = top; struct list_head *next; - int count; - count = atomic_read(&top->d_count); - - DPRINTK(("is_tree_busy: top=%p initial count=%d\n", - top, count)); - this_parent = top; - - if (is_autofs4_dentry(top)) { - count--; - DPRINTK(("is_tree_busy: autofs; count=%d\n", count)); - } + DPRINTK(("autofs4_check_tree: parent %p %.*s\n", + top, (int)top->d_name.len, top->d_name.name)); - if (d_mountpoint(top)) - count -= check_vfsmnt(topmnt, top); + /* Negative dentry - give up */ + if (!simple_positive(top)) + return 0; + + /* Timeout of a tree mount is determined by its top dentry */ + if (!autofs4_can_expire(top, timeout, do_now)) + return 0; - repeat: + spin_lock(&dcache_lock); +repeat: next = this_parent->d_subdirs.next; - resume: +resume: while (next != &this_parent->d_subdirs) { - int adj = 0; - struct dentry *dentry = list_entry(next, struct dentry, - d_child); + struct dentry *dentry = list_entry(next, struct dentry, d_child); + + /* Negative dentry - give up */ + if (!simple_positive(dentry)) { + next = next->next; + continue; + } + + DPRINTK(("autofs4_check_tree: dentry %p %.*s\n", + dentry, (int)dentry->d_name.len, dentry->d_name.name)); + + if (!simple_empty_nolock(dentry)) { + this_parent = dentry; + goto repeat; + } + + dentry = dget(dentry); + spin_unlock(&dcache_lock); + + if (d_mountpoint(dentry)) { + /* First busy => tree busy */ + if (!autofs4_check_mount(mnt, dentry)) { + dput(dentry); + return 0; + } + } + + dput(dentry); + spin_lock(&dcache_lock); next = next->next; + } + + if (this_parent != top) { + next = this_parent->d_child.next; + this_parent 
= this_parent->d_parent; + goto resume; + } + spin_unlock(&dcache_lock); - count += atomic_read(&dentry->d_count) - 1; + return 1; +} + +struct dentry *autofs4_check_leaves(struct vfsmount *mnt, + struct dentry *parent, + unsigned long timeout, + int do_now) +{ + struct dentry *this_parent = parent; + struct list_head *next; + + DPRINTK(("autofs4_check_leaves: parent %p %.*s\n", + parent, (int)parent->d_name.len, parent->d_name.name)); - if (d_mountpoint(dentry)) - adj += check_vfsmnt(topmnt, dentry); + spin_lock(&dcache_lock); +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct dentry *dentry = list_entry(next, struct dentry, d_child); - if (is_autofs4_dentry(dentry)) { - adj++; - DPRINTK(("is_tree_busy: autofs; adj=%d\n", - adj)); + /* Negative dentry - give up */ + if (!simple_positive(dentry)) { + next = next->next; + continue; } - count -= adj; + DPRINTK(("autofs4_check_leaves: dentry %p %.*s\n", + dentry, (int)dentry->d_name.len, dentry->d_name.name)); if (!list_empty(&dentry->d_subdirs)) { this_parent = dentry; goto repeat; } - if (atomic_read(&dentry->d_count) != adj) { - DPRINTK(("is_tree_busy: busy leaf (d_count=%d adj=%d)\n", - atomic_read(&dentry->d_count), adj)); - return 1; + dentry = dget(dentry); + spin_unlock(&dcache_lock); + + if (d_mountpoint(dentry)) { + /* Can we expire this guy */ + if (!autofs4_can_expire(dentry, timeout, do_now)) + goto cont; + + /* Can we umount this guy */ + if (autofs4_check_mount(mnt, dentry)) + return dentry; + } +cont: + dput(dentry); + spin_lock(&dcache_lock); + next = next->next; } - /* All done at this level ... ascend and resume the search. */ - if (this_parent != top) { - next = this_parent->d_child.next; + if (this_parent != parent) { + next = this_parent->d_child.next; this_parent = this_parent->d_parent; goto resume; } + spin_unlock(&dcache_lock); - DPRINTK(("is_tree_busy: count=%d\n", count)); - return count != 0; /* remaining users? 
*/ + return NULL; } /* @@ -156,61 +258,86 @@ static int is_tree_busy(struct vfsmount static struct dentry *autofs4_expire(struct super_block *sb, struct vfsmount *mnt, struct autofs_sb_info *sbi, - int do_now) + int how) { - unsigned long now = jiffies; unsigned long timeout; struct dentry *root = sb->s_root; - struct list_head *tmp; + struct dentry *expired = NULL; + struct list_head *next; + int do_now = how & AUTOFS_EXP_IMMEDIATE; + int exp_leaves = how & AUTOFS_EXP_LEAVES; - if (!sbi->exp_timeout || !root) + if ( !sbi->exp_timeout || !root ) return NULL; + now = jiffies; timeout = sbi->exp_timeout; spin_lock(&dcache_lock); - for(tmp = root->d_subdirs.next; - tmp != &root->d_subdirs; - tmp = tmp->next) { - struct autofs_info *ino; - struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + next = root->d_subdirs.next; - if (dentry->d_inode == NULL) + /* On exit from the loop expire is set to a dgot dentry + * to expire or it's NULL */ + while ( next != &root->d_subdirs ) { + struct dentry *dentry = list_entry(next, struct dentry, d_child); + + /* Negative dentry - give up */ + if ( !simple_positive(dentry) ) { + next = next->next; continue; + } - ino = autofs4_dentry_ino(dentry); + dentry = dget(dentry); + spin_unlock(&dcache_lock); - if (ino == NULL) { - /* dentry in the process of being deleted */ - continue; + /* Case 1: indirect mount or top level direct mount */ + if (d_mountpoint(dentry)) { + DPRINTK(("autofs4_expire: checking mountpoint %p %.*s\n", + dentry, (int)dentry->d_name.len, dentry->d_name.name)); + + /* Can we expire this guy */ + if (!autofs4_can_expire(dentry, timeout, do_now)) + goto next; + + /* Can we umount this guy */ + if (autofs4_check_mount(mnt, dentry)) { + expired = dentry; + break; + } + goto next; } - /* No point expiring a pending mount */ - if (dentry->d_flags & DCACHE_AUTOFS_PENDING) - continue; + if ( simple_empty(dentry) ) + goto next; - if (!do_now) { - /* Too young to die */ - if (time_after(ino->last_used + timeout, 
now)) - continue; - - /* update last_used here :- - - obviously makes sense if it is in use now - - less obviously, prevents rapid-fire expire - attempts if expire fails the first time */ - ino->last_used = now; + /* Case 2: tree mount, expire iff entire tree is not busy */ + if (!exp_leaves) { + if (autofs4_check_tree(mnt, dentry, timeout, do_now)) { + expired = dentry; + break; + } + /* Case 3: direct mount, expire individual leaves */ + } else { + expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); + if (expired) { + dput(dentry); + break; + } } - if (!is_tree_busy(mnt, dentry)) { - DPRINTK(("autofs_expire: returning %p %.*s\n", - dentry, (int)dentry->d_name.len, dentry->d_name.name)); - /* Start from here next time */ - list_del(&root->d_subdirs); - list_add(&root->d_subdirs, &dentry->d_child); - dget(dentry); - spin_unlock(&dcache_lock); +next: + dput(dentry); + spin_lock(&dcache_lock); + next = next->next; + } - return dentry; - } + if ( expired ) { + DPRINTK(("autofs4_expire: returning %p %.*s\n", + expired, (int)expired->d_name.len, expired->d_name.name)); + spin_lock(&dcache_lock); + list_del(&expired->d_parent->d_subdirs); + list_add(&expired->d_parent->d_subdirs, &expired->d_child); + spin_unlock(&dcache_lock); + return expired; } spin_unlock(&dcache_lock); @@ -263,7 +390,7 @@ int autofs4_expire_multi(struct super_bl /* This is synchronous because it makes the daemon a little easier */ de_info->flags |= AUTOFS_INF_EXPIRING; - ret = autofs4_wait(sbi, &dentry->d_name, NFY_EXPIRE); + ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); de_info->flags &= ~AUTOFS_INF_EXPIRING; dput(dentry); } --- linux-2.6.6-rc1/fs/autofs4/inode.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/autofs4/inode.c 2004-04-18 22:25:55.491394504 -0700 @@ -187,6 +187,7 @@ int autofs4_fill_super(struct super_bloc struct file * pipe; int pipefd; struct autofs_sb_info *sbi; + struct autofs_info *ino; int minproto, maxproto; sbi = (struct autofs_sb_info *) kmalloc(sizeof(*sbi), 
GFP_KERNEL); @@ -203,6 +204,7 @@ int autofs4_fill_super(struct super_bloc sbi->oz_pgrp = process_group(current); sbi->sb = s; sbi->version = 0; + sbi->sub_version = 0; sbi->queues = NULL; s->s_blocksize = 1024; s->s_blocksize_bits = 10; @@ -212,7 +214,11 @@ int autofs4_fill_super(struct super_bloc /* * Get the root inode and dentry, but defer checking for errors. */ - root_inode = autofs4_get_inode(s, autofs4_mkroot(sbi)); + ino = autofs4_mkroot(sbi); + if (!ino) + goto fail_free; + root_inode = autofs4_get_inode(s, ino); + kfree(ino); if (!root_inode) goto fail_free; @@ -244,6 +250,7 @@ int autofs4_fill_super(struct super_bloc } sbi->version = maxproto > AUTOFS_MAX_PROTO_VERSION ? AUTOFS_MAX_PROTO_VERSION : maxproto; + sbi->sub_version = AUTOFS_PROTO_SUBVERSION; DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, sbi->oz_pgrp)); pipe = fget(pipefd); @@ -305,7 +312,7 @@ struct inode *autofs4_get_inode(struct s if (S_ISDIR(inf->mode)) { inode->i_nlink = 2; inode->i_op = &autofs4_dir_inode_operations; - inode->i_fop = &simple_dir_operations; + inode->i_fop = &autofs4_dir_operations; } else if (S_ISLNK(inf->mode)) { inode->i_size = inf->size; inode->i_op = &autofs4_symlink_inode_operations; --- linux-2.6.6-rc1/fs/autofs4/root.c 2003-09-27 18:57:46.000000000 -0700 +++ 25/fs/autofs4/root.c 2004-04-18 22:25:56.217284152 -0700 @@ -4,6 +4,7 @@ * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved * Copyright 1999-2000 Jeremy Fitzhardinge + * Copyright 2001-2003 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -24,17 +25,28 @@ static int autofs4_dir_unlink(struct ino static int autofs4_dir_rmdir(struct inode *,struct dentry *); static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); +static int autofs4_dir_open(struct inode *inode, struct file *file); +static 
int autofs4_dir_close(struct inode *inode, struct file *file); +static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir); +static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir); static struct dentry *autofs4_root_lookup(struct inode *,struct dentry *, struct nameidata *); +static int autofs4_dcache_readdir(struct file *, void *, filldir_t); struct file_operations autofs4_root_operations = { .open = dcache_dir_open, .release = dcache_dir_close, - .llseek = dcache_dir_lseek, .read = generic_read_dir, - .readdir = dcache_readdir, + .readdir = autofs4_root_readdir, .ioctl = autofs4_root_ioctl, }; +struct file_operations autofs4_dir_operations = { + .open = autofs4_dir_open, + .release = autofs4_dir_close, + .read = generic_read_dir, + .readdir = autofs4_dir_readdir, +}; + struct inode_operations autofs4_root_inode_operations = { .lookup = autofs4_root_lookup, .unlink = autofs4_dir_unlink, @@ -51,6 +63,30 @@ struct inode_operations autofs4_dir_inod .rmdir = autofs4_dir_rmdir, }; +static int autofs4_root_readdir(struct file *file, void *dirent, + filldir_t filldir) +{ + struct autofs_sb_info *sbi = autofs4_sbi(file->f_dentry->d_sb); + int oz_mode = autofs4_oz_mode(sbi); + + DPRINTK(("autofs4_root_readdir called, filp->f_pos = %lld\n", + file->f_pos)); + + /* + * Don't set reghost flag if: + * 1) f_pos is larger than zero -- we've already been here. + * 2) we haven't even enabled reghosting in the 1st place. 
+ * 3) this is the daemon doing a readdir + */ + if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled) + sbi->needs_reghost = 1; + + DPRINTK(("autofs4_root_readdir: needs_reghost = %d\n", + sbi->needs_reghost)); + + return autofs4_dcache_readdir(file, dirent, filldir); +} + /* Update usage from here to top of tree, so that scan of top-level directories will give a useful result */ static void autofs4_update_usage(struct dentry *dentry) @@ -67,9 +103,191 @@ static void autofs4_update_usage(struct } } +/* + * From 2.4 kernel readdir.c + */ +static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + int i; + struct dentry *dentry = filp->f_dentry; + + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, dentry->d_inode->i_ino, DT_DIR) < 0) + break; + i++; + filp->f_pos++; + /* fallthrough */ + case 1: + if (filldir(dirent, "..", 2, i, dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) + break; + i++; + filp->f_pos++; + /* fallthrough */ + default: { + struct list_head *list; + int j = i-2; + + spin_lock(&dcache_lock); + list = dentry->d_subdirs.next; + + for (;;) { + if (list == &dentry->d_subdirs) { + spin_unlock(&dcache_lock); + return 0; + } + if (!j) + break; + j--; + list = list->next; + } + + while(1) { + struct dentry *de = list_entry(list, struct dentry, d_child); + + if (!d_unhashed(de) && de->d_inode) { + spin_unlock(&dcache_lock); + if (filldir(dirent, de->d_name.name, de->d_name.len, filp->f_pos, de->d_inode->i_ino, DT_UNKNOWN) < 0) + break; + spin_lock(&dcache_lock); + } + filp->f_pos++; + list = list->next; + if (list != &dentry->d_subdirs) + continue; + spin_unlock(&dcache_lock); + break; + } + } + } + return 0; +} + +static int autofs4_dir_open(struct inode *inode, struct file *file) +{ + struct dentry *dentry = file->f_dentry; + struct vfsmount *mnt = file->f_vfsmnt; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + int status; + + DPRINTK(("autofs4_dir_open: file=%p dentry=%p %.*s\n", + 
file, dentry, dentry->d_name.len, dentry->d_name.name)); + + if (autofs4_oz_mode(sbi)) + goto out; + + if (autofs4_ispending(dentry)) { + DPRINTK(("autofs4_dir_open: dentry busy\n")); + return -EBUSY; + } + + if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { + struct nameidata nd; + int empty; + + /* In case there are stale directory dentrys from a failed mount */ + spin_lock(&dcache_lock); + empty = list_empty(&dentry->d_subdirs); + spin_unlock(&dcache_lock); + + if (!empty) + d_invalidate(dentry); + + nd.flags = LOOKUP_DIRECTORY; + status = (dentry->d_op->d_revalidate)(dentry, &nd); + + if (!status) + return -ENOENT; + } + + if (d_mountpoint(dentry)) { + struct file *fp = NULL; + struct vfsmount *fp_mnt = mntget(mnt); + struct dentry *fp_dentry = dget(dentry); + + while (follow_down(&fp_mnt, &fp_dentry) && d_mountpoint(fp_dentry)); + + fp = dentry_open(fp_dentry, fp_mnt, file->f_flags); + status = PTR_ERR(fp); + if (IS_ERR(fp)) { + file->private_data = NULL; + return status; + } + file->private_data = fp; + } +out: + return 0; +} + +static int autofs4_dir_close(struct inode *inode, struct file *file) +{ + struct dentry *dentry = file->f_dentry; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + DPRINTK(("autofs4_dir_close: file=%p dentry=%p %.*s\n", + file, dentry, dentry->d_name.len, dentry->d_name.name)); + + if (autofs4_oz_mode(sbi)) + goto out; + + if (autofs4_ispending(dentry)) { + DPRINTK(("autofs4_dir_close: dentry busy\n")); + return -EBUSY; + } + + if (d_mountpoint(dentry)) { + struct file *fp = file->private_data; + + if (!fp) + return -ENOENT; + + filp_close(fp, current->files); + file->private_data = NULL; + } +out: + return 0; +} + +static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + struct dentry *dentry = file->f_dentry; + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + int status; + + DPRINTK(("autofs4_readdir: file=%p dentry=%p %.*s\n", + file, dentry, 
dentry->d_name.len, dentry->d_name.name)); + + if (autofs4_oz_mode(sbi)) + goto out; + + if (autofs4_ispending(dentry)) { + DPRINTK(("autofs4_readdir: dentry busy\n")); + return -EBUSY; + } + + if (d_mountpoint(dentry)) { + struct file *fp = file->private_data; + + if (!fp) + return -ENOENT; + + if (!fp->f_op || !fp->f_op->readdir) + goto out; + + status = vfs_readdir(fp, filldir, dirent); + file->f_pos = fp->f_pos; + if (status) + autofs4_copy_atime(file, fp); + return status; + } +out: + return autofs4_dcache_readdir(file, dirent, filldir); +} + static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, - struct autofs_sb_info *sbi) + struct autofs_sb_info *sbi, int flags) { struct autofs_info *de_info = autofs4_dentry_ino(dentry); int status = 0; @@ -78,11 +296,10 @@ static int try_to_fill_dentry(struct den when expiration is done to trigger mount request with a new dentry */ if (de_info && (de_info->flags & AUTOFS_INF_EXPIRING)) { - DPRINTK(("try_to_fill_entry: waiting for expire %p name=%.*s, flags&PENDING=%s de_info=%p de_info->flags=%x\n", - dentry, dentry->d_name.len, dentry->d_name.name, - dentry->d_flags & DCACHE_AUTOFS_PENDING?"t":"f", - de_info, de_info?de_info->flags:0)); - status = autofs4_wait(sbi, &dentry->d_name, NFY_NONE); + DPRINTK(("try_to_fill_entry: waiting for expire %p name=%.*s\n", + dentry, dentry->d_name.len, dentry->d_name.name)); + + status = autofs4_wait(sbi, dentry, NFY_NONE); DPRINTK(("try_to_fill_entry: expire done status=%d\n", status)); @@ -93,11 +310,11 @@ static int try_to_fill_dentry(struct den dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode)); /* Wait for a pending mount, triggering one if there isn't one already */ - while(dentry->d_inode == NULL) { - DPRINTK(("try_to_fill_entry: waiting for mount name=%.*s, de_info=%p de_info->flags=%x\n", - dentry->d_name.len, dentry->d_name.name, - de_info, de_info?de_info->flags:0)); - status = autofs4_wait(sbi, &dentry->d_name, NFY_MOUNT); + if 
(dentry->d_inode == NULL) { + DPRINTK(("try_to_fill_entry: waiting for mount name=%.*s\n", + dentry->d_name.len, dentry->d_name.name)); + + status = autofs4_wait(sbi, dentry, NFY_MOUNT); DPRINTK(("try_to_fill_entry: mount done status=%d\n", status)); @@ -113,19 +330,22 @@ static int try_to_fill_dentry(struct den /* Return a negative dentry, but leave it "pending" */ return 1; } - } + /* Trigger mount for path component or follow link */ + } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || + current->link_count) { + DPRINTK(("try_to_fill_entry: waiting for mount name=%.*s\n", + dentry->d_name.len, dentry->d_name.name)); - /* If this is an unused directory that isn't a mount point, - bitch at the daemon and fix it in user space */ - spin_lock(&dcache_lock); - if (S_ISDIR(dentry->d_inode->i_mode) && - !d_mountpoint(dentry) && - list_empty(&dentry->d_subdirs)) { - DPRINTK(("try_to_fill_entry: mounting existing dir\n")); - spin_unlock(&dcache_lock); - return autofs4_wait(sbi, &dentry->d_name, NFY_MOUNT) == 0; + dentry->d_flags |= DCACHE_AUTOFS_PENDING; + status = autofs4_wait(sbi, dentry, NFY_MOUNT); + + DPRINTK(("try_to_fill_entry: mount done status=%d\n", status)); + + if (status) { + dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; + return 0; + } } - spin_unlock(&dcache_lock); /* We don't update the usages for the autofs daemon itself, this is necessary for recursive autofs mounts */ @@ -136,25 +356,25 @@ static int try_to_fill_dentry(struct den return 1; } - /* * Revalidate is called on every cache lookup. Some of those * cache lookups may actually happen while the dentry is not * yet completely filled in, and revalidate has to delay such * lookups.. 
*/ -static int autofs4_root_revalidate(struct dentry * dentry, struct nameidata *nd) +static int autofs4_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * dir = dentry->d_parent->d_inode; struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); int oz_mode = autofs4_oz_mode(sbi); + int flags = nd ? nd->flags : 0; + int status = 1; /* Pending dentry */ if (autofs4_ispending(dentry)) { - if (autofs4_oz_mode(sbi)) - return 1; - else - return try_to_fill_dentry(dentry, dir->i_sb, sbi); + if (!oz_mode) + status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); + return status; } /* Negative dentry.. invalidate if "old" */ @@ -166,13 +386,12 @@ static int autofs4_root_revalidate(struc if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) { - DPRINTK(("autofs_root_revalidate: dentry=%p %.*s, emptydir\n", + DPRINTK(("autofs4_root_revalidate: dentry=%p %.*s, emptydir\n", dentry, dentry->d_name.len, dentry->d_name.name)); spin_unlock(&dcache_lock); - if (oz_mode) - return 1; - else - return try_to_fill_dentry(dentry, dir->i_sb, sbi); + if (!oz_mode) + status = try_to_fill_dentry(dentry, dir->i_sb, sbi, flags); + return status; } spin_unlock(&dcache_lock); @@ -183,16 +402,6 @@ static int autofs4_root_revalidate(struc return 1; } -static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd) -{ - struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); - - if (!autofs4_oz_mode(sbi)) - autofs4_update_usage(dentry); - - return 1; -} - static void autofs4_dentry_release(struct dentry *de) { struct autofs_info *inf; @@ -212,7 +421,7 @@ static void autofs4_dentry_release(struc /* For dentries of directories in the root dir */ static struct dentry_operations autofs4_root_dentry_operations = { - .d_revalidate = autofs4_root_revalidate, + .d_revalidate = autofs4_revalidate, .d_release = autofs4_dentry_release, }; @@ -224,11 +433,10 @@ static struct dentry_operations autofs4_ /* Lookups in non-root dirs never 
find anything - if it's there, it's already in the dcache */ -/* SMP-safe */ static struct dentry *autofs4_dir_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { #if 0 - DPRINTK(("autofs_dir_lookup: ignoring lookup of %.*s/%.*s\n", + DPRINTK(("autofs4_dir_lookup: iignoring lookup of %.*s/%.*s\n", dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, dentry->d_name.len, dentry->d_name.name)); #endif @@ -244,7 +452,7 @@ static struct dentry *autofs4_root_looku struct autofs_sb_info *sbi; int oz_mode; - DPRINTK(("autofs_root_lookup: name = %.*s\n", + DPRINTK(("autofs4_root_lookup: name = %.*s\n", dentry->d_name.len, dentry->d_name.name)); if (dentry->d_name.len > NAME_MAX) @@ -252,9 +460,8 @@ static struct dentry *autofs4_root_looku sbi = autofs4_sbi(dir->i_sb); - lock_kernel(); oz_mode = autofs4_oz_mode(sbi); - DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", + DPRINTK(("autofs4_lookup: pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", current->pid, process_group(current), sbi->catatonic, oz_mode)); /* @@ -285,12 +492,16 @@ static struct dentry *autofs4_root_looku * a signal. If so we can force a restart.. 
*/ if (dentry->d_flags & DCACHE_AUTOFS_PENDING) { + /* See if we were interrupted */ if (signal_pending(current)) { - unlock_kernel(); - return ERR_PTR(-ERESTARTNOINTR); + sigset_t *sigset = &current->pending.signal; + if (sigismember (sigset, SIGKILL) || + sigismember (sigset, SIGQUIT) || + sigismember (sigset, SIGINT)) { + return ERR_PTR(-ERESTARTNOINTR); + } } } - unlock_kernel(); /* * If this dentry is unhashed, then we shouldn't honour this @@ -313,27 +524,21 @@ static int autofs4_dir_symlink(struct in struct inode *inode; char *cp; - DPRINTK(("autofs_dir_symlink: %s <- %.*s\n", symname, + DPRINTK(("autofs4_dir_symlink: %s <- %.*s\n", symname, dentry->d_name.len, dentry->d_name.name)); - lock_kernel(); - if (!autofs4_oz_mode(sbi)) { - unlock_kernel(); + if (!autofs4_oz_mode(sbi)) return -EACCES; - } ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); - if (ino == NULL) { - unlock_kernel(); + if (ino == NULL) return -ENOSPC; - } ino->size = strlen(symname); ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); if (cp == NULL) { kfree(ino); - unlock_kernel(); return -ENOSPC; } @@ -353,7 +558,6 @@ static int autofs4_dir_symlink(struct in dir->i_mtime = CURRENT_TIME; - unlock_kernel(); return 0; } @@ -370,7 +574,7 @@ static int autofs4_dir_symlink(struct in * If a process is blocked on the dentry waiting for the expire to finish, * it will invalidate the dentry and try to mount with a new one. * - * Also see autofs_dir_rmdir().. + * Also see autofs4_dir_rmdir()..
*/ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) { @@ -378,11 +582,8 @@ static int autofs4_dir_unlink(struct ino struct autofs_info *ino = autofs4_dentry_ino(dentry); /* This allows root to remove symlinks */ - lock_kernel(); - if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) { - unlock_kernel(); + if ( !autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN) ) return -EACCES; - } dput(ino->dentry); @@ -393,8 +594,6 @@ static int autofs4_dir_unlink(struct ino d_drop(dentry); - unlock_kernel(); - return 0; } @@ -403,16 +602,12 @@ static int autofs4_dir_rmdir(struct inod struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); - lock_kernel(); - if (!autofs4_oz_mode(sbi)) { - unlock_kernel(); + if (!autofs4_oz_mode(sbi)) return -EACCES; - } spin_lock(&dcache_lock); if (!list_empty(&dentry->d_subdirs)) { spin_unlock(&dcache_lock); - unlock_kernel(); return -ENOTEMPTY; } __d_drop(dentry); @@ -426,32 +621,24 @@ static int autofs4_dir_rmdir(struct inod if (dir->i_nlink) dir->i_nlink--; - unlock_kernel(); return 0; } - - static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); struct inode *inode; - lock_kernel(); - if ( !autofs4_oz_mode(sbi) ) { - unlock_kernel(); + if ( !autofs4_oz_mode(sbi) ) return -EACCES; - } - DPRINTK(("autofs_dir_mkdir: dentry %p, creating %.*s\n", + DPRINTK(("autofs4_dir_mkdir: dentry %p, creating %.*s\n", dentry, dentry->d_name.len, dentry->d_name.name)); ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); - if (ino == NULL) { - unlock_kernel(); + if (ino == NULL) return -ENOSPC; - } inode = autofs4_get_inode(dir->i_sb, ino); d_instantiate(dentry, inode); @@ -467,7 +654,6 @@ static int autofs4_dir_mkdir(struct inod dir->i_nlink++; dir->i_mtime = CURRENT_TIME; - unlock_kernel(); return 0; } @@ -496,7 +682,68 @@ static inline int 
autofs4_get_protover(s return put_user(sbi->version, p); } -/* Identify autofs_dentries - this is so we can tell if there's +/* Return protocol sub version */ +static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int *p) +{ + return put_user(sbi->sub_version, p); +} + +/* + * Tells the daemon whether we need to reghost or not. Also, clears + * the reghost_needed flag. + */ +static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int *p) +{ + int status; + + DPRINTK(("autofs_ask_reghost: returning %d\n", sbi->needs_reghost)); + + status = put_user(sbi->needs_reghost, p); + if ( status ) + return status; + + sbi->needs_reghost = 0; + return 0; +} + +/* + * Enable / Disable reghosting ioctl() operation + */ +static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int *p) +{ + int status; + int val; + + status = get_user(val, p); + + DPRINTK(("autofs4_toggle_reghost: reghost = %d\n", val)); + + if (status) + return status; + + /* turn on/off reghosting, with the val */ + sbi->reghost_enabled = val; + return 0; +} + +/* +* Tells the daemon whether it can umount the autofs mount. +*/ +static inline int autofs4_ask_umount(struct vfsmount *mnt, int *p) +{ + int status = 0; + + if (may_umount(mnt) == 0) + status = 1; + + DPRINTK(("autofs_ask_umount: returning %d\n", status)); + + status = put_user(status, p); + + return status; +} + +/* Identify autofs4_dentries - this is so we can tell if there's an extra dentry refcount or not. 
We only hold a refcount on the dentry if its non-negative (ie, d_inode != NULL) */ @@ -517,7 +764,7 @@ static int autofs4_root_ioctl(struct ino { struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb); - DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", + DPRINTK(("autofs4_root_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n", cmd,arg,sbi,process_group(current))); if ( _IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || @@ -537,9 +784,19 @@ static int autofs4_root_ioctl(struct ino return 0; case AUTOFS_IOC_PROTOVER: /* Get protocol version */ return autofs4_get_protover(sbi, (int *)arg); + case AUTOFS_IOC_PROTOSUBVER: /* Get protocol sub version */ + return autofs4_get_protosubver(sbi, (int *)arg); case AUTOFS_IOC_SETTIMEOUT: return autofs4_get_set_timeout(sbi,(unsigned long *)arg); + case AUTOFS_IOC_TOGGLEREGHOST: + return autofs4_toggle_reghost(sbi, (int *) arg); + case AUTOFS_IOC_ASKREGHOST: + return autofs4_ask_reghost(sbi, (int *) arg); + + case AUTOFS_IOC_ASKUMOUNT: + return autofs4_ask_umount(filp->f_vfsmnt, (int *) arg); + /* return a single thing to expire */ case AUTOFS_IOC_EXPIRE: return autofs4_expire_run(inode->i_sb,filp->f_vfsmnt,sbi, --- linux-2.6.6-rc1/fs/autofs4/waitq.c 2003-06-14 12:18:34.000000000 -0700 +++ 25/fs/autofs4/waitq.c 2004-04-18 22:25:55.494394048 -0700 @@ -3,6 +3,7 @@ * linux/fs/autofs/waitq.c * * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved + * Copyright 2001-2003 Ian Kent * * This file is part of the Linux kernel and is made available under * the terms of the GNU General Public License, version 2, or at your @@ -16,6 +17,8 @@ #include #include "autofs_i.h" +static spinlock_t waitq_lock = SPIN_LOCK_UNLOCKED; + /* We make this a static variable rather than a part of the superblock; it is better if we don't reassign numbers easily even across filesystems */ static autofs_wqt_t autofs4_next_wait_queue = 1; @@ -37,7 +40,7 @@ void autofs4_catatonic_mode(struct autof wq->status = 
-ENOENT; /* Magic is gone - report failure */ kfree(wq->name); wq->name = NULL; - wake_up(&wq->queue); + wake_up_interruptible(&wq->queue); wq = nwq; } if (sbi->pipe) { @@ -90,7 +93,7 @@ static void autofs4_notify_daemon(struct union autofs_packet_union pkt; size_t pktsz; - DPRINTK(("autofs_notify: wait id = 0x%08lx, name = %.*s, type=%d\n", + DPRINTK(("autofs4_notify_daemon: wait id = 0x%08lx, name = %.*s, type=%d\n", wq->wait_queue_token, wq->len, wq->name, type)); memset(&pkt,0,sizeof pkt); /* For security reasons */ @@ -116,7 +119,7 @@ static void autofs4_notify_daemon(struct memcpy(ep->name, wq->name, wq->len); ep->name[wq->len] = '\0'; } else { - printk("autofs_notify_daemon: bad type %d!\n", type); + printk("autofs4_notify_daemon: bad type %d!\n", type); return; } @@ -124,62 +127,103 @@ static void autofs4_notify_daemon(struct autofs4_catatonic_mode(sbi); } -int autofs4_wait(struct autofs_sb_info *sbi, struct qstr *name, +static int autofs4_getpath(struct autofs_sb_info *sbi, + struct dentry *dentry, char **name) +{ + struct dentry *root = sbi->sb->s_root; + struct dentry *tmp; + char *buf = *name; + char *p; + int len = 0; + + spin_lock(&dcache_lock); + for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) + len += tmp->d_name.len + 1; + + if (--len > NAME_MAX) { + spin_unlock(&dcache_lock); + return 0; + } + + *(buf + len) = '\0'; + p = buf + len - dentry->d_name.len; + strncpy(p, dentry->d_name.name, dentry->d_name.len); + + for (tmp = dentry->d_parent; tmp != root ; tmp = tmp->d_parent) { + *(--p) = '/'; + p -= tmp->d_name.len; + strncpy(p, tmp->d_name.name, tmp->d_name.len); + } + spin_unlock(&dcache_lock); + + return len; +} + +int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, enum autofs_notify notify) { struct autofs_wait_queue *wq; - int status; + char *name; + int len, status; /* In catatonic mode, we don't wait for nobody */ if ( sbi->catatonic ) return -ENOENT; - /* We shouldn't be able to get here, but just in case */ - if ( 
name->len > NAME_MAX ) + name = kmalloc(NAME_MAX + 1, GFP_KERNEL); + if (!name) + return -ENOMEM; + + len = autofs4_getpath(sbi, dentry, &name); + if (!len) { + kfree(name); return -ENOENT; + } - for ( wq = sbi->queues ; wq ; wq = wq->next ) { - if ( wq->hash == name->hash && - wq->len == name->len && - wq->name && !memcmp(wq->name,name->name,name->len) ) + spin_lock(&waitq_lock); + for (wq = sbi->queues ; wq ; wq = wq->next) { + if (wq->hash == dentry->d_name.hash && + wq->len == len && + wq->name && !memcmp(wq->name, name, len)) break; } + spin_unlock(&waitq_lock); if ( !wq ) { /* Create a new wait queue */ wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); - if ( !wq ) - return -ENOMEM; - - wq->name = kmalloc(name->len,GFP_KERNEL); - if ( !wq->name ) { - kfree(wq); + if ( !wq ) { + kfree(name); return -ENOMEM; } + + spin_lock(&waitq_lock); wq->wait_queue_token = autofs4_next_wait_queue; if (++autofs4_next_wait_queue == 0) autofs4_next_wait_queue = 1; - init_waitqueue_head(&wq->queue); - wq->hash = name->hash; - wq->len = name->len; - wq->status = -EINTR; /* Status return if interrupted */ - memcpy(wq->name, name->name, name->len); wq->next = sbi->queues; sbi->queues = wq; + spin_unlock(&waitq_lock); + init_waitqueue_head(&wq->queue); + wq->hash = dentry->d_name.hash; + wq->name = name; + wq->len = len; + wq->status = -EINTR; /* Status return if interrupted */ - DPRINTK(("autofs_wait: new wait id = 0x%08lx, name = %.*s, nfy=%d\n", - wq->wait_queue_token, wq->len, wq->name, notify)); + DPRINTK(("autofs4_wait: new wait id = 0x%08lx, name = %.*s, nfy=%d\n", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify)); /* autofs4_notify_daemon() may block */ - wq->wait_ctr = 2; + atomic_set(&wq->wait_ctr, 2); if (notify != NFY_NONE) { autofs4_notify_daemon(sbi,wq, - notify == NFY_MOUNT ? autofs_ptype_missing : - autofs_ptype_expire_multi); + notify == NFY_MOUNT ? 
+ autofs_ptype_missing : + autofs_ptype_expire_multi); } } else { - wq->wait_ctr++; - DPRINTK(("autofs_wait: existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", - wq->wait_queue_token, wq->len, wq->name, notify)); + atomic_inc(&wq->wait_ctr); + DPRINTK(("autofs4_wait: existing wait id = 0x%08lx, name = %.*s, nfy=%d\n", + (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify)); } /* wq->name is NULL if and only if the lock is already released */ @@ -204,19 +248,19 @@ int autofs4_wait(struct autofs_sb_info * recalc_sigpending(); spin_unlock_irqrestore(&current->sighand->siglock, irqflags); - interruptible_sleep_on(&wq->queue); + wait_event_interruptible(wq->queue, wq->name == NULL); spin_lock_irqsave(&current->sighand->siglock, irqflags); current->blocked = oldset; recalc_sigpending(); spin_unlock_irqrestore(&current->sighand->siglock, irqflags); } else { - DPRINTK(("autofs_wait: skipped sleeping\n")); + DPRINTK(("autofs4_wait: skipped sleeping\n")); } status = wq->status; - if (--wq->wait_ctr == 0) /* Are we the last process to need status? */ + if (atomic_dec_and_test(&wq->wait_ctr)) /* Are we the last process to need status? */ kfree(wq); return status; @@ -227,23 +271,28 @@ int autofs4_wait_release(struct autofs_s { struct autofs_wait_queue *wq, **wql; + spin_lock(&waitq_lock); for ( wql = &sbi->queues ; (wq = *wql) ; wql = &wq->next ) { if ( wq->wait_queue_token == wait_queue_token ) break; } - if ( !wq ) + + if ( !wq ) { + spin_unlock(&waitq_lock); return -EINVAL; + } *wql = wq->next; /* Unlink from chain */ + spin_unlock(&waitq_lock); kfree(wq->name); wq->name = NULL; /* Do not wait on this queue */ wq->status = status; - if (--wq->wait_ctr == 0) /* Is anyone still waiting for this guy?
*/ kfree(wq); else - wake_up(&wq->queue); + wake_up_interruptible(&wq->queue); return 0; } --- linux-2.6.6-rc1/fs/binfmt_aout.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/binfmt_aout.c 2004-04-18 22:26:02.143383248 -0700 @@ -27,7 +27,6 @@ #include #include -#include #include static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); --- linux-2.6.6-rc1/fs/binfmt_elf.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/binfmt_elf.c 2004-04-18 22:26:02.144383096 -0700 @@ -40,7 +40,6 @@ #include #include -#include #include @@ -522,7 +521,8 @@ static int load_elf_binary(struct linux_ goto out_free_ph; files = current->files; /* Refcounted so ok */ - if(unshare_files() < 0) + retval = unshare_files(); + if (retval < 0) goto out_free_ph; if (files == current->files) { put_files_struct(files); --- linux-2.6.6-rc1/fs/binfmt_flat.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/binfmt_flat.c 2004-04-18 22:26:02.145382944 -0700 @@ -40,7 +40,6 @@ #include #include #include -#include #include #include --- linux-2.6.6-rc1/fs/binfmt_misc.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/binfmt_misc.c 2004-04-18 22:25:42.869313352 -0700 @@ -39,6 +39,8 @@ static int enabled = 1; enum {Enabled, Magic}; #define MISC_FMT_PRESERVE_ARGV0 (1<<31) +#define MISC_FMT_OPEN_BINARY (1<<30) +#define MISC_FMT_CREDENTIALS (1<<29) typedef struct { struct list_head list; @@ -102,10 +104,15 @@ static Node *check_file(struct linux_bin static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) { Node *fmt; - struct file * file; + struct file * interp_file = NULL; + struct file * binary_file = NULL; char iname[BINPRM_BUF_SIZE]; char *iname_addr = iname; int retval; + int fd_binary = -1; + char fd_str[32]; + char * fdsp = fd_str; + int is_open_bin; retval = -ENOEXEC; if (!enabled) @@ -120,33 +127,105 @@ static int load_misc_binary(struct linux if (!fmt) goto _ret; - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; + is_open_bin = (fmt->flags & 
MISC_FMT_OPEN_BINARY) ? 1 : 0; + + if (is_open_bin) { + /* if the binary should be opened on behalf of the + * interpreter than keep it open and assign descriptor + * to it */ + fd_binary = get_unused_fd (); + if (fd_binary < 0) { + retval = fd_binary; + goto _ret; + } + snprintf (fd_str, sizeof(fd_str) - 1, "%d", fd_binary); + } else { + allow_write_access (bprm->file); + fput (bprm->file); + bprm->file = NULL; + } /* Build args for interpreter */ if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) { remove_arg_zero(bprm); } - retval = copy_strings_kernel(1, &bprm->interp, bprm); - if (retval < 0) goto _ret; - bprm->argc++; - retval = copy_strings_kernel(1, &iname_addr, bprm); - if (retval < 0) goto _ret; - bprm->argc++; + + if (is_open_bin) { + /* make argv[1] be the file descriptor of the binary */ + retval = copy_strings_kernel (1, &fdsp, bprm); + } else { + /* make argv[1] be the path to the binary */ + retval = copy_strings_kernel (1, &bprm->interp, bprm); + } + if (retval < 0) + goto _error; + bprm->argc ++; + retval = copy_strings_kernel (1, &iname_addr, bprm); + if (retval < 0) + goto _error; + bprm->argc ++; bprm->interp = iname; /* for binfmt_script */ - file = open_exec(iname); - retval = PTR_ERR(file); - if (IS_ERR(file)) - goto _ret; - bprm->file = file; + interp_file = open_exec (iname); + retval = PTR_ERR (interp_file); + if (IS_ERR (interp_file)) + goto _error; + + + binary_file = bprm->file; + if (fmt->flags & MISC_FMT_CREDENTIALS) { + /* + * Call prepare_binprm before switching to interpreter's file + * so that all security calculation will be done according to + * binary and not interpreter + */ + retval = prepare_binprm(bprm); + if (retval < 0) + goto _error; + bprm->file = interp_file; + memset(bprm->buf, 0, BINPRM_BUF_SIZE); + retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); + } else { + bprm->file = interp_file; + retval = prepare_binprm (bprm); + } + + if (retval < 0) + goto _error; + + if (is_open_bin) { + /* if the binary is not 
readable than enforce mm->dumpable=0 + regardless of the interpreter's permissions */ + if (permission (binary_file->f_dentry->d_inode, MAY_READ, NULL)) { + bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP; + } + /* install the binary's fd. it is done at the latest possible point + * because once it is installed it will need to be sys_close()ed + * in case of error. + */ + fd_install (fd_binary, binary_file); + } + + retval = search_binary_handler (bprm, regs); + + if (retval < 0) + goto _error_close_file; - retval = prepare_binprm(bprm); - if (retval >= 0) - retval = search_binary_handler(bprm, regs); _ret: return retval; + +_error_close_file: + if (fd_binary > 0) { + sys_close (fd_binary); + fd_binary = -1; + bprm->file = NULL; + } +_error: + if (fd_binary > 0) + put_unused_fd (fd_binary); + bprm->interp_flags = 0; + goto _ret; + } /* Command parsers */ @@ -191,6 +270,36 @@ static int unquote(char *from) return p - from; } +static inline char * check_special_flags (char * sfs, Node * e) +{ + char * p = sfs; + int cont = 1; + + /* special flags */ + while (cont) { + switch (*p) { + case 'P': + p++; + e->flags |= MISC_FMT_PRESERVE_ARGV0; + break; + case 'O': + p++; + e->flags |= MISC_FMT_OPEN_BINARY; + break; + case 'C': + p++; + /* this flags also implies the + open-binary flag */ + e->flags |= (MISC_FMT_CREDENTIALS | + MISC_FMT_OPEN_BINARY); + break; + default: + cont = 0; + } + } + + return p; +} /* * This registers a new binary format, it recognises the syntax * ':name:type:offset:magic:mask:interpreter:' @@ -293,10 +402,8 @@ static Node *create_entry(const char *bu if (!e->interpreter[0]) goto Einval; - if (*p == 'P') { - p++; - e->flags |= MISC_FMT_PRESERVE_ARGV0; - } + + p = check_special_flags (p, e); if (*p == '\n') p++; @@ -346,6 +453,7 @@ static void entry_status(Node *e, char * { char *dp; char *status = "disabled"; + const char * flags = "flags: "; if (test_bit(Enabled, &e->flags)) status = "enabled"; @@ -357,6 +465,22 @@ static void 
entry_status(Node *e, char * sprintf(page, "%s\ninterpreter %s\n", status, e->interpreter); dp = page + strlen(page); + + /* print the special flags */ + sprintf (dp, "%s", flags); + dp += strlen (flags); + if (e->flags & MISC_FMT_PRESERVE_ARGV0) { + *dp ++ = 'P'; + } + if (e->flags & MISC_FMT_OPEN_BINARY) { + *dp ++ = 'O'; + } + if (e->flags & MISC_FMT_CREDENTIALS) { + *dp ++ = 'C'; + } + *dp ++ = '\n'; + + if (!test_bit(Magic, &e->flags)) { sprintf(dp, "extension .%s\n", e->magic); } else { --- linux-2.6.6-rc1/fs/bio.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/bio.c 2004-04-18 22:25:32.683861776 -0700 @@ -808,9 +808,7 @@ static void __init biovec_init_pools(voi size = bp->nr_vecs * sizeof(struct bio_vec); bp->slab = kmem_cache_create(bp->name, size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!bp->slab) - panic("biovec: can't init slab cache\n"); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); if (i >= scale) pool_entries >>= 1; @@ -825,16 +823,16 @@ static void __init biovec_init_pools(voi static int __init init_bio(void) { bio_slab = kmem_cache_create("bio", sizeof(struct bio), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!bio_slab) - panic("bio: can't create slab cache\n"); - bio_pool = mempool_create(BIO_POOL_SIZE, mempool_alloc_slab, mempool_free_slab, bio_slab); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + bio_pool = mempool_create(BIO_POOL_SIZE, mempool_alloc_slab, + mempool_free_slab, bio_slab); if (!bio_pool) panic("bio: can't create mempool\n"); biovec_init_pools(); - bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, bio_pair_alloc, bio_pair_free, NULL); + bio_split_pool = mempool_create(BIO_SPLIT_ENTRIES, + bio_pair_alloc, bio_pair_free, NULL); if (!bio_split_pool) panic("bio: can't create split pool\n"); --- linux-2.6.6-rc1/fs/block_dev.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/block_dev.c 2004-04-18 22:26:00.415645904 -0700 @@ -148,7 +148,7 @@ blkdev_get_blocks(struct inode *inode, s return 0; } -static int +static ssize_t 
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -251,6 +251,7 @@ static void init_once(void * foo, kmem_c { memset(bdev, 0, sizeof(*bdev)); sema_init(&bdev->bd_sem, 1); + sema_init(&bdev->bd_mount_sem, 1); INIT_LIST_HEAD(&bdev->bd_inodes); INIT_LIST_HEAD(&bdev->bd_list); inode_init_once(&ei->vfs_inode); @@ -302,14 +303,9 @@ struct super_block *blockdev_superblock; void __init bdev_cache_init(void) { int err; - bdev_cachep = kmem_cache_create("bdev_cache", - sizeof(struct bdev_inode), - 0, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, - init_once, - NULL); - if (!bdev_cachep) - panic("Cannot create bdev_cache SLAB cache"); + bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + init_once, NULL); err = register_filesystem(&bd_type); if (err) panic("Cannot register bdev pseudo-fs"); --- linux-2.6.6-rc1/fs/buffer.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/buffer.c 2004-04-18 22:26:00.418645448 -0700 @@ -51,25 +51,6 @@ static struct bh_wait_queue_head { wait_queue_head_t wqh; } ____cacheline_aligned_in_smp bh_wait_queue_heads[1< 10) - return; - enough++; - printk("buffer layer error at %s:%d\n", file, line); -#ifndef CONFIG_KALLSYMS - printk("Pass this trace through ksymoops for reporting\n"); -#endif - dump_stack(); -} -EXPORT_SYMBOL(__buffer_error); - inline void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) { @@ -99,17 +80,6 @@ EXPORT_SYMBOL(wake_up_buffer); void fastcall unlock_buffer(struct buffer_head *bh) { - /* - * unlock_buffer against a zero-count bh is a bug, if the page - * is not locked. Because then nothing protects the buffer's - * waitqueue, which is used here. (Well. Other locked buffers - * against the page will pin it. But complain anyway). 
- */ - if (atomic_read(&bh->b_count) == 0 && - !PageLocked(bh->b_page) && - !PageWriteback(bh->b_page)) - buffer_error(); - clear_buffer_locked(bh); smp_mb__after_clear_bit(); wake_up_buffer(bh); @@ -125,10 +95,6 @@ void __wait_on_buffer(struct buffer_head wait_queue_head_t *wqh = bh_waitq_head(bh); DEFINE_WAIT(wait); - if (atomic_read(&bh->b_count) == 0 && - (!bh->b_page || !PageLocked(bh->b_page))) - buffer_error(); - do { prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); if (buffer_locked(bh)) { @@ -146,8 +112,6 @@ void __wait_on_buffer(struct buffer_head static void __set_page_buffers(struct page *page, struct buffer_head *head) { - if (page_has_buffers(page)) - buffer_error(); page_cache_get(page); SetPagePrivate(page); page->private = (unsigned long)head; @@ -263,6 +227,77 @@ int fsync_bdev(struct block_device *bdev return sync_blockdev(bdev); } +/** + * freeze_bdev -- lock a filesystem and force it into a consistent state + * @bdev: blockdevice to lock + * + * This takes the block device bd_mount_sem to make sure no new mounts + * happen on bdev until thaw_bdev() is called. + * If a superblock is found on this device, we take the s_umount semaphore + * on it to make sure nobody unmounts until the snapshot creation is done. 
+ */ +struct super_block *freeze_bdev(struct block_device *bdev) +{ + struct super_block *sb; + + down(&bdev->bd_mount_sem); + sb = get_super(bdev); + if (sb && !(sb->s_flags & MS_RDONLY)) { + sb->s_frozen = SB_FREEZE_WRITE; + wmb(); + + sync_inodes_sb(sb, 0); + DQUOT_SYNC(sb); + + lock_super(sb); + if (sb->s_dirt && sb->s_op->write_super) + sb->s_op->write_super(sb); + unlock_super(sb); + + if (sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, 1); + + sync_blockdev(sb->s_bdev); + sync_inodes_sb(sb, 1); + + sb->s_frozen = SB_FREEZE_TRANS; + wmb(); + + sync_blockdev(sb->s_bdev); + + if (sb->s_op->write_super_lockfs) + sb->s_op->write_super_lockfs(sb); + } + + sync_blockdev(bdev); + return sb; /* thaw_bdev releases s->s_umount and bd_mount_sem */ +} +EXPORT_SYMBOL(freeze_bdev); + +/** + * thaw_bdev -- unlock filesystem + * @bdev: blockdevice to unlock + * @sb: associated superblock + * + * Unlocks the filesystem and marks it writeable again after freeze_bdev(). + */ +void thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + if (sb) { + BUG_ON(sb->s_bdev != bdev); + + if (sb->s_op->unlockfs) + sb->s_op->unlockfs(sb); + sb->s_frozen = SB_UNFROZEN; + wmb(); + wake_up(&sb->s_wait_unfrozen); + drop_super(sb); + } + + up(&bdev->bd_mount_sem); +} +EXPORT_SYMBOL(thaw_bdev); + /* * sync everything. Start out by waking pdflush, because that writes back * all queues in parallel. @@ -433,10 +468,12 @@ __find_get_block_slow(struct block_devic } bh = bh->b_this_page; } while (bh != head); - buffer_error(); - printk("block=%llu, b_blocknr=%llu\n", + + printk("__find_get_block_slow() failed. 
" + "block=%llu, b_blocknr=%llu\n", (unsigned long long)block, (unsigned long long)bh->b_blocknr); printk("b_state=0x%08lx, b_size=%u\n", bh->b_state, bh->b_size); + printk("device blocksize: %d\n", 1 << bd_inode->i_blkbits); out_unlock: spin_unlock(&bd_mapping->private_lock); page_cache_release(page); @@ -847,10 +884,7 @@ int __set_page_dirty_buffers(struct page struct buffer_head *bh = head; do { - if (buffer_uptodate(bh)) - set_buffer_dirty(bh); - else - buffer_error(); + set_buffer_dirty(bh); bh = bh->b_this_page; } while (bh != head); } @@ -1151,7 +1185,7 @@ grow_dev_page(struct block_device *bdev, return page; failed: - buffer_error(); + BUG(); unlock_page(page); page_cache_release(page); return NULL; @@ -1247,8 +1281,6 @@ __getblk_slow(struct block_device *bdev, */ void fastcall mark_buffer_dirty(struct buffer_head *bh) { - if (!buffer_uptodate(bh)) - buffer_error(); if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) __set_page_dirty_nobuffers(bh->b_page); } @@ -1267,7 +1299,7 @@ void __brelse(struct buffer_head * buf) return; } printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n"); - buffer_error(); /* For the stack backtrace */ + WARN_ON(1); } /* @@ -1294,8 +1326,6 @@ static struct buffer_head *__bread_slow( unlock_buffer(bh); return bh; } else { - if (buffer_dirty(bh)) - buffer_error(); get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); @@ -1686,10 +1716,6 @@ void unmap_underlying_metadata(struct bl old_bh = __find_get_block_slow(bdev, block, 0); if (old_bh) { -#if 0 /* This happens. Later. 
*/ - if (buffer_dirty(old_bh)) - buffer_error(); -#endif clear_buffer_dirty(old_bh); wait_on_buffer(old_bh); clear_buffer_req(old_bh); @@ -1737,8 +1763,6 @@ static int __block_write_full_page(struc last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; if (!page_has_buffers(page)) { - if (!PageUptodate(page)) - buffer_error(); create_empty_buffers(page, 1 << inode->i_blkbits, (1 << BH_Dirty)|(1 << BH_Uptodate)); } @@ -1767,9 +1791,6 @@ static int __block_write_full_page(struc * mapped buffers outside i_size will occur, because * this page can be outside i_size when there is a * truncate in progress. - * - * if (buffer_mapped(bh)) - * buffer_error(); */ /* * The buffer was zeroed by block_write_full_page() @@ -1777,8 +1798,6 @@ static int __block_write_full_page(struc clear_buffer_dirty(bh); set_buffer_uptodate(bh); } else if (!buffer_mapped(bh) && buffer_dirty(bh)) { - if (buffer_new(bh)) - buffer_error(); err = get_block(inode, block, bh, 1); if (err) goto recover; @@ -1811,8 +1830,6 @@ static int __block_write_full_page(struc continue; } if (test_clear_buffer_dirty(bh)) { - if (!buffer_uptodate(bh)) - buffer_error(); mark_buffer_async_write(bh); } else { unlock_buffer(bh); @@ -1942,8 +1959,6 @@ static int __block_prepare_write(struct unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); if (PageUptodate(page)) { - if (!buffer_mapped(bh)) - buffer_error(); set_buffer_uptodate(bh); continue; } @@ -2001,8 +2016,6 @@ out: void *kaddr; clear_buffer_new(bh); - if (buffer_uptodate(bh)) - buffer_error(); kaddr = kmap_atomic(page, KM_USER0); memset(kaddr+block_start, 0, bh->b_size); kunmap_atomic(kaddr, KM_USER0); @@ -2068,8 +2081,6 @@ int block_read_full_page(struct page *pa if (!PageLocked(page)) PAGE_BUG(page); - if (PageUptodate(page)) - buffer_error(); blocksize = 1 << inode->i_blkbits; if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -2684,7 +2695,7 @@ static int end_bio_bh_io_sync(struct bio return 0; } -int submit_bh(int rw, struct 
buffer_head * bh) +void submit_bh(int rw, struct buffer_head * bh) { struct bio *bio; @@ -2692,13 +2703,6 @@ int submit_bh(int rw, struct buffer_head BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); - if ((rw == READ || rw == READA) && buffer_uptodate(bh)) - buffer_error(); - if (rw == WRITE && !buffer_uptodate(bh)) - buffer_error(); - if (rw == READ && buffer_dirty(bh)) - buffer_error(); - /* Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && rw == WRITE) clear_buffer_write_io_error(bh); @@ -2722,7 +2726,7 @@ int submit_bh(int rw, struct buffer_head bio->bi_end_io = end_bio_bh_io_sync; bio->bi_private = bh; - return submit_bio(rw, bio); + submit_bio(rw, bio); } /** @@ -2798,21 +2802,6 @@ void sync_dirty_buffer(struct buffer_hea } /* - * Sanity checks for try_to_free_buffers. - */ -static void check_ttfb_buffer(struct page *page, struct buffer_head *bh) -{ - if (!buffer_uptodate(bh) && !buffer_req(bh)) { - if (PageUptodate(page) && page->mapping - && buffer_mapped(bh) /* discard_buffer */ - && S_ISBLK(page->mapping->host->i_mode)) - { - buffer_error(); - } - } -} - -/* * try_to_free_buffers() checks if all the buffers on this particular page * are unused, and releases them if so. 
* @@ -2847,7 +2836,6 @@ drop_buffers(struct page *page, struct b bh = head; do { - check_ttfb_buffer(page, bh); if (buffer_write_io_error(bh)) set_bit(AS_EIO, &page->mapping->flags); if (buffer_busy(bh)) @@ -2857,9 +2845,6 @@ drop_buffers(struct page *page, struct b bh = bh->b_this_page; } while (bh != head); - if (!was_uptodate && PageUptodate(page) && !PageError(page)) - buffer_error(); - do { struct buffer_head *next = bh->b_this_page; @@ -3050,7 +3035,7 @@ void __init buffer_init(void) bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, - 0, init_buffer_head, NULL); + SLAB_PANIC, init_buffer_head, NULL); for (i = 0; i < ARRAY_SIZE(bh_wait_queue_heads); i++) init_waitqueue_head(&bh_wait_queue_heads[i].wqh); --- linux-2.6.6-rc1/fs/cifs/AUTHORS 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/AUTHORS 2004-04-18 22:25:28.593483608 -0700 @@ -26,5 +26,6 @@ Test case and Bug Report contributors ------------------------------------- Thanks to those in the community who have submitted detailed bug reports and debug of problems they have found: Jochen Dolze, David Blaine, -Rene Scharfe, Martin Josefsson, Alexander Wild and others. +Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori, +Urban Widmark, Massimiliano Ferrero, Howard Owen and others. --- linux-2.6.6-rc1/fs/cifs/CHANGES 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/CHANGES 2004-04-18 22:25:28.594483456 -0700 @@ -1,3 +1,94 @@ +Version 1.08 +------------ +Allow file_mode and dir_mode (specified at mount time) to be enforced +locally (the server already enforced its own ACLs too) for servers +that do not report the correct mode (do not support the +CIFS Unix Extensions). + +Version 1.07 +------------ +Fix some small memory leaks in some unmount error paths. Fix major leak +of cache pages in readpages causing multiple read oriented stress +testcases (including fsx, and even large file copy) to fail over time. 
+ +Version 1.06 +------------ +Send NTCreateX with ATTR_POSIX if Linux/Unix extensions negotiated with server. +This allows files that differ only in case and improves performance of file +creation and file open to such servers. Fix semaphore conflict which causes +slow delete of open file to Samba (which unfortunately can cause an oplock +break to self while vfs_unlink held i_sem) which can hang for 20 seconds. + +Version 1.05 +------------ +fixes to cifs_readpages for fsx test case + +Version 1.04 +------------ +Fix caching data integrity bug when extending file size especially when no +oplock on file. Fix spurious logging of valid already parsed mount options +that are parsed outside of the cifs vfs such as nosuid. + + +Version 1.03 +------------ +Connect to server when port number override not specified, and tcp port +uninitialized. Reset search to restart at correct file when kernel routine +filldir returns error during large directory searches (readdir). + +Version 1.02 +------------ +Fix caching problem when files opened by multiple clients in which +page cache could contain stale data, and write through did +not occur often enough while file was still open when read ahead +(read oplock) not allowed. Treat "sep=" when first mount option +as an override of comma as the default separator between mount +options. + +Version 1.01 +------------ +Allow passwords longer than 16 bytes. Allow null password string. + +Version 1.00 +------------ +Gracefully clean up failed mounts when attempting to mount to servers such as +Windows 98 that terminate tcp sessions during protocol negotiation. Handle +embedded commas in mount parsing of passwords. + +Version 0.99 +------------ +Invalidate local inode cached pages on oplock break and when last file +instance is closed so that the client does not continue using stale local +copy rather than later modified server copy of file.
Do not reconnect +when server drops the tcp session prematurely before negotiate +protocol response. Fix oops in reopen_file when dentry freed. Allow +the support for CIFS Unix Extensions to be disabled via proc interface. + +Version 0.98 +------------ +Fix hang in commit_write during reconnection of open files under heavy load. +Fix unload_nls oops in a mount failure path. Serialize writes to same socket +which also fixes any possible races when cifs signatures are enabled in SMBs +being sent out of signature sequence number order. + +Version 0.97 +------------ +Fix byte range locking bug (endian problem) causing bad offset and +length. + +Version 0.96 +------------ +Fix oops (in send_sig) caused by CIFS unmount code trying to +wake up the demultiplex thread after it had exited. Do not log +error on harmless oplock release of closed handle. + +Version 0.95 +------------ +Fix unsafe global variable usage and password hash failure on gcc 3.3.1 +Fix problem reconnecting secondary mounts to same server after session +failure. Fix invalid dentry - race in mkdir when directory gets created +by another client between the lookup and mkdir. + Version 0.94 ------------ Fix to list processing in reopen_files.
Fix reconnection when server hung --- linux-2.6.6-rc1/fs/cifs/cifs_debug.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/cifs_debug.c 2004-04-18 22:25:28.599482696 -0700 @@ -88,9 +88,26 @@ cifs_debug_data_read(char *buf, char **b i, ses->serverName, ses->serverDomain, atomic_read(&ses->inUse), ses->serverOS, ses->serverNOS, ses->capabilities,ses->status,ses->server->tcpStatus); buf += length; - if(ses->server) + if(ses->server) { buf += sprintf(buf, "\n\tLocal Users To Same Server: %d SecMode: 0x%x", atomic_read(&ses->server->socketUseCount),ses->server->secMode); + + /* length = sprintf(buf, "\nMIDs: \n"); + buf += length; + + spin_lock(&GlobalMid_Lock); + list_for_each(tmp1, &ses->server->pending_mid_q) { + mid_entry = list_entry(tmp1, struct + mid_q_entry, + qhead); + if(mid_entry) { + length = sprintf(buf,"State: %d com: %d pid: %d tsk: %p\n",mid_entry->midState,mid_entry->command,mid_entry->pid,mid_entry->tsk); + buf += length; + } + } + spin_unlock(&GlobalMid_Lock); */ + } + } read_unlock(&GlobalSMBSeslock); sprintf(buf, "\n"); @@ -127,8 +144,10 @@ cifs_debug_data_read(char *buf, char **b buf += sprintf(buf, "\tDISCONNECTED "); } read_unlock(&GlobalSMBSeslock); + length = sprintf(buf, "\n"); buf += length; + *eof = 1; /* BB add code to dump additional info such as TCP session info now */ /* @@ -177,6 +196,9 @@ cifs_stats_read(char *buf, char **beginB item_length = sprintf(buf,"Active Operations (MIDs in use): %d\n",midCount.counter); length += item_length; + buf += item_length; + item_length = sprintf(buf,"%d sessions and %d shares reconnected after failure\n",tcpSesReconnectCount.counter,tconInfoReconnectCount.counter); + length += item_length; return length; } @@ -201,6 +223,8 @@ static read_proc_t packet_signing_enable static write_proc_t packet_signing_enabled_write; static read_proc_t quotaEnabled_read; static write_proc_t quotaEnabled_write; +static read_proc_t linuxExtensionsEnabled_read; +static write_proc_t linuxExtensionsEnabled_write; void 
cifs_proc_init(void) @@ -213,62 +237,67 @@ cifs_proc_init(void) proc_fs_cifs->owner = THIS_MODULE; create_proc_read_entry("DebugData", 0, proc_fs_cifs, - cifs_debug_data_read, 0); + cifs_debug_data_read, 0); create_proc_read_entry("SimultaneousOps", 0, proc_fs_cifs, - cifs_total_xid_read, 0); + cifs_total_xid_read, 0); create_proc_read_entry("Stats", 0, proc_fs_cifs, - cifs_stats_read, 0); + cifs_stats_read, 0); pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs, - cifsFYI_read, 0); + cifsFYI_read, 0); if (pde) pde->write_proc = cifsFYI_write; pde = create_proc_read_entry("traceSMB", 0, proc_fs_cifs, - traceSMB_read, 0); + traceSMB_read, 0); if (pde) pde->write_proc = traceSMB_write; pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs, - oplockEnabled_read, 0); + oplockEnabled_read, 0); if (pde) pde->write_proc = oplockEnabled_write; - pde = create_proc_read_entry("QuotaEnabled", 0, proc_fs_cifs, - quotaEnabled_read, 0); - if (pde) - pde->write_proc = quotaEnabled_write; + pde = create_proc_read_entry("QuotaEnabled", 0, proc_fs_cifs, + quotaEnabled_read, 0); + if (pde) + pde->write_proc = quotaEnabled_write; + + pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs, + linuxExtensionsEnabled_read, 0); + if (pde) + pde->write_proc = linuxExtensionsEnabled_write; pde = create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs, - multiuser_mount_read, 0); + multiuser_mount_read, 0); if (pde) pde->write_proc = multiuser_mount_write; pde = create_proc_read_entry("ExtendedSecurity", 0, proc_fs_cifs, - extended_security_read, 0); + extended_security_read, 0); if (pde) pde->write_proc = extended_security_write; pde = - create_proc_read_entry("LookupCacheEnable", 0, proc_fs_cifs, - lookupFlag_read, 0); + create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs, + lookupFlag_read, 0); if (pde) pde->write_proc = lookupFlag_write; pde = create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs, - ntlmv2_enabled_read, 0); + ntlmv2_enabled_read, 
0); if (pde) pde->write_proc = ntlmv2_enabled_write; pde = create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs, - packet_signing_enabled_read, 0); + packet_signing_enabled_read, 0); if (pde) pde->write_proc = packet_signing_enabled_write; } @@ -283,12 +312,15 @@ cifs_proc_clean(void) remove_proc_entry("cifsFYI", proc_fs_cifs); remove_proc_entry("TraceSMB", proc_fs_cifs); remove_proc_entry("SimultaneousOps", proc_fs_cifs); - remove_proc_entry("TotalOps", proc_fs_cifs); + remove_proc_entry("Stats", proc_fs_cifs); remove_proc_entry("MultiuserMount", proc_fs_cifs); remove_proc_entry("OplockEnabled", proc_fs_cifs); remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); remove_proc_entry("ExtendedSecurity",proc_fs_cifs); remove_proc_entry("PacketSigningEnabled",proc_fs_cifs); + remove_proc_entry("LinuxExtensionsEnabled",proc_fs_cifs); + remove_proc_entry("QuotaEnabled",proc_fs_cifs); + remove_proc_entry("LookupCacheEnabled",proc_fs_cifs); remove_proc_entry("cifs", proc_root_fs); } @@ -410,6 +442,46 @@ quotaEnabled_write(struct file *file, co return count; } +static int +linuxExtensionsEnabled_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d\n", linuxExtEnabled); +/* could also check if quotas are enabled in kernel + as a whole first */ + len -= off; + *start = page + off; + + if (len > count) + len = count; + else + *eof = 1; + + if (len < 0) + len = 0; + + return len; +} +static int +linuxExtensionsEnabled_write(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char c; + int rc; + + rc = get_user(c, buffer); + if (rc) + return rc; + if (c == '0' || c == 'n' || c == 'N') + linuxExtEnabled = 0; + else if (c == '1' || c == 'y' || c == 'Y') + linuxExtEnabled = 1; + + return count; +} + static int lookupFlag_read(char *page, char **start, off_t off, --- linux-2.6.6-rc1/fs/cifs/cifsencrypt.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/cifsencrypt.c 2004-04-18 
22:25:28.601482392 -0700 @@ -74,7 +74,7 @@ int cifs_sign_smb(struct smb_hdr * cifs_ rc = cifs_calculate_signature(cifs_pdu, ses->mac_signing_key,smb_signature); if(rc) - memset(cifs_pdu->Signature.SecuritySignature, 0, 8); + memset(cifs_pdu->Signature.SecuritySignature, 0, 8); else memcpy(cifs_pdu->Signature.SecuritySignature, smb_signature, 8); @@ -88,15 +88,15 @@ int cifs_verify_signature(struct smb_hdr char server_response_sig[8]; char what_we_think_sig_should_be[20]; - if((cifs_pdu == NULL) || (mac_key == NULL)) - return -EINVAL; + if((cifs_pdu == NULL) || (mac_key == NULL)) + return -EINVAL; if (cifs_pdu->Command == SMB_COM_NEGOTIATE) return 0; if (cifs_pdu->Command == SMB_COM_LOCKING_ANDX) { struct smb_com_lock_req * pSMB = (struct smb_com_lock_req *)cifs_pdu; - if(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) + if(pSMB->LockType & LOCKING_ANDX_OPLOCK_RELEASE) return 0; } @@ -113,10 +113,10 @@ int cifs_verify_signature(struct smb_hdr its signature against what the server sent */ memcpy(server_response_sig,cifs_pdu->Signature.SecuritySignature,8); - cifs_pdu->Signature.Sequence.SequenceNumber = expected_sequence_number; - cifs_pdu->Signature.Sequence.Reserved = 0; + cifs_pdu->Signature.Sequence.SequenceNumber = expected_sequence_number; + cifs_pdu->Signature.Sequence.Reserved = 0; - rc = cifs_calculate_signature(cifs_pdu, mac_key, + rc = cifs_calculate_signature(cifs_pdu, mac_key, what_we_think_sig_should_be); if(rc) @@ -136,7 +136,7 @@ int cifs_verify_signature(struct smb_hdr int cifs_calculate_mac_key(char * key, const char * rn, const char * password) { char temp_key[16]; - if ((key == NULL) || (rn == NULL) || (password == NULL)) + if ((key == NULL) || (rn == NULL)) return -EINVAL; E_md4hash(password, temp_key); @@ -156,7 +156,7 @@ int CalcNTLMv2_partial_mac_key(struct ci if(ses) return -EINVAL; - E_md4hash(ses->password_with_pad, temp_hash); + E_md4hash(ses->password, temp_hash); hmac_md5_init_limK_to_64(temp_hash, 16, &ctx); user_name_len = 
strlen(ses->userName); @@ -165,22 +165,21 @@ int CalcNTLMv2_partial_mac_key(struct ci dom_name_len = strlen(ses->domainName); if(dom_name_len > MAX_USERNAME_SIZE) return -EINVAL; - - + ucase_buf = kmalloc((MAX_USERNAME_SIZE+1), GFP_KERNEL); - unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); - + unicode_buf = kmalloc((MAX_USERNAME_SIZE+1)*4, GFP_KERNEL); + for(i=0;icharset2upper[(int)ses->userName[i]]; ucase_buf[i] = 0; - user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); + user_name_len = cifs_strtoUCS(unicode_buf, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); unicode_buf[user_name_len] = 0; user_name_len++; - for(i=0;icharset2upper[(int)ses->domainName[i]]; - ucase_buf[i] = 0; - dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); + for(i=0;icharset2upper[(int)ses->domainName[i]]; + ucase_buf[i] = 0; + dom_name_len = cifs_strtoUCS(unicode_buf+user_name_len, ucase_buf, MAX_USERNAME_SIZE*2, nls_info); unicode_buf[user_name_len + dom_name_len] = 0; hmac_md5_update((const unsigned char *) unicode_buf, @@ -201,6 +200,5 @@ void CalcNTLMv2_response(const struct ci hmac_md5_update(ses->server->cryptKey,8,&context); /* hmac_md5_update(v2_session_response+16)client thing,8,&context); */ /* BB fix */ - hmac_md5_final(v2_session_response,&context); } --- linux-2.6.6-rc1/fs/cifs/cifsfs.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/cifs/cifsfs.c 2004-04-18 22:25:28.603482088 -0700 @@ -52,6 +52,7 @@ int cifsERROR = 1; int traceSMB = 0; unsigned int oplockEnabled = 1; unsigned int quotaEnabled = 0; +unsigned int linuxExtEnabled = 1; unsigned int lookupCacheEnabled = 1; unsigned int multiuser_mount = 0; unsigned int extended_security = 0; @@ -82,6 +83,9 @@ cifs_read_super(struct super_block *sb, cifs_sb = CIFS_SB(sb); if(cifs_sb == NULL) return -ENOMEM; + else + memset(cifs_sb,0,sizeof(struct cifs_sb_info)); + rc = cifs_mount(sb, cifs_sb, data, devname); @@ -123,14 +127,15 @@ out_no_root: 
iput(inode); out_mount_failed: - if(cifs_sb->local_nls) - unload_nls(cifs_sb->local_nls); - if(cifs_sb) + if(cifs_sb) { + if(cifs_sb->local_nls) + unload_nls(cifs_sb->local_nls); kfree(cifs_sb); + } return rc; } -void +static void cifs_put_super(struct super_block *sb) { int rc = 0; @@ -151,7 +156,7 @@ cifs_put_super(struct super_block *sb) return; } -int +static int cifs_statfs(struct super_block *sb, struct kstatfs *buf) { int xid, rc; @@ -186,8 +191,21 @@ cifs_statfs(struct super_block *sb, stru static int cifs_permission(struct inode * inode, int mask, struct nameidata *nd) { - /* the server does permission checks, we do not need to do it here */ - return 0; + struct cifs_sb_info *cifs_sb; + + cifs_sb = CIFS_SB(inode->i_sb); + + if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { + /* the server supports the Unix-like mode bits and does its + own permission checks, and therefore we do not allow the file + mode to be overriden on these mounts - so do not do perm + check on client side */ + return 0; + } else /* file mode might have been restricted at mount time + on the client (above and beyond ACL on servers) for + servers which do not support setting and viewing mode bits, + so allowing client to check permissions is useful */ + return vfs_permission(inode, mask); } static kmem_cache_t *cifs_inode_cachep; @@ -357,6 +375,12 @@ static struct quotactl_ops cifs_quotactl }; #endif +static int cifs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + struct super_operations cifs_super_ops = { .read_inode = cifs_read_inode, .put_super = cifs_put_super, @@ -369,6 +393,7 @@ struct super_operations cifs_super_ops = us with the same number of releases (closes) as opens */ .show_options = cifs_show_options, /* .umount_begin = cifs_umount_begin, *//* consider adding in the future */ + .remount_fs = cifs_remount, }; static struct super_block * @@ -395,24 +420,58 @@ cifs_get_sb(struct file_system_type *fs_ return sb; } 
-ssize_t +static ssize_t cifs_read_wrapper(struct file * file, char *read_data, size_t read_size, loff_t * poffset) { - if(CIFS_I(file->f_dentry->d_inode)->clientCanCacheRead) + if(file == NULL) + return -EIO; + else if(file->f_dentry == NULL) + return -EIO; + else if(file->f_dentry->d_inode == NULL) + return -EIO; + + cFYI(1,("In read_wrapper size %zd at %lld",read_size,*poffset)); + if(CIFS_I(file->f_dentry->d_inode)->clientCanCacheRead) { return generic_file_read(file,read_data,read_size,poffset); - else - return cifs_read(file,read_data,read_size,poffset); + } else { + /* BB do we need to lock inode from here until after invalidate? */ +/* if(file->f_dentry->d_inode->i_mapping) { + filemap_fdatawrite(file->f_dentry->d_inode->i_mapping); + filemap_fdatawait(file->f_dentry->d_inode->i_mapping); + }*/ +/* cifs_revalidate(file->f_dentry);*/ /* BB fixme */ + + /* BB we should make timer configurable - perhaps + by simply calling cifs_revalidate here */ + /* invalidate_remote_inode(file->f_dentry->d_inode);*/ + return generic_file_read(file,read_data,read_size,poffset); + } } -ssize_t +static ssize_t cifs_write_wrapper(struct file * file, const char *write_data, size_t write_size, loff_t * poffset) { - if(CIFS_I(file->f_dentry->d_inode)->clientCanCacheAll) /* check caching for write */ - return generic_file_write(file,write_data, write_size,poffset); - else - return cifs_write(file,write_data,write_size,poffset); + ssize_t written; + + if(file == NULL) + return -EIO; + else if(file->f_dentry == NULL) + return -EIO; + else if(file->f_dentry->d_inode == NULL) + return -EIO; + + cFYI(1,("In write_wrapper size %zd at %lld",write_size,*poffset)); + + /* check whether we can cache writes locally */ + written = generic_file_write(file,write_data,write_size,poffset); + if(!CIFS_I(file->f_dentry->d_inode)->clientCanCacheAll) { + if(file->f_dentry->d_inode->i_mapping) { + filemap_fdatawrite(file->f_dentry->d_inode->i_mapping); + } + } + return written; } @@ -469,8 +528,8 @@ 
struct inode_operations cifs_symlink_ino }; struct file_operations cifs_file_ops = { - .read = generic_file_read, - .write = generic_file_write, + .read = cifs_read_wrapper, + .write = cifs_write_wrapper, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -498,7 +557,7 @@ cifs_init_once(void *inode, kmem_cache_t } } -int +static int cifs_init_inodecache(void) { cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", @@ -511,14 +570,14 @@ cifs_init_inodecache(void) return 0; } -void +static void cifs_destroy_inodecache(void) { if (kmem_cache_destroy(cifs_inode_cachep)) printk(KERN_WARNING "cifs_inode_cache: error freeing\n"); } -int +static int cifs_init_request_bufs(void) { cifs_req_cachep = kmem_cache_create("cifs_request", @@ -531,7 +590,7 @@ cifs_init_request_bufs(void) return 0; } -void +static void cifs_destroy_request_bufs(void) { if (kmem_cache_destroy(cifs_req_cachep)) @@ -539,7 +598,7 @@ cifs_destroy_request_bufs(void) "cifs_destroy_request_cache: error not all structures were freed\n"); } -int +static int cifs_init_mids(void) { cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids", @@ -558,7 +617,7 @@ cifs_init_mids(void) return 0; } -void +static void cifs_destroy_mids(void) { if (kmem_cache_destroy(cifs_mid_cachep)) @@ -584,7 +643,7 @@ static int cifs_oplock_thread(void * dum do { set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(39*HZ); + schedule_timeout(1*HZ); spin_lock(&GlobalMid_Lock); if(list_empty(&GlobalOplock_Q)) { spin_unlock(&GlobalMid_Lock); @@ -593,23 +652,41 @@ static int cifs_oplock_thread(void * dum oplock_item = list_entry(GlobalOplock_Q.next, struct oplock_q_entry, qhead); if(oplock_item) { + cFYI(1,("found oplock item to write out")); pTcon = oplock_item->tcon; inode = oplock_item->pinode; netfid = oplock_item->netfid; spin_unlock(&GlobalMid_Lock); DeleteOplockQEntry(oplock_item); - if (S_ISREG(inode->i_mode)) + /* can not grab inode sem here since it would + deadlock when oplock received on delete + since 
vfs_unlink holds the i_sem across + the call */ + /* down(&inode->i_sem);*/ + if (S_ISREG(inode->i_mode)) { rc = filemap_fdatawrite(inode->i_mapping); - else + if(CIFS_I(inode)->clientCanCacheRead == 0) + invalidate_remote_inode(inode); + } else rc = 0; + /* up(&inode->i_sem);*/ if (rc) CIFS_I(inode)->write_behind_rc = rc; cFYI(1,("Oplock flush inode %p rc %d",inode,rc)); - rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, - 0, LOCKING_ANDX_OPLOCK_RELEASE, - 0 /* wait flag */); - cFYI(1,("Oplock release rc = %d ",rc)); + + /* releasing a stale oplock after recent reconnection + of smb session using a now incorrect file + handle is not a data integrity issue but do + not bother sending an oplock release if session + to server still is disconnected since oplock + already released by the server in that case */ + if(pTcon->tidStatus != CifsNeedReconnect) { + rc = CIFSSMBLock(0, pTcon, netfid, + 0 /* len */ , 0 /* offset */, 0, + 0, LOCKING_ANDX_OPLOCK_RELEASE, + 0 /* wait flag */); + cFYI(1,("Oplock release rc = %d ",rc)); + } } else spin_unlock(&GlobalMid_Lock); } @@ -633,6 +710,9 @@ init_cifs(void) */ atomic_set(&sesInfoAllocCount, 0); atomic_set(&tconInfoAllocCount, 0); + atomic_set(&tcpSesReconnectCount, 0); + atomic_set(&tconInfoReconnectCount, 0); + atomic_set(&bufAllocCount, 0); atomic_set(&midCount, 0); GlobalCurrentXid = 0; --- linux-2.6.6-rc1/fs/cifs/cifsfs.h 2003-07-13 21:44:35.000000000 -0700 +++ 25/fs/cifs/cifsfs.h 2004-04-18 22:25:28.603482088 -0700 @@ -60,8 +60,6 @@ extern int cifs_getattr(struct vfsmount extern int cifs_setattr(struct dentry *, struct iattr *); extern struct inode_operations cifs_file_inode_ops; -extern void cifs_truncate_file(struct inode *); - extern struct inode_operations cifs_symlink_inode_ops; /* Functions related to files and directories */ --- linux-2.6.6-rc1/fs/cifs/cifsglob.h 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/cifsglob.h 2004-04-18 22:25:28.604481936 -0700 @@ -16,6 +16,7 @@ * */ #include 
+#include #include "cifs_fs_sb.h" /* * The sizes of various internal tables and strings @@ -55,6 +56,10 @@ #define TRUE 1 #endif +#ifndef XATTR_DOS_ATTRIB +#define XATTR_DOS_ATTRIB "user.DOSATTRIB" +#endif + /* * This information is kept on every Server we know about. * @@ -83,6 +88,13 @@ enum securityEnum { Kerberos /* Kerberos via SPNEGO */ }; +enum protocolEnum { + IPV4 = 0, + IPV6, + SCTP + /* Netbios frames protocol not supported at this time */ +}; + /* ***************************************************************** * Except the CIFS PDUs themselves all the @@ -94,13 +106,16 @@ struct TCP_Server_Info { char server_Name[SERVER_NAME_LEN_WITH_NULL]; /* 15 chars + X'20'in 16th */ char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2]; /* Unicode version of server_Name */ struct socket *ssocket; - struct sockaddr_in sockAddr; + union { + struct sockaddr_in sockAddr; + struct sockaddr_in6 sockAddr6; + } addr; wait_queue_head_t response_q; struct list_head pending_mid_q; void *Server_NlsInfo; /* BB - placeholder for future NLS info */ unsigned short server_codepage; /* codepage for the server */ unsigned long ip_address; /* IP addr for the server if known */ - unsigned long svType; /* computer type */ + enum protocolEnum protocolType; char versionMajor; char versionMinor; int svlocal:1; /* local server or remote */ @@ -161,7 +176,7 @@ struct cifsSesInfo { char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for tcp names - will ipv6 and sctp addresses fit here?? 
*/ char userName[MAX_USERNAME_SIZE + 1]; char domainName[MAX_USERNAME_SIZE + 1]; - char password_with_pad[CIFS_ENCPWD_SIZE]; + char * password; }; /* @@ -175,13 +190,14 @@ struct cifsTconInfo { struct cifsSesInfo *ses; /* pointer to session associated with */ char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource (in ASCII not UTF) */ char *nativeFileSystem; - __u16 tid; /* The 2 byte transaction id */ + __u16 tid; /* The 2 byte tree id */ __u16 Flags; /* optional support bits */ enum statusEnum tidStatus; - atomic_t useCount; /* how many mounts (explicit or implicit refer to this share */ + atomic_t useCount; /* how many mounts (explicit or implicit) to this share */ FILE_SYSTEM_DEVICE_INFO fsDevInfo; - FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* note file system name may be truncated - but very unlikely */ + FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if file system name truncated */ FILE_SYSTEM_UNIX_INFO fsUnixInfo; + int retry:1; /* BB add field for back pointer to sb struct? */ }; @@ -213,6 +229,7 @@ struct cifsFileInfo { int closePend:1; /* file is marked to close */ int emptyDir:1; int invalidHandle:1; /* file closed via session abend */ + struct semaphore fh_sem; /* prevents reopen race after dead ses*/ char * search_resume_name; unsigned int resume_name_length; __u32 resume_key; @@ -274,6 +291,7 @@ struct oplock_q_entry { #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 #define MID_RESPONSE_RECEIVED 4 +#define MID_RETRY_NEEDED 8 /* session closed while this request out */ struct servers_not_supported { /* @z4a */ struct servers_not_supported *next1; /* @z4a */ @@ -313,7 +331,7 @@ struct servers_not_supported { /* @z4a * * ---------- * sesSem operations on smb session * tconSem operations on tree connection - * i_sem inode operations + * fh_sem file handle reconnection operations * ****************************************************************************/ @@ -358,6 +376,9 @@ GLOBAL_EXTERN char Local_System_Name[15] GLOBAL_EXTERN atomic_t 
sesInfoAllocCount; GLOBAL_EXTERN atomic_t tconInfoAllocCount; +GLOBAL_EXTERN atomic_t tcpSesReconnectCount; +GLOBAL_EXTERN atomic_t tconInfoReconnectCount; + /* Various Debug counters to remove someday (BB) */ GLOBAL_EXTERN atomic_t bufAllocCount; GLOBAL_EXTERN atomic_t midCount; @@ -374,4 +395,6 @@ GLOBAL_EXTERN unsigned int extended_secu with more secure ntlmssp2 challenge/resp */ GLOBAL_EXTERN unsigned int ntlmv2_support; /* better optional password hash */ GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ +GLOBAL_EXTERN unsigned int linuxExtEnabled; /* enable Linux/Unix CIFS extensions */ + --- linux-2.6.6-rc1/fs/cifs/cifspdu.h 2003-08-22 19:23:42.000000000 -0700 +++ 25/fs/cifs/cifspdu.h 2004-04-18 22:25:28.605481784 -0700 @@ -727,8 +727,10 @@ typedef struct smb_com_read_rsp { typedef struct locking_andx_range { __u16 Pid; __u16 Pad; - __u64 Offset; - __u64 Length; + __u32 OffsetHigh; + __u32 OffsetLow; + __u32 LengthHigh; + __u32 LengthLow; } LOCKING_ANDX_RANGE; #define LOCKING_ANDX_SHARED_LOCK 0x01 @@ -1101,10 +1103,10 @@ typedef struct smb_com_transaction2_spi_ } TRANSACTION2_SPI_RSP; struct set_file_rename { - __u32 overwrite; /* 1 = overwrite dest */ - __u32 root_fid; /* zero */ + __u32 overwrite; /* 1 = overwrite dest */ + __u32 root_fid; /* zero */ __u32 target_name_len; - char target_name[0]; /* Must be unicode */ + char target_name[0]; /* Must be unicode */ }; struct smb_com_transaction2_sfi_req { --- linux-2.6.6-rc1/fs/cifs/cifsproto.h 2003-08-22 19:23:42.000000000 -0700 +++ 25/fs/cifs/cifsproto.h 2004-04-18 22:25:28.606481632 -0700 @@ -30,8 +30,8 @@ struct statfs; ***************************************************************** */ -extern struct smb_hdr *buf_get(void); -extern void buf_release(void *); +extern struct smb_hdr *cifs_buf_get(void); +extern void cifs_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, unsigned int /* length */ , struct sockaddr *); extern unsigned int _GetXid(void); @@ 
-41,7 +41,6 @@ extern void _FreeXid(unsigned int); extern char *build_path_from_dentry(struct dentry *); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); extern void renew_parental_timestamps(struct dentry *direntry); -extern void *kcalloc(size_t mem, int type); extern int SendReceive(const unsigned int /* xid */ , struct cifsSesInfo *, struct smb_hdr * /* input */ , struct smb_hdr * /* out */ , @@ -61,12 +60,6 @@ struct oplock_q_entry * AllocOplockQEntr void DeleteOplockQEntry(struct oplock_q_entry *); extern struct timespec cifs_NTtimeToUnix(u64 /* utc nanoseconds since 1601 */ ); extern u64 cifs_UnixTimeToNT(struct timespec); -extern void RevUcode_to_Ucode(char *revUnicode, char *UnicodeName); -extern void Ucode_to_RevUcode(char *Unicode, char *revUnicodeName); -extern void RevUcode_to_Ucode_with_Len(char *revUnicode, char *UnicodeName, - int Len); -extern void Ucode_to_RevUcode_with_Len(char *Unicode, char *revUnicodeName, - int Len); extern int cifs_get_inode_info(struct inode **pinode, const unsigned char *search_path, FILE_ALL_INFO * pfile_info, @@ -75,21 +68,9 @@ extern int cifs_get_inode_info_unix(stru const unsigned char *search_path, struct super_block *sb); -extern int reopen_files(struct cifsTconInfo *, struct nls_table *); -extern int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, +extern int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_table * nls_info); extern int CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses); -extern int CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *ntlm_session_key, const struct nls_table *); -extern int CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, - char *SecurityBlob,int SecurityBlobLength, - const struct nls_table *); -extern int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, - struct cifsSesInfo *ses, int *ntlmv2_flag, - const struct nls_table *); -extern int CIFSNTLMSSPAuthSessSetup(unsigned int xid, - 
struct cifsSesInfo *ses, char *ntlm_session_key, - int ntlmv2_flag, const struct nls_table *); extern int CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, const char *tree, struct cifsTconInfo *tcon, @@ -224,7 +205,6 @@ extern void sesInfoFree(struct cifsSesIn extern struct cifsTconInfo *tconInfoAlloc(void); extern void tconInfoFree(struct cifsTconInfo *); -extern int cifs_demultiplex_thread(struct TCP_Server_Info *); extern int cifs_reconnect(struct TCP_Server_Info *server); extern int cifs_sign_smb(struct smb_hdr *, struct cifsSesInfo *,__u32 *); --- linux-2.6.6-rc1/fs/cifs/cifssmb.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/cifssmb.c 2004-04-18 22:25:28.612480720 -0700 @@ -21,7 +21,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - /* SMB/CIFS PDU handling routines here - except for leftovers in connect.c */ + /* SMB/CIFS PDU handling routines here - except for leftovers in connect.c */ + /* These are mostly routines that operate on a pathname, or on a tree id */ + /* (mounted volume), but there are eight handle based routines which must be */ + /* treated slightly different for reconnection purposes since we never want */ + /* to reuse a stale file handle and the caller knows the file handle */ #include #include @@ -37,38 +41,108 @@ static struct { int index; char *name; } protocols[] = { - { - CIFS_PROT, "\2NT LM 0.12"}, { - BAD_PROT, "\2"} + {CIFS_PROT, "\2NT LM 0.12"}, + {BAD_PROT, "\2"} }; -int + +/* Mark as invalid, all open files on tree connections since they + were closed when session to server was lost */ +static void mark_open_files_invalid(struct cifsTconInfo * pTcon) +{ + struct cifsFileInfo *open_file = NULL; + struct list_head * tmp; + struct list_head * tmp1; + +/* list all files open on tree connection and mark them invalid */ + write_lock(&GlobalSMBSeslock); + list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { + open_file = list_entry(tmp,struct cifsFileInfo, tlist); + if(open_file) { + 
open_file->invalidHandle = TRUE; + } + } + write_unlock(&GlobalSMBSeslock); + /* BB Add call to invalidate_inodes(sb) for all superblocks mounted to this tcon */ +} + +static int smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, void **request_buf /* returned */ , void **response_buf /* returned */ ) { int rc = 0; - if(tcon && (tcon->tidStatus == CifsNeedReconnect)) { - rc = -EIO; - if(tcon->ses) { - struct nls_table *nls_codepage = load_nls_default(); + /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so + check for tcp and smb session status done differently + for those three - in the calling routine */ + if(tcon) { + if((tcon->ses) && (tcon->ses->server)){ + struct nls_table *nls_codepage; + /* Give Demultiplex thread up to 10 seconds to + reconnect, should be greater than cifs socket + timeout which is 7 seconds */ + while(tcon->ses->server->tcpStatus == CifsNeedReconnect) { + wait_event_interruptible_timeout(tcon->ses->server->response_q, + (tcon->ses->server->tcpStatus == CifsGood), 10 * HZ); + if(tcon->ses->server->tcpStatus == CifsNeedReconnect) { + /* on "soft" mounts we wait once */ + if((tcon->retry == FALSE) || + (tcon->ses->status == CifsExiting)) { + cFYI(1,("gave up waiting on reconnect in smb_init")); + return -EHOSTDOWN; + } /* else "hard" mount - keep retrying until + process is killed or server comes back up */ + } else /* TCP session is reestablished now */ + break; + + } + + nls_codepage = load_nls_default(); + /* need to prevent multiple threads trying to + simultaneously reconnect the same SMB session */ + down(&tcon->ses->sesSem); if(tcon->ses->status == CifsNeedReconnect) - rc = setup_session(0, tcon->ses, nls_codepage); - if(!rc) { + rc = cifs_setup_session(0, tcon->ses, nls_codepage); + if(!rc && (tcon->tidStatus == CifsNeedReconnect)) { + mark_open_files_invalid(tcon); rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon, nls_codepage); + up(&tcon->ses->sesSem); + if(rc == 0) + atomic_inc(&tconInfoReconnectCount); + 
cFYI(1, ("reconnect tcon rc = %d", rc)); - if(!rc) - reopen_files(tcon,nls_codepage); + /* Removed call to reopen open files here - + it is safer (and faster) to reopen files + one at a time as needed in read and write */ + + /* Check if handle based operation so we + know whether we can continue or not without + returning to caller to reset file handle */ + switch(smb_command) { + case SMB_COM_READ_ANDX: + case SMB_COM_WRITE_ANDX: + case SMB_COM_CLOSE: + case SMB_COM_FIND_CLOSE2: + case SMB_COM_LOCKING_ANDX: { + unload_nls(nls_codepage); + return -EAGAIN; + } + } + } else { + up(&tcon->ses->sesSem); } unload_nls(nls_codepage); + + } else { + return -EIO; } } if(rc) return rc; - *request_buf = buf_get(); + *request_buf = cifs_buf_get(); if (request_buf == 0) { return -ENOMEM; } @@ -98,7 +172,6 @@ CIFSSMBNegotiate(unsigned int xid, struc rc = -EIO; return rc; } - rc = smb_init(SMB_COM_NEGOTIATE, 0, 0 /* no tcon yet */ , (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -120,11 +193,11 @@ CIFSSMBNegotiate(unsigned int xid, struc if (rc == 0) { server->secMode = pSMBr->SecurityMode; server->secType = NTLM; /* BB override default for NTLMv2 or krb*/ - /* one byte - no need to convert this or EncryptionKeyLen from le,*/ + /* one byte - no need to convert this or EncryptionKeyLen from le,*/ server->maxReq = le16_to_cpu(pSMBr->MaxMpxCount); /* probably no need to store and check maxvcs */ server->maxBuf = - min(le32_to_cpu(pSMBr->MaxBufferSize), + min(le32_to_cpu(pSMBr->MaxBufferSize), (__u32) CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE); server->maxRw = le32_to_cpu(pSMBr->MaxRawSize); cFYI(0, ("Max buf = %d ", ses->server->maxBuf)); @@ -172,7 +245,6 @@ CIFSSMBNegotiate(unsigned int xid, struc pSMBr->ByteCount - 16, &server->secType); } - } else server->capabilities &= ~CAP_EXTENDED_SECURITY; if(sign_CIFS_PDUs == FALSE) { @@ -187,7 +259,7 @@ CIFSSMBNegotiate(unsigned int xid, struc } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); return rc; } @@ -218,12 +290,19 @@ 
CIFSSMBTDis(const int xid, struct cifsTc return -EBUSY; } + /* No need to return error on this operation if tid invalidated and + closed on server already e.g. due to tcp session crashing */ + if(tcon->tidStatus == CifsNeedReconnect) { + up(&tcon->tconSem); + return 0; + } + /* BB remove (from server) list of shares - but with smp safety BB */ /* BB is ses active - do we need to check here - but how? BB */ - if((tcon->ses == 0) || (tcon->ses->server == 0)) { - up(&tcon->tconSem); - return -EIO; - } + if((tcon->ses == 0) || (tcon->ses->server == 0)) { + up(&tcon->tconSem); + return -EIO; + } rc = smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, (void **) &smb_buffer, (void **) &smb_buffer_response); @@ -237,8 +316,14 @@ CIFSSMBTDis(const int xid, struct cifsTc cFYI(1, (" Tree disconnect failed %d", rc)); if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); up(&tcon->tconSem); + + /* No need to return error on this operation if tid invalidated and + closed on server already e.g. 
due to tcp session crashing */ + if (rc == -EAGAIN) + rc = 0; + return rc; } @@ -251,9 +336,8 @@ CIFSSMBLogoff(const int xid, struct cifs int length; cFYI(1, ("In SMBLogoff for session disconnect")); - if (ses) - down(&ses->sesSem); /* check this sem more places */ + down(&ses->sesSem); else return -EIO; @@ -266,8 +350,8 @@ CIFSSMBLogoff(const int xid, struct cifs rc = smb_init(SMB_COM_LOGOFF_ANDX, 2, 0 /* no tcon anymore */, (void **) &pSMB, (void **) &smb_buffer_response); - if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) - pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; + if(ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; if (rc) { up(&ses->sesSem); @@ -285,8 +369,14 @@ CIFSSMBLogoff(const int xid, struct cifs ses->server->tcpStatus = CifsExiting; } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); up(&ses->sesSem); + + /* if session dead then we do not need to do ulogoff, + since server closed smb session, no sense reporting + error */ + if (rc == -EAGAIN) + rc = 0; return rc; } @@ -300,6 +390,7 @@ CIFSSMBDelFile(const int xid, struct cif int bytes_returned; int name_len; +DelFileRetry: rc = smb_init(SMB_COM_DELETE, 1, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -329,7 +420,10 @@ CIFSSMBDelFile(const int xid, struct cif cFYI(1, ("Error in RMFile = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto DelFileRetry; + return rc; } @@ -344,7 +438,7 @@ CIFSSMBRmDir(const int xid, struct cifsT int name_len; cFYI(1, ("In CIFSSMBRmDir")); - +RmDirRetry: rc = smb_init(SMB_COM_DELETE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -372,7 +466,9 @@ CIFSSMBRmDir(const int xid, struct cifsT cFYI(1, ("Error in RMDir = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto RmDirRetry; return rc; } @@ -387,7 +483,7 @@ CIFSSMBMkDir(const int xid, struct cifsT 
int name_len; cFYI(1, ("In CIFSSMBMkDir")); - +MkDirRetry: rc = smb_init(SMB_COM_CREATE_DIRECTORY, 0, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -415,8 +511,9 @@ CIFSSMBMkDir(const int xid, struct cifsT cFYI(1, ("Error in Mkdir = %d", rc)); } if (pSMB) - buf_release(pSMB); - + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto MkDirRetry; return rc; } @@ -433,6 +530,7 @@ CIFSSMBOpen(const int xid, struct cifsTc int bytes_returned; int name_len; +openRetry: rc = smb_init(SMB_COM_NT_CREATE_ANDX, 24, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -464,8 +562,14 @@ CIFSSMBOpen(const int xid, struct cifsTc } pSMB->DesiredAccess = cpu_to_le32(access_flags); pSMB->AllocationSize = 0; - pSMB->FileAttributes = ATTR_NORMAL; /* XP does not handle ATTR_POSIX_SEMANTICS */ - /*if ((omode & S_IWUGO) == 0) + pSMB->FileAttributes = ATTR_NORMAL; + /* XP does not handle ATTR_POSIX_SEMANTICS */ + /* but it helps speed up case sensitive checks for other + servers such as Samba */ + if (tcon->ses->capabilities & CAP_UNIX) + pSMB->FileAttributes |= ATTR_POSIX_SEMANTICS; + + /* if ((omode & S_IWUGO) == 0) pSMB->FileAttributes |= ATTR_READONLY;*/ /* Above line causes problems due to vfs splitting create into two pieces - need to set mode after file created not while it is @@ -501,8 +605,9 @@ CIFSSMBOpen(const int xid, struct cifsTc } } if (pSMB) - buf_release(pSMB); - + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto openRetry; return rc; } @@ -527,9 +632,9 @@ CIFSSMBRead(const int xid, struct cifsTc if (rc) return rc; - /* tcon and ses pointer are checked in smb_init */ - if (tcon->ses->server == NULL) - return -ECONNABORTED; + /* tcon and ses pointer are checked in smb_init */ + if (tcon->ses->server == NULL) + return -ECONNABORTED; pSMB->AndXCommand = 0xFF; /* none */ pSMB->Fid = netfid; @@ -567,10 +672,13 @@ CIFSSMBRead(const int xid, struct cifsTc } if (pSMB) { if(*buf) - buf_release(pSMB); + cifs_buf_release(pSMB); else *buf = (char *)pSMB; } + + /* Note: On 
-EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ return rc; } @@ -623,7 +731,10 @@ CIFSSMBWrite(const int xid, struct cifsT *nbytes = le16_to_cpu(pSMBr->Count); if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ return rc; } @@ -639,9 +750,9 @@ CIFSSMBLock(const int xid, struct cifsTc LOCK_RSP *pSMBr = NULL; int bytes_returned; int timeout = 0; + __u64 temp; - cFYI(1, ("In CIFSSMBLock")); - + cFYI(1, ("In CIFSSMBLock - timeout %d numLock %d",waitFlag,numLock)); rc = smb_init(SMB_COM_LOCKING_ANDX, 8, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -649,6 +760,12 @@ CIFSSMBLock(const int xid, struct cifsTc if(lockType == LOCKING_ANDX_OPLOCK_RELEASE) { timeout = -1; /* no response expected */ + pSMB->Timeout = 0; + } else if (waitFlag == TRUE) { + timeout = 3; /* blocking operation, no timeout */ + pSMB->Timeout = -1; /* blocking - do not time out */ + } else { + pSMB->Timeout = 0; } pSMB->NumberOfLocks = cpu_to_le32(numLock); @@ -658,8 +775,12 @@ CIFSSMBLock(const int xid, struct cifsTc pSMB->Fid = smb_file_id; /* netfid stays le */ pSMB->Locks[0].Pid = cpu_to_le16(current->tgid); - pSMB->Locks[0].Length = cpu_to_le64(len); - pSMB->Locks[0].Offset = cpu_to_le64(offset); + temp = cpu_to_le64(len); + pSMB->Locks[0].LengthLow = (__u32)(len & 0xFFFFFFFF); + pSMB->Locks[0].LengthHigh = (__u32)(len>>32); + temp = cpu_to_le64(offset); + pSMB->Locks[0].OffsetLow = (__u32)(offset & 0xFFFFFFFF); + pSMB->Locks[0].OffsetHigh = (__u32)(offset>>32); pSMB->ByteCount = sizeof (LOCKING_ANDX_RANGE); pSMB->hdr.smb_buf_length += pSMB->ByteCount; pSMB->ByteCount = cpu_to_le16(pSMB->ByteCount); @@ -671,8 +792,10 @@ CIFSSMBLock(const int xid, struct cifsTc cERROR(1, ("Send error in Lock = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + /* Note: On -EAGAIN error only caller can 
retry on handle based calls + since file handle passed in no longer valid */ return rc; } @@ -685,8 +808,11 @@ CIFSSMBClose(const int xid, struct cifsT int bytes_returned; cFYI(1, ("In CIFSSMBClose")); +/* do not retry on dead session on close */ rc = smb_init(SMB_COM_CLOSE, 3, tcon, (void **) &pSMB, (void **) &pSMBr); + if(rc == -EAGAIN) + return 0; if (rc) return rc; @@ -699,7 +825,11 @@ CIFSSMBClose(const int xid, struct cifsT cERROR(1, ("Send error in Close = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Since session is dead, file will be closed on server already */ + if(rc == -EAGAIN) + rc = 0; return rc; } @@ -716,7 +846,7 @@ CIFSSMBRename(const int xid, struct cifs int name_len, name_len2; cFYI(1, ("In CIFSSMBRename")); - +renameRetry: rc = smb_init(SMB_COM_RENAME, 1, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -766,7 +896,10 @@ CIFSSMBRename(const int xid, struct cifs cFYI(1, ("Send error in rename = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto renameRetry; return rc; } @@ -774,44 +907,43 @@ CIFSSMBRename(const int xid, struct cifs int CIFSSMBRenameOpenFile(const int xid,struct cifsTconInfo *pTcon, int netfid, char * target_name, const struct nls_table * nls_codepage) { - struct smb_com_transaction2_sfi_req *pSMB = NULL; - struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; + struct smb_com_transaction2_sfi_req *pSMB = NULL; + struct smb_com_transaction2_sfi_rsp *pSMBr = NULL; struct set_file_rename * rename_info; - char *data_offset; + char *data_offset; char dummy_string[30]; - int rc = 0; - int bytes_returned = 0; + int rc = 0; + int bytes_returned = 0; int len_of_str; - cFYI(1, ("Rename to File by handle")); + cFYI(1, ("Rename to File by handle")); + rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, + (void **) &pSMBr); + if (rc) + return rc; - rc = smb_init(SMB_COM_TRANSACTION2, 15, pTcon, (void **) &pSMB, - (void **) &pSMBr); - if (rc) - return 
rc; - - pSMB->ParameterCount = 6; - pSMB->MaxSetupCount = 0; - pSMB->Reserved = 0; - pSMB->Flags = 0; - pSMB->Timeout = 0; - pSMB->Reserved2 = 0; - pSMB->ParameterOffset = offsetof(struct smb_com_transaction2_sfi_req, - Fid) - 4; - pSMB->DataOffset = pSMB->ParameterOffset + pSMB->ParameterCount; + pSMB->ParameterCount = 6; + pSMB->MaxSetupCount = 0; + pSMB->Reserved = 0; + pSMB->Flags = 0; + pSMB->Timeout = 0; + pSMB->Reserved2 = 0; + pSMB->ParameterOffset = offsetof(struct smb_com_transaction2_sfi_req, + Fid) - 4; + pSMB->DataOffset = pSMB->ParameterOffset + pSMB->ParameterCount; - data_offset = (char *) (&pSMB->hdr.Protocol) + pSMB->DataOffset; + data_offset = (char *) (&pSMB->hdr.Protocol) + pSMB->DataOffset; rename_info = (struct set_file_rename *) data_offset; - pSMB->MaxParameterCount = cpu_to_le16(2); - pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ - pSMB->SetupCount = 1; - pSMB->Reserved3 = 0; - pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); - pSMB->ByteCount = 3 /* pad */ + pSMB->ParameterCount; - pSMB->ParameterCount = cpu_to_le16(pSMB->ParameterCount); - pSMB->TotalParameterCount = pSMB->ParameterCount; - pSMB->ParameterOffset = cpu_to_le16(pSMB->ParameterOffset); - pSMB->DataOffset = cpu_to_le16(pSMB->DataOffset); + pSMB->MaxParameterCount = cpu_to_le16(2); + pSMB->MaxDataCount = cpu_to_le16(1000); /* BB find max SMB PDU from sess */ + pSMB->SetupCount = 1; + pSMB->Reserved3 = 0; + pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); + pSMB->ByteCount = 3 /* pad */ + pSMB->ParameterCount; + pSMB->ParameterCount = cpu_to_le16(pSMB->ParameterCount); + pSMB->TotalParameterCount = pSMB->ParameterCount; + pSMB->ParameterOffset = cpu_to_le16(pSMB->ParameterOffset); + pSMB->DataOffset = cpu_to_le16(pSMB->DataOffset); /* construct random name ".cifs_tmp" */ rename_info->overwrite = cpu_to_le32(1); rename_info->root_fid = 0; @@ -836,11 +968,15 @@ int CIFSSMBRenameOpenFile(const int xid, rc = SendReceive(xid, 
pTcon->ses, (struct smb_hdr *) pSMB, (struct smb_hdr *) pSMBr, &bytes_returned, 0); if (rc) { - cFYI(1,("Send error in Rename (by file handle) = %d", rc)); + cFYI(1,("Send error in Rename (by file handle) = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + return rc; } @@ -859,7 +995,7 @@ CIFSUnixCreateSymLink(const int xid, str int bytes_returned = 0; cFYI(1, ("In Symlink Unix style")); - +createSymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -929,7 +1065,11 @@ CIFSUnixCreateSymLink(const int xid, str } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto createSymLinkRetry; + return rc; } @@ -947,7 +1087,7 @@ CIFSUnixCreateHardLink(const int xid, st int bytes_returned = 0; cFYI(1, ("In Create Hard link Unix style")); - +createHardLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1014,7 +1154,10 @@ CIFSUnixCreateHardLink(const int xid, st } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto createHardLinkRetry; + return rc; } @@ -1030,6 +1173,7 @@ CIFSCreateHardLink(const int xid, struct int name_len, name_len2; cFYI(1, ("In CIFSCreateHardLink")); +winCreateHardLinkRetry: rc = smb_init(SMB_COM_NT_RENAME, 4, tcon, (void **) &pSMB, (void **) &pSMBr); @@ -1081,7 +1225,9 @@ CIFSCreateHardLink(const int xid, struct cFYI(1, ("Send error in hard link (NT rename) = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto winCreateHardLinkRetry; return rc; } @@ -1100,6 +1246,8 @@ CIFSSMBUnixQuerySymLink(const int xid, s int name_len; cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); + +querySymLinkRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ 
-1174,7 +1322,9 @@ CIFSSMBUnixQuerySymLink(const int xid, s } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto querySymLinkRetry; return rc; } @@ -1256,7 +1406,11 @@ CIFSSMBQueryReparseLinkInfo(const int xi } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + return rc; } @@ -1274,6 +1428,7 @@ CIFSSMBQPathInfo(const int xid, struct c int name_len; cFYI(1, ("In QPathInfo path %s", searchName)); +QPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1334,7 +1489,10 @@ CIFSSMBQPathInfo(const int xid, struct c rc = -ENOMEM; } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto QPathInfoRetry; + return rc; } @@ -1352,6 +1510,7 @@ CIFSSMBUnixQPathInfo(const int xid, stru int name_len; cFYI(1, ("In QPathInfo (Unix) the path %s", searchName)); +UnixQPathInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1413,7 +1572,10 @@ CIFSSMBUnixQPathInfo(const int xid, stru } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto UnixQPathInfoRetry; + return rc; } @@ -1430,6 +1592,7 @@ CIFSFindSingle(const int xid, struct cif int name_len; cFYI(1, ("In FindUnique")); +findUniqueRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1487,7 +1650,10 @@ CIFSFindSingle(const int xid, struct cif /* BB fill in */ } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto findUniqueRetry; + return rc; } @@ -1507,6 +1673,7 @@ CIFSFindFirst(const int xid, struct cifs int name_len; cFYI(1, ("In FindFirst")); +findFirstRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1590,7 +1757,11 @@ CIFSFindFirst(const int xid, struct cifs 
memcpy(findData, response_data, le16_to_cpu(pSMBr->DataCount)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto findFirstRetry; + return rc; } @@ -1608,6 +1779,7 @@ CIFSFindNext(const int xid, struct cifsT int bytes_returned; cFYI(1, ("In FindNext")); + if(resume_file_name == NULL) { return -EIO; } @@ -1690,7 +1862,11 @@ CIFSFindNext(const int xid, struct cifsT memcpy(findData, response_data, le16_to_cpu(pSMBr->DataCount)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + return rc; } @@ -1701,10 +1877,14 @@ CIFSFindClose(const int xid, struct cifs FINDCLOSE_REQ *pSMB = NULL; CLOSE_RSP *pSMBr = NULL; int bytes_returned; - cFYI(1, ("In CIFSSMBFindClose")); + cFYI(1, ("In CIFSSMBFindClose")); rc = smb_init(SMB_COM_FIND_CLOSE2, 1, tcon, (void **) &pSMB, (void **) &pSMBr); + /* no sense returning error if session restarted + file handle has been closed */ + if(rc == -EAGAIN) + return 0; if (rc) return rc; @@ -1716,7 +1896,11 @@ CIFSFindClose(const int xid, struct cifs cERROR(1, ("Send error in FindClose = %d", rc)); } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Since session is dead, search handle closed on server already */ + if (rc == -EAGAIN) + rc = 0; return rc; } @@ -1743,7 +1927,7 @@ CIFSGetDFSRefer(const int xid, struct ci cFYI(1, ("In GetDFSRefer the path %s", searchName)); if (ses == NULL) return -ENODEV; - +getDFSRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, 0, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -1874,7 +2058,11 @@ CIFSGetDFSRefer(const int xid, struct ci } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto getDFSRetry; + return rc; } @@ -1890,7 +2078,7 @@ CIFSSMBQFSInfo(const int xid, struct cif int bytes_returned = 0; cFYI(1, ("In QFSInfo")); - +QFSInfoRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) 
&pSMB, (void **) &pSMBr); if (rc) @@ -1951,7 +2139,11 @@ CIFSSMBQFSInfo(const int xid, struct cif } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto QFSInfoRetry; + return rc; } @@ -1967,6 +2159,7 @@ CIFSSMBQFSAttributeInfo(int xid, struct int bytes_returned = 0; cFYI(1, ("In QFSAttributeInfo")); +QFSAttributeRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2013,7 +2206,11 @@ CIFSSMBQFSAttributeInfo(int xid, struct } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto QFSAttributeRetry; + return rc; } @@ -2029,7 +2226,7 @@ CIFSSMBQFSDeviceInfo(int xid, struct cif int bytes_returned = 0; cFYI(1, ("In QFSDeviceInfo")); - +QFSDeviceRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2078,7 +2275,12 @@ CIFSSMBQFSDeviceInfo(int xid, struct cif } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto QFSDeviceRetry; + + return rc; } @@ -2094,6 +2296,7 @@ CIFSSMBQFSUnixInfo(int xid, struct cifsT int bytes_returned = 0; cFYI(1, ("In QFSUnixInfo")); +QFSUnixRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2140,7 +2343,12 @@ CIFSSMBQFSUnixInfo(int xid, struct cifsT } } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto QFSUnixRetry; + + return rc; } @@ -2162,7 +2370,7 @@ CIFSSMBSetEOF(int xid, struct cifsTconIn int bytes_returned = 0; cFYI(1, ("In SetEOF")); - +SetEOFRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2232,7 +2440,11 @@ CIFSSMBSetEOF(int xid, struct cifsTconIn } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto SetEOFRetry; + return rc; } @@ -2248,8 +2460,7 @@ CIFSSMBSetFileSize(const int xid, struct int bytes_returned = 0; __u32 tmp; - cFYI(1, ("SetFileSize (via SetFileInfo)")); 
- + cFYI(1, ("SetFileSize (via SetFileInfo) %lld",size)); rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2318,7 +2529,11 @@ CIFSSMBSetFileSize(const int xid, struct } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + /* Note: On -EAGAIN error only caller can retry on handle based calls + since file handle passed in no longer valid */ + return rc; } @@ -2335,6 +2550,7 @@ CIFSSMBSetTimes(int xid, struct cifsTcon cFYI(1, ("In SetTimes")); +SetTimesRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2392,7 +2608,11 @@ CIFSSMBSetTimes(int xid, struct cifsTcon } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + + if (rc == -EAGAIN) + goto SetTimesRetry; + return rc; } @@ -2409,7 +2629,7 @@ CIFSSMBUnixSetPerms(const int xid, struc FILE_UNIX_BASIC_INFO *data_offset; cFYI(1, ("In SetUID/GID/Mode")); - +setPermsRetry: rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, (void **) &pSMBr); if (rc) @@ -2470,6 +2690,8 @@ CIFSSMBUnixSetPerms(const int xid, struc } if (pSMB) - buf_release(pSMB); + cifs_buf_release(pSMB); + if (rc == -EAGAIN) + goto setPermsRetry; return rc; } --- linux-2.6.6-rc1/fs/cifs/cifs_unicode.c 2003-07-10 18:50:31.000000000 -0700 +++ 25/fs/cifs/cifs_unicode.c 2004-04-18 22:25:28.599482696 -0700 @@ -25,25 +25,6 @@ #include "cifs_debug.h" /* - * NAME: toUpper() - * - * FUNCTION: Upper case ASCII string (in place) using the current codepage - * - */ - -void -toUpper(const struct nls_table *n, char *mixed_string) -{ - unsigned int i; - char temp; - - for (i = 0; i < strlen(mixed_string); i++) { - temp = mixed_string[i]; - mixed_string[i] = n->charset2upper[(int) temp]; - } -} - -/* * NAME: cifs_strfromUCS() * * FUNCTION: Convert little-endian unicode string to character string @@ -104,28 +85,3 @@ cifs_strtoUCS(wchar_t * to, const char * return i; } -/* - * NAME: get_UCSname2() - * - * FUNCTION: Allocate and translate to unicode string - 
* - */ -/*int -get_UCSname2(struct component_name *uniName, struct dentry *dentry, - struct nls_table *nls_tab) -{ - int length = dentry->d_name.len; - - if (length > 255) - return ENAMETOOLONG; - - uniName->name = kmalloc((length + 1) * sizeof (wchar_t), GFP_KERNEL); - - if (uniName->name == NULL) - return ENOSPC; - - uniName->namlen = cifs_strtoUCS(uniName->name, dentry->d_name.name, - length, nls_tab); - - return 0; -} */ --- linux-2.6.6-rc1/fs/cifs/cifs_unicode.h 2003-06-14 12:18:35.000000000 -0700 +++ 25/fs/cifs/cifs_unicode.h 2004-04-18 22:25:28.600482544 -0700 @@ -58,26 +58,11 @@ extern signed char UniLowerTable[512]; extern struct UniCaseRange UniLowerRange[]; #endif /* UNIUPR_NOLOWER */ -/* - * directory entry argument - */ -struct component_name { - int namlen; - wchar_t *name; -}; - #ifdef __KERNEL__ int cifs_strfromUCS_le(char *, const wchar_t *, int, const struct nls_table *); int cifs_strtoUCS(wchar_t *, const char *, int, const struct nls_table *); - -int cifs_UCSname(struct component_name *, struct dentry *, - const struct nls_table *); - -void toUpper(const struct nls_table *, char *); #endif -#define free_UCSname(COMP) kfree((COMP)->name) - /* * UniStrcat: Concatenate the second string to the first * --- linux-2.6.6-rc1/fs/cifs/connect.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/cifs/connect.c 2004-04-18 22:25:28.621479352 -0700 @@ -44,7 +44,7 @@ extern void SMBencrypt(unsigned char *pa unsigned char *p24); extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24); -extern int inet_addr(char *); +extern int cifs_inet_pton(int, const char *, void *dst); struct smb_vol { char *username; @@ -58,7 +58,8 @@ struct smb_vol { gid_t linux_gid; mode_t file_mode; mode_t dir_mode; - int rw; + int rw:1; + int retry:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -66,13 +67,14 @@ struct smb_vol { }; int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket); +int ipv6_connect(struct 
sockaddr_in6 *psin_server, struct socket **csocket); /* * cifs tcp session reconnection * * mark tcp session as reconnecting so temporarily locked - * mark all smb sessions as reconnecting for tcp session (TBD BB) + * mark all smb sessions as reconnecting for tcp session * reconnect tcp session * wake up waiters on reconnection? - (not needed currently) */ @@ -84,6 +86,7 @@ cifs_reconnect(struct TCP_Server_Info *s struct list_head *tmp; struct cifsSesInfo *ses; struct cifsTconInfo *tcon; + struct mid_q_entry * mid_entry; if(server->tcpStatus == CifsExiting) return rc; @@ -123,13 +126,35 @@ cifs_reconnect(struct TCP_Server_Info *s server->ssocket = NULL; } + spin_lock(&GlobalMid_Lock); + list_for_each(tmp, &server->pending_mid_q) { + mid_entry = list_entry(tmp, struct + mid_q_entry, + qhead); + if(mid_entry) { + if(mid_entry->midState == MID_REQUEST_SUBMITTED) { + /* Mark other intransit requests as needing retry so + we do not immediately mark the session bad again + (ie after we reconnect below) as they timeout too */ + mid_entry->midState = MID_RETRY_NEEDED; + } + } + } + spin_unlock(&GlobalMid_Lock); + + while ((server->tcpStatus != CifsExiting) && (server->tcpStatus != CifsGood)) { - rc = ipv4_connect(&server->sockAddr, &server->ssocket); + if(server->protocolType == IPV6) { + rc = ipv6_connect(&server->addr.sockAddr6,&server->ssocket); + } else { + rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket); + } if(rc) { set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(3 * HZ); } else { + atomic_inc(&tcpSesReconnectCount); server->tcpStatus = CifsGood; wake_up(&server->response_q); } @@ -138,7 +163,7 @@ cifs_reconnect(struct TCP_Server_Info *s return rc; } -int +static int cifs_demultiplex_thread(struct TCP_Server_Info *server) { int length; @@ -165,7 +190,7 @@ cifs_demultiplex_thread(struct TCP_Serve while (server->tcpStatus != CifsExiting) { if (smb_buffer == NULL) - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); else memset(smb_buffer, 0, 
sizeof (struct smb_hdr)); @@ -193,6 +218,7 @@ cifs_demultiplex_thread(struct TCP_Serve } else if (server->tcpStatus == CifsNeedReconnect) { cFYI(1,("Reconnecting after server stopped responding")); cifs_reconnect(server); + cFYI(1,("call to reconnect done")); csocket = server->ssocket; continue; } else if ((length == -ERESTARTSYS) || (length == -EAGAIN)) { @@ -201,6 +227,15 @@ cifs_demultiplex_thread(struct TCP_Serve tcpStatus CifsNeedReconnect if server hung */ continue; } else if (length <= 0) { + if(server->tcpStatus == CifsNew) { + cFYI(1,("tcp session abended prematurely (after SMBnegprot)")); + /* some servers kill tcp session rather than returning + smb negprot error in which case reconnecting here is + not going to help - return error to mount */ + server->tcpStatus = CifsExiting; + break; + } + cFYI(1,("Reconnecting after unexpected rcvmsg error ")); cifs_reconnect(server); csocket = server->ssocket; @@ -208,6 +243,8 @@ cifs_demultiplex_thread(struct TCP_Serve } pdu_length = 4 + ntohl(smb_buffer->smb_buf_length); + /* Ony read pdu_length after below checks for too short (due + to e.g. 
int overflow) and too long ie beyond end of buf */ cFYI(1, ("Peek length rcvd: %d with smb length: %d", length, pdu_length)); temp = (char *) smb_buffer; @@ -228,8 +265,8 @@ cifs_demultiplex_thread(struct TCP_Serve } else if (temp[0] != (char) 0) { cERROR(1, - ("Unknown RFC 1001 frame received not 0x00 nor 0x85")); - cifs_dump_mem(" Received Data is: ", temp, length); + ("Unknown RFC 1001 frame not 0x00 nor 0x85")); + cifs_dump_mem(" Received Data: ", temp, length); cifs_reconnect(server); csocket = server->ssocket; continue; @@ -257,8 +294,9 @@ cifs_demultiplex_thread(struct TCP_Serve length = 0; iov.iov_base = smb_buffer; iov.iov_len = pdu_length; - for (total_read = 0; total_read < pdu_length; total_read += length) { - /* Should improve check for buffer overflow with bad pdu_length */ + for (total_read = 0; + total_read < pdu_length; + total_read += length) { length = sock_recvmsg(csocket, &smb_msg, pdu_length - total_read, 0); if (length == 0) { @@ -286,7 +324,7 @@ cifs_demultiplex_thread(struct TCP_Serve mid_q_entry, qhead); - if (mid_entry->mid == smb_buffer->Mid) { + if ((mid_entry->mid == smb_buffer->Mid) && (mid_entry->midState == MID_REQUEST_SUBMITTED)) { cFYI(1, (" Mid 0x%x matched - waking up ",mid_entry->mid)); task_to_wake = mid_entry->tsk; @@ -302,6 +340,7 @@ cifs_demultiplex_thread(struct TCP_Serve wake_up_process(task_to_wake); } else if (is_valid_oplock_break(smb_buffer) == FALSE) { cERROR(1, ("No task to wake, unknown frame rcvd!")); + cifs_dump_mem("Received Data is: ",temp,sizeof(struct smb_hdr)); } } } else { @@ -316,14 +355,16 @@ cifs_demultiplex_thread(struct TCP_Serve } } } - /* BB add code to lock SMB sessions while releasing */ + + server->tcpStatus = CifsExiting; + server->tsk = NULL; if(server->ssocket) { sock_release(csocket); - server->ssocket = NULL; + server->ssocket = NULL; } set_fs(temp_fs); if (smb_buffer) /* buffer usually freed in free_mid - need to free it on error or exit */ - buf_release(smb_buffer); + 
cifs_buf_release(smb_buffer); read_lock(&GlobalSMBSeslock); if (list_empty(&server->pending_mid_q)) { @@ -337,39 +378,85 @@ cifs_demultiplex_thread(struct TCP_Serve ses->server = NULL; } } - kfree(server); - } else /* BB need to more gracefully handle the rare negative session - response case because response will be still outstanding */ - cERROR(1, ("Active MIDs in queue while exiting - can not delete mid_q_entries or TCP_Server_Info structure due to pending requests MEMORY LEAK!!")); - /* BB wake up waitors, and/or wait and/or free stale mids and try again? BB */ - /* BB Need to fix bug in error path above - perhaps wait until smb requests - time out and then free the tcp per server struct BB */ - read_unlock(&GlobalSMBSeslock); + read_unlock(&GlobalSMBSeslock); + } else { + spin_lock(&GlobalMid_Lock); + list_for_each(tmp, &server->pending_mid_q) { + mid_entry = list_entry(tmp, struct mid_q_entry, qhead); + if (mid_entry->midState == MID_REQUEST_SUBMITTED) { + cFYI(1, + (" Clearing Mid 0x%x - waking up ",mid_entry->mid)); + task_to_wake = mid_entry->tsk; + if(task_to_wake) { + wake_up_process(task_to_wake); + } + } + } + spin_unlock(&GlobalMid_Lock); + read_unlock(&GlobalSMBSeslock); + set_current_state(TASK_INTERRUPTIBLE); + /* 1/8th of sec should be more than enough time for them to exit */ + schedule_timeout(HZ/8); + } + + if (list_empty(&server->pending_mid_q)) { + /* mpx threads have not exited yet give them + at least the smb send timeout time for long ops */ + cFYI(1, ("Wait for exit from demultiplex thread")); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(46 * HZ); + /* if threads still have not exited they are probably never + coming home not much else we can do but free the memory */ + } + kfree(server); cFYI(1, ("About to exit from demultiplex thread")); return 0; } -int -parse_mount_options(char *options, const char *devname, struct smb_vol *vol) +static void * +cifs_kcalloc(size_t size, int type) +{ + void *addr; + addr = kmalloc(size, 
type); + if (addr) + memset(addr, 0, size); + return addr; +} + +static int +cifs_parse_mount_options(char *options, const char *devname, struct smb_vol *vol) { char *value; char *data; - int temp_len; + int temp_len, i, j; + char separator[2]; + + separator[0] = ','; + separator[1] = 0; - memset(vol,0,sizeof(struct smb_vol)); vol->linux_uid = current->uid; /* current->euid instead? */ vol->linux_gid = current->gid; vol->dir_mode = S_IRWXUGO; /* 2767 perms indicate mandatory locking support */ vol->file_mode = S_IALLUGO & ~(S_ISUID | S_IXGRP); + /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ vol->rw = TRUE; if (!options) return 1; - while ((data = strsep(&options, ",")) != NULL) { + if(strncmp(options,"sep=",4) == 0) { + if(options[4] != 0) { + separator[0] = options[4]; + options += 5; + } else { + cFYI(1,("Null separator not allowed")); + } + } + + while ((data = strsep(&options, separator)) != NULL) { if (!*data) continue; if ((value = strchr(data, '=')) != NULL) @@ -389,11 +476,54 @@ parse_mount_options(char *options, const } else if (strnicmp(data, "pass", 4) == 0) { if (!value || !*value) { vol->password = NULL; - } else if (strnlen(value, 17) < 17) { - vol->password = value; + continue; + } + temp_len = strlen(value); + /* removed password length check, NTLM passwords + can be arbitrarily long */ + + /* if comma in password, the string will be + prematurely null terminated. Commas in password are + specified across the cifs mount interface by a double + comma ie ,, and a comma used as in other cases ie ',' + as a parameter delimiter/separator is single and due + to the strsep above is temporarily zeroed. 
*/ + + /* NB: password legally can have multiple commas and + the only illegal character in a password is null */ + + if ((value[temp_len] == 0) && (value[temp_len+1] == separator[0])) { + /* reinsert comma */ + value[temp_len] = separator[0]; + temp_len+=2; /* move after the second comma */ + while(value[temp_len] != 0) { + if((value[temp_len] == separator[0]) && (value[temp_len+1] != separator[0])) { + /* single comma indicating start of next parm */ + break; + } + temp_len++; + } + if(value[temp_len] == 0) { + options = NULL; + } else { + value[temp_len] = 0; + /* move options to point to start of next parm */ + options = value + temp_len + 1; + } + /* go from value to (value + temp_len) condensing double commas to singles */ + vol->password = cifs_kcalloc(temp_len, GFP_KERNEL); + for(i=0,j=0;ipassword[j] = value[i]; + if(value[i] == separator[0] && value[i+1] == separator[0]) { + /* skip second comma */ + i++; + } + } + /* value[temp_len] is zeroed above so + vol->password[temp_len] guaranteed to be null */ } else { - printk(KERN_WARNING "CIFS: password too long\n"); - return 1; + vol->password = cifs_kcalloc(temp_len + 1, GFP_KERNEL); + strcpy(vol->password, value); } } else if (strnicmp(data, "ip", 2) == 0) { if (!value || !*value) { @@ -506,8 +636,29 @@ parse_mount_options(char *options, const /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = TRUE; + } else if ((strnicmp(data, "suid", 4) == 0) || + (strnicmp(data, "nosuid", 6) == 0) || + (strnicmp(data, "exec", 4) == 0) || + (strnicmp(data, "noexec", 6) == 0) || + (strnicmp(data, "nodev", 5) == 0) || + (strnicmp(data, "dev", 3) == 0)) { + /* The mount tool or mount.cifs helper (if present) + uses these opts to set flags, and the flags are read + by the kernel vfs layer before we get here (ie + before read super) so there is no point trying to + parse these options again and set anything and it + is ok to just ignore them */ + continue; } else if (strnicmp(data, "ro", 2) == 0) { vol->rw = 
FALSE; + } else if (strnicmp(data, "hard", 4) == 0) { + vol->retry = 1; + } else if (strnicmp(data, "soft", 4) == 0) { + vol->retry = 0; + } else if (strnicmp(data, "nohard", 6) == 0) { + vol->retry = 0; + } else if (strnicmp(data, "nosoft", 6) == 0) { + vol->retry = 1; } else printk(KERN_WARNING "CIFS: Unknown mount option %s\n",data); } @@ -537,8 +688,8 @@ parse_mount_options(char *options, const return 0; } -struct cifsSesInfo * -find_tcp_session(__u32 new_target_ip_addr, +static struct cifsSesInfo * +cifs_find_tcp_session(__u32 new_target_ip_addr, char *userName, struct TCP_Server_Info **psrvTcp) { struct list_head *tmp; @@ -549,7 +700,7 @@ find_tcp_session(__u32 new_target_ip_add list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if (ses->server->sockAddr.sin_addr.s_addr == + if (ses->server->addr.sockAddr.sin_addr.s_addr == new_target_ip_addr) { /* BB lock server and tcp session and increment use count here?? */ *psrvTcp = ses->server; /* found a match on the TCP session */ @@ -568,7 +719,7 @@ find_tcp_session(__u32 new_target_ip_add return NULL; } -struct cifsTconInfo * +static struct cifsTconInfo * find_unc(__u32 new_target_ip_addr, char *uncName, char *userName) { struct list_head *tmp; @@ -582,9 +733,9 @@ find_unc(__u32 new_target_ip_addr, char if (tcon->ses->server) { cFYI(1, (" old ip addr: %x == new ip %x ?", - tcon->ses->server->sockAddr.sin_addr. + tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); - if (tcon->ses->server->sockAddr.sin_addr. + if (tcon->ses->server->addr.sockAddr.sin_addr. s_addr == new_target_ip_addr) { /* BB lock tcon and server and tcp session and increment use count here? 
*/ /* found a match on the TCP session */ @@ -628,7 +779,8 @@ connect_to_dfs_path(int xid, struct cifs the helper that resolves tcp names, mount to it, try to tcon to it unmount it if fail */ - /* BB free memory for referrals string BB */ + if(referrals) + kfree(referrals); return rc; } @@ -666,95 +818,11 @@ get_dfs_path(int xid, struct cifsSesInfo return rc; } -int setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, struct nls_table * nls_info) -{ - int rc = 0; - char ntlm_session_key[CIFS_SESSION_KEY_SIZE]; - int ntlmv2_flag = FALSE; - - /* what if server changes its buffer size after dropping the session? */ - if(pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ - rc = CIFSSMBNegotiate(xid, pSesInfo); - pSesInfo->capabilities = pSesInfo->server->capabilities; - pSesInfo->sequence_number = 0; - if (!rc) { - cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x Time Zone: %d", - pSesInfo->server->secMode, - pSesInfo->server->capabilities, - pSesInfo->server->timeZone)); - if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == NTLMSSP)) { - cFYI(1, ("New style sesssetup ")); - rc = CIFSSpnegoSessSetup(xid, pSesInfo, - NULL /* security blob */, - 0 /* blob length */, - nls_info); - } else if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == RawNTLMSSP)) { - cFYI(1, ("NTLMSSP sesssetup ")); - rc = CIFSNTLMSSPNegotiateSessSetup(xid, - pSesInfo, - &ntlmv2_flag, - nls_info); - if (!rc) { - if(ntlmv2_flag) { - char * v2_response; - cFYI(1,("Can use more secure NTLM version 2 password hash")); - CalcNTLMv2_partial_mac_key(pSesInfo, - nls_info); - v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); - if(v2_response) { - CalcNTLMv2_response(pSesInfo,v2_response); -/* cifs_calculate_ntlmv2_mac_key(pSesInfo->mac_signing_key, response, ntlm_session_key, */ - kfree(v2_response); - /* BB Put dummy sig in SessSetup PDU? 
*/ - } else - rc = -ENOMEM; - - } else { - SMBNTencrypt(pSesInfo->password_with_pad, - pSesInfo->server->cryptKey, - ntlm_session_key); - - cifs_calculate_mac_key(pSesInfo->mac_signing_key, - ntlm_session_key, - pSesInfo->password_with_pad); - } - /* for better security the weaker lanman hash not sent - in AuthSessSetup so we no longer calculate it */ - - rc = CIFSNTLMSSPAuthSessSetup(xid, - pSesInfo, - ntlm_session_key, - ntlmv2_flag, - nls_info); - } - } else { /* old style NTLM 0.12 session setup */ - SMBNTencrypt(pSesInfo->password_with_pad, - pSesInfo->server->cryptKey, - ntlm_session_key); - - cifs_calculate_mac_key(pSesInfo->mac_signing_key, - ntlm_session_key, pSesInfo->password_with_pad); - rc = CIFSSessSetup(xid, pSesInfo, - ntlm_session_key, nls_info); - } - if (rc) { - cERROR(1,("Send error in SessSetup = %d",rc)); - } else { - cFYI(1,("CIFS Session Established successfully")); - pSesInfo->status = CifsGood; - } - } - return rc; -} - int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket) { int rc = 0; + int connected = 0; if(*csocket == NULL) { rc = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, csocket); @@ -769,39 +837,47 @@ ipv4_connect(struct sockaddr_in *psin_se } psin_server->sin_family = AF_INET; + if(psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, sizeof (struct sockaddr_in),0); - if (rc >= 0) { - return rc; - } - } - - /* do not retry on the same port we just failed on */ - if(psin_server->sin_port != htons(CIFS_PORT)) { - psin_server->sin_port = htons(CIFS_PORT); + if (rc >= 0) + connected = 1; + } + + if(!connected) { + /* do not retry on the same port we just failed on */ + if(psin_server->sin_port != htons(CIFS_PORT)) { + psin_server->sin_port = htons(CIFS_PORT); - rc = (*csocket)->ops->connect(*csocket, + rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, sizeof (struct sockaddr_in),0); + if (rc >= 0) + connected = 
1; + } } - if (rc < 0) { + if (!connected) { psin_server->sin_port = htons(RFC1001_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, sizeof (struct sockaddr_in),0); - if (rc < 0) { - cFYI(1, ("Error connecting to socket. %d", rc)); - sock_release(*csocket); - *csocket = NULL; - return rc; - } + if (rc >= 0) + connected = 1; } + /* give up here - unless we want to retry on different + protocol families some day */ + if (!connected) { + cFYI(1,("Error %d connecting to server via ipv4",rc)); + sock_release(*csocket); + *csocket = NULL; + return rc; + } /* Eventually check for other socket options to change from the default. sock_setsockopt not used because it expects user space buffer */ - (*csocket)->sk->sk_rcvtimeo = 8 * HZ; + (*csocket)->sk->sk_rcvtimeo = 7 * HZ; return rc; } @@ -810,49 +886,63 @@ int ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) { int rc = 0; + int connected = 0; - rc = sock_create(PF_INET6, SOCK_STREAM, - IPPROTO_TCP /* IPPROTO_IPV6 ? */ , csocket); - if (rc < 0) { - cERROR(1, ("Error creating socket. Aborting operation")); - return rc; + if(*csocket == NULL) { + rc = sock_create(PF_INET6, SOCK_STREAM, IPPROTO_TCP, csocket); + if (rc < 0) { + cERROR(1, ("Error %d creating ipv6 socket",rc)); + *csocket = NULL; + return rc; + } else { + /* BB other socket options to set KEEPALIVE, NODELAY? */ + cFYI(1,("ipv6 Socket created")); + } } psin_server->sin6_family = AF_INET6; + if(psin_server->sin6_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) psin_server, sizeof (struct sockaddr_in6),0); - if (rc >= 0) { - /* BB other socket options to set KEEPALIVE, timeouts? NODELAY? 
*/ - return rc; - } - } + if (rc >= 0) + connected = 1; + } + + if(!connected) { + /* do not retry on the same port we just failed on */ + if(psin_server->sin6_port != htons(CIFS_PORT)) { + psin_server->sin6_port = htons(CIFS_PORT); - /* do not retry on the same port we just failed on */ - if(psin_server->sin6_port != htons(CIFS_PORT)) { - psin_server->sin6_port = htons(CIFS_PORT); - - rc = (*csocket)->ops->connect(*csocket, - (struct sockaddr *) psin_server, - sizeof (struct sockaddr_in6), 0); -/* BB fix the timeout to be shorter above - and check flags */ + rc = (*csocket)->ops->connect(*csocket, + (struct sockaddr *) psin_server, + sizeof (struct sockaddr_in6),0); + if (rc >= 0) + connected = 1; + } } - if (rc < 0) { + if (!connected) { psin_server->sin6_port = htons(RFC1001_PORT); rc = (*csocket)->ops->connect(*csocket, (struct sockaddr *) - psin_server, - sizeof (struct sockaddr_in6), 0); - if (rc < 0) { - cFYI(1, - ("Error connecting to socket (via ipv6). %d", - rc)); - sock_release(*csocket); - *csocket = NULL; - return rc; - } + psin_server, sizeof (struct sockaddr_in6),0); + if (rc >= 0) + connected = 1; } + /* give up here - unless we want to retry on different + protocol families some day */ + if (!connected) { + cFYI(1,("Error %d connecting to server via ipv6",rc)); + sock_release(*csocket); + *csocket = NULL; + return rc; + } + /* Eventually check for other socket options to change from + the default. 
sock_setsockopt not used because it expects + user space buffer */ + (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + return rc; } @@ -864,7 +954,7 @@ cifs_mount(struct super_block *sb, struc int xid; struct socket *csocket = NULL; struct sockaddr_in sin_server; -/* struct sockaddr_in6 sin_server6; */ + struct sockaddr_in6 sin_server6; struct smb_vol volume_info; struct cifsSesInfo *pSesInfo = NULL; struct cifsSesInfo *existingCifsSes = NULL; @@ -872,11 +962,16 @@ cifs_mount(struct super_block *sb, struc struct TCP_Server_Info *srvTcp = NULL; xid = GetXid(); - cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); - if (parse_mount_options(mount_data, devname, &volume_info)) { +/* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */ + + memset(&volume_info,0,sizeof(struct smb_vol)); + + if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { if(volume_info.UNC) kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); return -EINVAL; } @@ -888,17 +983,45 @@ cifs_mount(struct super_block *sb, struc cifserror("No username specified "); /* In userspace mount helper we can get user name from alternate locations such as env variables and files on disk */ + if(volume_info.UNC) + kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); return -EINVAL; } - if (volume_info.UNC) { - sin_server.sin_addr.s_addr = inet_addr(volume_info.UNCip); - cFYI(1, ("UNC: %s ", volume_info.UNC)); - } else { - /* BB we could connect to the DFS root? but which server do we ask? 
*/ + if (volume_info.UNCip && volume_info.UNC) { + rc = cifs_inet_pton(AF_INET, volume_info.UNCip,&sin_server.sin_addr.s_addr); + + if(rc == 0) { + /* not ipv4 address, try ipv6 */ + rc = cifs_inet_pton(AF_INET6,volume_info.UNCip,&sin_server6.sin6_addr.in6_u); + } + + if(rc != 1) { + /* we failed translating address */ + if(volume_info.UNC) + kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); + FreeXid(xid); + return -EINVAL; + } + + cFYI(1, ("UNC: %s ip: %s", volume_info.UNC, volume_info.UNCip)); + /* success */ + rc = 0; + } else if (volume_info.UNCip){ + /* BB using ip addr as server name connect to the DFS root below */ + cERROR(1,("Connecting to DFS root not implemented yet")); + } else /* which servers DFS root would we conect to */ { cERROR(1, ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified ")); + if(volume_info.UNC) + kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); return -EINVAL; } @@ -911,19 +1034,25 @@ cifs_mount(struct super_block *sb, struc cifs_sb->local_nls = load_nls(volume_info.iocharset); if(cifs_sb->local_nls == NULL) { cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset)); + if(volume_info.UNC) + kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); return -ELIBACC; } } existingCifsSes = - find_tcp_session(sin_server.sin_addr.s_addr, + cifs_find_tcp_session(sin_server.sin_addr.s_addr, volume_info.username, &srvTcp); if (srvTcp) { cFYI(1, ("Existing tcp session with server found ")); } else { /* create socket */ if(volume_info.port) sin_server.sin_port = htons(volume_info.port); + else + sin_server.sin_port = 0; rc = ipv4_connect(&sin_server, &csocket); if (rc < 0) { cERROR(1, @@ -932,6 +1061,8 @@ cifs_mount(struct super_block *sb, struc sock_release(csocket); if(volume_info.UNC) kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); 
return rc; } @@ -942,16 +1073,20 @@ cifs_mount(struct super_block *sb, struc sock_release(csocket); if(volume_info.UNC) kfree(volume_info.UNC); + if(volume_info.password) + kfree(volume_info.password); FreeXid(xid); return rc; } else { memset(srvTcp, 0, sizeof (struct TCP_Server_Info)); - memcpy(&srvTcp->sockAddr, &sin_server, sizeof (struct sockaddr_in)); + memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof (struct sockaddr_in)); /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; + srvTcp->protocolType = IPV4; init_waitqueue_head(&srvTcp->response_q); INIT_LIST_HEAD(&srvTcp->pending_mid_q); - srvTcp->tcpStatus = CifsGood; + srvTcp->tcpStatus = CifsNew; + init_MUTEX(&srvTcp->tcpSem); kernel_thread((void *)(void *)cifs_demultiplex_thread, srvTcp, CLONE_FS | CLONE_FILES | CLONE_VM); } @@ -960,6 +1095,8 @@ cifs_mount(struct super_block *sb, struc if (existingCifsSes) { pSesInfo = existingCifsSes; cFYI(1, ("Existing smb sess found ")); + if(volume_info.password) + kfree(volume_info.password); } else if (!rc) { cFYI(1, ("Existing smb sess not found ")); pSesInfo = sesInfoAlloc(); @@ -973,8 +1110,7 @@ cifs_mount(struct super_block *sb, struc if (!rc){ if (volume_info.password) - strncpy(pSesInfo->password_with_pad, - volume_info.password,CIFS_ENCPWD_SIZE); + pSesInfo->password = volume_info.password; if (volume_info.username) strncpy(pSesInfo->userName, volume_info.username,MAX_USERNAME_SIZE); @@ -982,11 +1118,14 @@ cifs_mount(struct super_block *sb, struc strncpy(pSesInfo->domainName, volume_info.domainname,MAX_USERNAME_SIZE); pSesInfo->linux_uid = volume_info.linux_uid; - - rc = setup_session(xid,pSesInfo, cifs_sb->local_nls); + down(&pSesInfo->sesSem); + rc = cifs_setup_session(xid,pSesInfo, cifs_sb->local_nls); + up(&pSesInfo->sesSem); if(!rc) atomic_inc(&srvTcp->socketUseCount); - } + } else + if(volume_info.password) + kfree(volume_info.password); } /* search for existing tcon to this server share */ @@ -1013,6 +1152,11 @@ cifs_mount(struct super_block 
*sb, struc volume_info.username); if (tcon) { cFYI(1, ("Found match on UNC path ")); + /* we can have only one retry value for a connection + to a share so for resources mounted more than once + to the same server share the last value passed in + for the retry flag is used */ + tcon->retry = volume_info.retry; } else { tcon = tconInfoAlloc(); if (tcon == NULL) @@ -1039,8 +1183,10 @@ cifs_mount(struct super_block *sb, struc tcon, cifs_sb->local_nls); cFYI(1, ("CIFS Tcon rc = %d", rc)); } - if (!rc) + if (!rc) { atomic_inc(&pSesInfo->inUse); + tcon->retry = volume_info.retry; + } } } } @@ -1064,8 +1210,6 @@ cifs_mount(struct super_block *sb, struc CIFSSMBLogoff(xid, pSesInfo); if(pSesInfo->server->tsk) send_sig(SIGKILL,pSesInfo->server->tsk,1); - else - cFYI(1,("Can not wake captive thread on cleanup of failed mount")); set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ / 4); /* give captive thread time to exit */ } else @@ -1091,7 +1235,7 @@ cifs_mount(struct super_block *sb, struc return rc; } -int +static int CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, char session_key[CIFS_SESSION_KEY_SIZE], const struct nls_table *nls_codepage) @@ -1110,7 +1254,7 @@ CIFSSessSetup(unsigned int xid, struct c cFYI(1, ("In sesssetup ")); - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); if (smb_buffer == 0) { return -ENOMEM; } @@ -1162,9 +1306,10 @@ CIFSSessSetup(unsigned int xid, struct c } if(user == NULL) bytes_returned = 0; /* skill null user */ - else - bytes_returned = - cifs_strtoUCS((wchar_t *) bcc_ptr, user, 100, nls_codepage); + else + bytes_returned = + cifs_strtoUCS((wchar_t *) bcc_ptr, user, 100, + nls_codepage); bcc_ptr += 2 * bytes_returned; /* convert num 16 bit words to bytes */ bcc_ptr += 2; /* trailing null */ if (domain == NULL) @@ -1257,7 +1402,7 @@ CIFSSessSetup(unsigned int xid, struct c /* We look for obvious messed up bcc or strings in response so we do not go off the end since (at least) WIN2K and Windows XP have a major bug in 
not null terminating last Unicode string in response */ - ses->serverOS = kcalloc(2 * (len + 1), GFP_KERNEL); + ses->serverOS = cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *)bcc_ptr, len,nls_codepage); bcc_ptr += 2 * (len + 1); @@ -1268,7 +1413,7 @@ CIFSSessSetup(unsigned int xid, struct c len = UniStrnlen((wchar_t *)bcc_ptr, remaining_words - 1); - ses->serverNOS =kcalloc(2 * (len + 1),GFP_KERNEL); + ses->serverNOS =cifs_kcalloc(2 * (len + 1),GFP_KERNEL); cifs_strfromUCS_le(ses->serverNOS, (wchar_t *)bcc_ptr,len,nls_codepage); bcc_ptr += 2 * (len + 1); @@ -1279,7 +1424,7 @@ CIFSSessSetup(unsigned int xid, struct c len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ ses->serverDomain = - kcalloc(2*(len+1),GFP_KERNEL); + cifs_kcalloc(2*(len+1),GFP_KERNEL); cifs_strfromUCS_le(ses->serverDomain, (wchar_t *)bcc_ptr,len,nls_codepage); bcc_ptr += 2 * (len + 1); @@ -1288,20 +1433,20 @@ CIFSSessSetup(unsigned int xid, struct c } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(2, + cifs_kcalloc(2, GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ ses->serverDomain = - kcalloc(2, GFP_KERNEL); + cifs_kcalloc(2, GFP_KERNEL); ses->serverNOS = - kcalloc(2, GFP_KERNEL); + cifs_kcalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(len + 1,GFP_KERNEL); + ses->serverOS = cifs_kcalloc(len + 1,GFP_KERNEL); strncpy(ses->serverOS,bcc_ptr, len); bcc_ptr += len; @@ -1309,14 +1454,14 @@ CIFSSessSetup(unsigned int xid, struct c bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(len + 1,GFP_KERNEL); + ses->serverNOS = cifs_kcalloc(len + 1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = 
strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(len + 1,GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(len + 1,GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; @@ -1341,12 +1486,12 @@ CIFSSessSetup(unsigned int xid, struct c } if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); return rc; } -int +static int CIFSSpnegoSessSetup(unsigned int xid, struct cifsSesInfo *ses, char *SecurityBlob,int SecurityBlobLength, const struct nls_table *nls_codepage) @@ -1365,7 +1510,7 @@ CIFSSpnegoSessSetup(unsigned int xid, st cFYI(1, ("In spnego sesssetup ")); - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); if (smb_buffer == 0) { return -ENOMEM; } @@ -1511,7 +1656,7 @@ CIFSSpnegoSessSetup(unsigned int xid, st the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(2 * (len + 1), GFP_KERNEL); + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -1525,7 +1670,7 @@ CIFSSpnegoSessSetup(unsigned int xid, st remaining_words - 1); ses->serverNOS = - kcalloc(2 * (len + 1), + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverNOS, (wchar_t *)bcc_ptr, @@ -1538,7 +1683,7 @@ CIFSSpnegoSessSetup(unsigned int xid, st if (remaining_words > 0) { len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. 
for Windows XP & 2000) */ - ses->serverDomain = kcalloc(2*(len+1),GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(2*(len+1),GFP_KERNEL); cifs_strfromUCS_le(ses->serverDomain, (wchar_t *)bcc_ptr, len, @@ -1549,10 +1694,10 @@ CIFSSpnegoSessSetup(unsigned int xid, st } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(2,GFP_KERNEL); + cifs_kcalloc(2,GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ - ses->serverDomain = kcalloc(2, GFP_KERNEL); - ses->serverNOS = kcalloc(2, GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(2, GFP_KERNEL); + ses->serverNOS = cifs_kcalloc(2, GFP_KERNEL); } } else { /* ASCII */ @@ -1560,7 +1705,7 @@ CIFSSpnegoSessSetup(unsigned int xid, st if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(len + 1, GFP_KERNEL); + ses->serverOS = cifs_kcalloc(len + 1, GFP_KERNEL); strncpy(ses->serverOS, bcc_ptr, len); bcc_ptr += len; @@ -1568,14 +1713,14 @@ CIFSSpnegoSessSetup(unsigned int xid, st bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(len + 1,GFP_KERNEL); + ses->serverNOS = cifs_kcalloc(len + 1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(len + 1, GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(len + 1, GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; @@ -1600,12 +1745,12 @@ CIFSSpnegoSessSetup(unsigned int xid, st } if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); return rc; } -int +static int CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, struct cifsSesInfo *ses, int * pNTLMv2_flag, const struct nls_table *nls_codepage) @@ -1626,7 +1771,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i cFYI(1, ("In NTLMSSP sesssetup (negotiate) ")); *pNTLMv2_flag = FALSE; - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); if (smb_buffer 
== 0) { return -ENOMEM; } @@ -1822,7 +1967,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(2 * (len + 1), GFP_KERNEL); + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -1837,7 +1982,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i remaining_words - 1); ses->serverNOS = - kcalloc(2 * (len + 1), + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses-> serverNOS, @@ -1854,7 +1999,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string is not always null terminated (for e.g. for Windows XP & 2000) */ ses->serverDomain = - kcalloc(2 * + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); @@ -1880,13 +2025,13 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i } /* else no more room so create dummy domain string */ else ses->serverDomain = - kcalloc(2, + cifs_kcalloc(2, GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ ses->serverDomain = - kcalloc(2, GFP_KERNEL); + cifs_kcalloc(2, GFP_KERNEL); ses->serverNOS = - kcalloc(2, GFP_KERNEL); + cifs_kcalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); @@ -1894,7 +2039,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { ses->serverOS = - kcalloc(len + 1, + cifs_kcalloc(len + 1, GFP_KERNEL); strncpy(ses->serverOS, bcc_ptr, len); @@ -1905,7 +2050,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i len = strnlen(bcc_ptr, 1024); ses->serverNOS = - kcalloc(len + 1, + cifs_kcalloc(len + 1, GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; @@ -1914,7 +2059,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i len = strnlen(bcc_ptr, 1024); ses->serverDomain = - kcalloc(len + 1, + cifs_kcalloc(len + 1, GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; @@ 
-1940,12 +2085,12 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned i } if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); return rc; } -int +static int CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, char *ntlm_session_key, int ntlmv2_flag, const struct nls_table *nls_codepage) @@ -1966,7 +2111,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi cFYI(1, ("In NTLMSSPSessSetup (Authenticate)")); - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); if (smb_buffer == 0) { return -ENOMEM; } @@ -2215,7 +2360,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi the end since (at least) WIN2K and Windows XP have a major bug in not null terminating last Unicode string in response */ ses->serverOS = - kcalloc(2 * (len + 1), GFP_KERNEL); + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses->serverOS, (wchar_t *) bcc_ptr, len, @@ -2230,7 +2375,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi remaining_words - 1); ses->serverNOS = - kcalloc(2 * (len + 1), + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); cifs_strfromUCS_le(ses-> serverNOS, @@ -2246,7 +2391,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); /* last string not always null terminated (e.g. 
for Windows XP & 2000) */ ses->serverDomain = - kcalloc(2 * + cifs_kcalloc(2 * (len + 1), GFP_KERNEL); @@ -2271,17 +2416,17 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi = 0; } /* else no more room so create dummy domain string */ else - ses->serverDomain = kcalloc(2,GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(2,GFP_KERNEL); } else { /* no room so create dummy domain and NOS string */ - ses->serverDomain = kcalloc(2, GFP_KERNEL); - ses->serverNOS = kcalloc(2, GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(2, GFP_KERNEL); + ses->serverNOS = cifs_kcalloc(2, GFP_KERNEL); } } else { /* ASCII */ len = strnlen(bcc_ptr, 1024); if (((long) bcc_ptr + len) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { - ses->serverOS = kcalloc(len + 1,GFP_KERNEL); + ses->serverOS = cifs_kcalloc(len + 1,GFP_KERNEL); strncpy(ses->serverOS,bcc_ptr, len); bcc_ptr += len; @@ -2289,14 +2434,14 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverNOS = kcalloc(len+1,GFP_KERNEL); + ses->serverNOS = cifs_kcalloc(len+1,GFP_KERNEL); strncpy(ses->serverNOS, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; bcc_ptr++; len = strnlen(bcc_ptr, 1024); - ses->serverDomain = kcalloc(len+1,GFP_KERNEL); + ses->serverDomain = cifs_kcalloc(len+1,GFP_KERNEL); strncpy(ses->serverDomain, bcc_ptr, len); bcc_ptr += len; bcc_ptr[0] = 0; @@ -2321,7 +2466,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xi } if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); return rc; } @@ -2342,7 +2487,7 @@ CIFSTCon(unsigned int xid, struct cifsSe if (ses == NULL) return -EIO; - smb_buffer = buf_get(); + smb_buffer = cifs_buf_get(); if (smb_buffer == 0) { return -ENOMEM; } @@ -2404,8 +2549,10 @@ CIFSTCon(unsigned int xid, struct cifsSe if (((long) bcc_ptr + (2 * length)) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(tcon->nativeFileSystem) + kfree(tcon->nativeFileSystem); tcon->nativeFileSystem = - kcalloc(length + 2, 
GFP_KERNEL); + cifs_kcalloc(length + 2, GFP_KERNEL); cifs_strfromUCS_le(tcon->nativeFileSystem, (wchar_t *) bcc_ptr, length, nls_codepage); @@ -2420,8 +2567,10 @@ CIFSTCon(unsigned int xid, struct cifsSe if (((long) bcc_ptr + length) - (long) pByteArea(smb_buffer_response) <= BCC(smb_buffer_response)) { + if(tcon->nativeFileSystem) + kfree(tcon->nativeFileSystem); tcon->nativeFileSystem = - kcalloc(length + 1, GFP_KERNEL); + cifs_kcalloc(length + 1, GFP_KERNEL); strncpy(tcon->nativeFileSystem, bcc_ptr, length); } @@ -2435,7 +2584,7 @@ CIFSTCon(unsigned int xid, struct cifsSe } if (smb_buffer) - buf_release(smb_buffer); + cifs_buf_release(smb_buffer); return rc; } @@ -2472,7 +2621,7 @@ cifs_umount(struct super_block *sb, stru } else cFYI(1, ("No session or bad tcon")); } - /* BB future check active count of tcon and then free if needed BB */ + cifs_sb->tcon = NULL; if (ses) { set_current_state(TASK_INTERRUPTIBLE); @@ -2484,3 +2633,100 @@ cifs_umount(struct super_block *sb, stru FreeXid(xid); return rc; /* BB check if we should always return zero here */ } + +int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, + struct nls_table * nls_info) +{ + int rc = 0; + char ntlm_session_key[CIFS_SESSION_KEY_SIZE]; + int ntlmv2_flag = FALSE; + + /* what if server changes its buffer size after dropping the session? 
*/ + if(pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { + rc = CIFSSMBNegotiate(xid, pSesInfo); + if(rc == -EAGAIN) /* retry only once on 1st time connection */ { + rc = CIFSSMBNegotiate(xid, pSesInfo); + if(rc == -EAGAIN) + rc = -EHOSTDOWN; + } + if(rc == 0) + pSesInfo->server->tcpStatus = CifsGood; + } + pSesInfo->capabilities = pSesInfo->server->capabilities; + if(linuxExtEnabled == 0) + pSesInfo->capabilities &= (~CAP_UNIX); + pSesInfo->sequence_number = 0; + if (!rc) { + cFYI(1,("Security Mode: 0x%x Capabilities: 0x%x Time Zone: %d", + pSesInfo->server->secMode, + pSesInfo->server->capabilities, + pSesInfo->server->timeZone)); + if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (pSesInfo->server->secType == NTLMSSP)) { + cFYI(1, ("New style sesssetup ")); + rc = CIFSSpnegoSessSetup(xid, pSesInfo, + NULL /* security blob */, + 0 /* blob length */, + nls_info); + } else if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (pSesInfo->server->secType == RawNTLMSSP)) { + cFYI(1, ("NTLMSSP sesssetup ")); + rc = CIFSNTLMSSPNegotiateSessSetup(xid, + pSesInfo, + &ntlmv2_flag, + nls_info); + if (!rc) { + if(ntlmv2_flag) { + char * v2_response; + cFYI(1,("Can use more secure NTLM version 2 password hash")); + CalcNTLMv2_partial_mac_key(pSesInfo, + nls_info); + v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); + if(v2_response) { + CalcNTLMv2_response(pSesInfo,v2_response); +/* cifs_calculate_ntlmv2_mac_key(pSesInfo->mac_signing_key, response, ntlm_session_key, */ + kfree(v2_response); + /* BB Put dummy sig in SessSetup PDU? 
*/ + } else + rc = -ENOMEM; + + } else { + SMBNTencrypt(pSesInfo->password, + pSesInfo->server->cryptKey, + ntlm_session_key); + + cifs_calculate_mac_key(pSesInfo->mac_signing_key, + ntlm_session_key, + pSesInfo->password); + } + /* for better security the weaker lanman hash not sent + in AuthSessSetup so we no longer calculate it */ + + rc = CIFSNTLMSSPAuthSessSetup(xid, + pSesInfo, + ntlm_session_key, + ntlmv2_flag, + nls_info); + } + } else { /* old style NTLM 0.12 session setup */ + SMBNTencrypt(pSesInfo->password, + pSesInfo->server->cryptKey, + ntlm_session_key); + + cifs_calculate_mac_key(pSesInfo->mac_signing_key, + ntlm_session_key, pSesInfo->password); + rc = CIFSSessSetup(xid, pSesInfo, + ntlm_session_key, nls_info); + } + if (rc) { + cERROR(1,("Send error in SessSetup = %d",rc)); + } else { + cFYI(1,("CIFS Session Established successfully")); + pSesInfo->status = CifsGood; + } + } + return rc; +} + --- linux-2.6.6-rc1/fs/cifs/dir.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/dir.c 2004-04-18 22:25:28.622479200 -0700 @@ -125,7 +125,7 @@ cifs_create(struct inode *inode, struct int rc = -ENOENT; int xid; int oplock = 0; - int desiredAccess = GENERIC_ALL; + int desiredAccess = GENERIC_READ | GENERIC_WRITE; __u16 fileHandle; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; @@ -147,11 +147,15 @@ cifs_create(struct inode *inode, struct cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path)); if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) - desiredAccess = GENERIC_READ; + desiredAccess = GENERIC_READ; else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) desiredAccess = GENERIC_WRITE; - else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) - desiredAccess = GENERIC_ALL; + else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request */ + /* can cause unnecessary access denied on create */ + /* desiredAccess = GENERIC_ALL; 
*/ + desiredAccess = GENERIC_READ | GENERIC_WRITE; + } if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) disposition = FILE_CREATE; @@ -220,6 +224,7 @@ cifs_create(struct inode *inode, struct pCifsFile->pInode = newinode; pCifsFile->invalidHandle = FALSE; pCifsFile->closePend = FALSE; + init_MUTEX(&pCifsFile->fh_sem); /* pCifsFile->pfile = file; */ /* put in at open time */ write_lock(&GlobalSMBSeslock); list_add(&pCifsFile->tlist,&pTcon->openFileList); @@ -282,19 +287,19 @@ int cifs_mknod(struct inode *inode, stru full_path, mode, current->euid, current->egid, device_number, cifs_sb->local_nls); if(!rc) { - rc = cifs_get_inode_info_unix(&newinode, full_path, - inode->i_sb); + rc = cifs_get_inode_info_unix(&newinode, full_path, + inode->i_sb); direntry->d_op = &cifs_dentry_ops; if(rc == 0) d_instantiate(direntry, newinode); } } - if (full_path) - kfree(full_path); - FreeXid(xid); + if (full_path) + kfree(full_path); + FreeXid(xid); - return rc; + return rc; } --- linux-2.6.6-rc1/fs/cifs/file.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/cifs/file.c 2004-04-18 22:25:28.629478136 -0700 @@ -47,7 +47,7 @@ cifs_open(struct inode *inode, struct fi struct list_head * tmp; char *full_path = NULL; int desiredAccess = 0x20197; - int disposition = FILE_OPEN; + int disposition; __u16 netfid; FILE_ALL_INFO * buf = NULL; @@ -57,27 +57,27 @@ cifs_open(struct inode *inode, struct fi pTcon = cifs_sb->tcon; if (file->f_flags & O_CREAT) { - /* search inode for this file and fill in file->private_data = */ - pCifsInode = CIFS_I(file->f_dentry->d_inode); - read_lock(&GlobalSMBSeslock); - list_for_each(tmp, &pCifsInode->openFileList) { - pCifsFile = list_entry(tmp,struct cifsFileInfo, flist); - if((pCifsFile->pfile == NULL)&& (pCifsFile->pid = current->pid)){ - /* set mode ?? 
*/ - pCifsFile->pfile = file; /* needed for writepage */ - file->private_data = pCifsFile; - break; - } - } - read_unlock(&GlobalSMBSeslock); - if(file->private_data != NULL) { - rc = 0; - FreeXid(xid); - return rc; - } else { - if(file->f_flags & O_EXCL) - cERROR(1,("could not find file instance for new file %p ",file)); - } + /* search inode for this file and fill in file->private_data = */ + pCifsInode = CIFS_I(file->f_dentry->d_inode); + read_lock(&GlobalSMBSeslock); + list_for_each(tmp, &pCifsInode->openFileList) { + pCifsFile = list_entry(tmp,struct cifsFileInfo, flist); + if((pCifsFile->pfile == NULL)&& (pCifsFile->pid = current->pid)){ + /* set mode ?? */ + pCifsFile->pfile = file; /* needed for writepage */ + file->private_data = pCifsFile; + break; + } + } + read_unlock(&GlobalSMBSeslock); + if(file->private_data != NULL) { + rc = 0; + FreeXid(xid); + return rc; + } else { + if(file->f_flags & O_EXCL) + cERROR(1,("could not find file instance for new file %p ",file)); + } } full_path = build_path_from_dentry(file->f_dentry); @@ -87,29 +87,45 @@ cifs_open(struct inode *inode, struct fi desiredAccess = GENERIC_READ; else if ((file->f_flags & O_ACCMODE) == O_WRONLY) desiredAccess = GENERIC_WRITE; - else if ((file->f_flags & O_ACCMODE) == O_RDWR) - desiredAccess = GENERIC_ALL; - -/* BB check other flags carefully to find equivalent NTCreateX flags */ - -/* -#define O_CREAT 0100 -#define O_EXCL 0200 -#define O_NOCTTY 0400 -#define O_TRUNC 01000 -#define O_APPEND 02000 -#define O_NONBLOCK 04000 -#define O_NDELAY O_NONBLOCK -#define O_SYNC 010000 -#define FASYNC 020000 -#define O_DIRECT 040000 -#define O_LARGEFILE 0100000 -#define O_DIRECTORY 0200000 -#define O_NOFOLLOW 0400000 -#define O_ATOMICLOOKUP 01000000 */ - - if (file->f_flags & O_CREAT) - disposition = FILE_OVERWRITE; + else if ((file->f_flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request */ + /* can cause unnecessary access denied on create */ + /* desiredAccess = 
GENERIC_ALL; */ + desiredAccess = GENERIC_READ | GENERIC_WRITE; + } + +/********************************************************************* + * open flag mapping table: + * + * POSIX Flag CIFS Disposition + * ---------- ---------------- + * O_CREAT FILE_OPEN_IF + * O_CREAT | O_EXCL FILE_CREATE + * O_CREAT | O_TRUNC FILE_OVERWRITE_IF + * O_TRUNC FILE_OVERWRITE + * none of the above FILE_OPEN + * + * Note that there is not a direct match between disposition + * FILE_SUPERSEDE (ie create whether or not file exists although + * O_CREAT | O_TRUNC is similar but truncates the existing + * file rather than creating a new file as FILE_SUPERSEDE does + * (which uses the attributes / metadata passed in on open call) + *? + *? O_SYNC is a reasonable match to CIFS writethrough flag + *? and the read write flags match reasonably. O_LARGEFILE + *? is irrelevant because largefile support is always used + *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, + * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation + *********************************************************************/ + + if((file->f_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; + else if((file->f_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; + else if((file->f_flags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else + disposition = FILE_OPEN; if (oplockEnabled) oplock = REQ_OPLOCK; @@ -121,7 +137,7 @@ cifs_open(struct inode *inode, struct fi /* Also refresh inode by passing in file_info buf returned by SMBOpen and calling get_inode_info with returned buf (at least helps non-Unix server case */ - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); + buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); if(buf==0) { if (full_path) kfree(full_path); @@ -134,14 +150,16 @@ cifs_open(struct inode *inode, struct fi cFYI(1, ("cifs_open returned 0x%x ", rc)); cFYI(1, ("oplock: %d ", oplock)); } else { + if(file->private_data) + 
kfree(file->private_data); file->private_data = - kmalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); + kmalloc(sizeof (struct cifsFileInfo), GFP_KERNEL); if (file->private_data) { - memset(file->private_data, 0, - sizeof (struct cifsFileInfo)); + memset(file->private_data, 0, sizeof(struct cifsFileInfo)); pCifsFile = (struct cifsFileInfo *) file->private_data; pCifsFile->netfid = netfid; pCifsFile->pid = current->pid; + init_MUTEX(&pCifsFile->fh_sem); pCifsFile->pfile = file; /* needed for writepage */ pCifsFile->pInode = inode; pCifsFile->invalidHandle = FALSE; @@ -154,8 +172,27 @@ cifs_open(struct inode *inode, struct fi list_add(&pCifsFile->flist,&pCifsInode->openFileList); write_unlock(&GlobalSMBSeslock); write_unlock(&file->f_owner.lock); + if(pCifsInode->clientCanCacheRead) { + /* we have the inode open somewhere else + no need to discard cache data */ + } else { + if(buf) { + /* BB need same check in cifs_create too? */ - if (pTcon->ses->capabilities & CAP_UNIX) + /* if not oplocked, invalidate inode pages if mtime + or file size changed */ + struct timespec temp; + temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime)); + if(timespec_equal(&file->f_dentry->d_inode->i_mtime,&temp) && + (file->f_dentry->d_inode->i_size == (loff_t)le64_to_cpu(buf->EndOfFile))) { + cFYI(1,("inode unchanged on server")); + } else { + cFYI(1,("invalidating remote inode since open detected it changed")); + invalidate_remote_inode(file->f_dentry->d_inode); + } + } + } + if (pTcon->ses->capabilities & CAP_UNIX) rc = cifs_get_inode_info_unix(&file->f_dentry->d_inode, full_path, inode->i_sb); else @@ -200,162 +237,119 @@ cifs_open(struct inode *inode, struct fi /* Try to reaquire byte range locks that were released when session */ /* to server was lost */ -int relock_files(struct cifsFileInfo * cifsFile) +static int cifs_relock_file(struct cifsFileInfo * cifsFile) { int rc = 0; -/* list all locks open on this file */ +/* BB list all locks open on this file and relock */ + return 
rc; } static int cifs_reopen_file(struct inode *inode, struct file *file) { - int rc = -EACCES; - int xid, oplock; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - struct cifsFileInfo *pCifsFile; - struct cifsInodeInfo *pCifsInode; - char *full_path = NULL; - int desiredAccess = 0x20197; - int disposition = FILE_OPEN; - __u16 netfid; - FILE_ALL_INFO * buf = NULL; - - xid = GetXid(); - - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - - full_path = build_path_from_dentry(file->f_dentry); - - cFYI(1, (" inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags,full_path)); - if ((file->f_flags & O_ACCMODE) == O_RDONLY) - desiredAccess = GENERIC_READ; - else if ((file->f_flags & O_ACCMODE) == O_WRONLY) - desiredAccess = GENERIC_WRITE; - else if ((file->f_flags & O_ACCMODE) == O_RDWR) - desiredAccess = GENERIC_ALL; - if (oplockEnabled) - oplock = REQ_OPLOCK; - else - oplock = FALSE; - - /* BB pass O_SYNC flag through on file attributes .. BB */ - - /* Also refresh inode by passing in file_info buf returned by SMBOpen - and calling get_inode_info with returned buf (at least - helps non-Unix server case */ - buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); - if(buf==0) { - if (full_path) - kfree(full_path); - FreeXid(xid); - return -ENOMEM; - } - rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, - CREATE_NOT_DIR, &netfid, &oplock, buf, cifs_sb->local_nls); - if (rc) { - cFYI(1, ("cifs_open returned 0x%x ", rc)); - cFYI(1, ("oplock: %d ", oplock)); - } else { - if (file->private_data) { - pCifsFile = (struct cifsFileInfo *) file->private_data; + int rc = -EACCES; + int xid, oplock; + struct cifs_sb_info *cifs_sb; + struct cifsTconInfo *pTcon; + struct cifsFileInfo *pCifsFile; + struct cifsInodeInfo *pCifsInode; + char *full_path = NULL; + int desiredAccess = 0x20197; + int disposition = FILE_OPEN; + __u16 netfid; + FILE_ALL_INFO * buf = NULL; - pCifsFile->netfid = netfid; - pCifsFile->invalidHandle = FALSE; - pCifsInode = 
CIFS_I(file->f_dentry->d_inode); - if(pCifsInode) { - if (pTcon->ses->capabilities & CAP_UNIX) - rc = cifs_get_inode_info_unix(&file->f_dentry->d_inode, - full_path, inode->i_sb); - else - rc = cifs_get_inode_info(&file->f_dentry->d_inode, - full_path, buf, inode->i_sb); - - if(oplock == OPLOCK_EXCLUSIVE) { - pCifsInode->clientCanCacheAll = TRUE; - pCifsInode->clientCanCacheRead = TRUE; - cFYI(1,("Exclusive Oplock granted on inode %p",file->f_dentry->d_inode)); - } else if(oplock == OPLOCK_READ) { - pCifsInode->clientCanCacheRead = TRUE; - pCifsInode->clientCanCacheAll = FALSE; - } else { - pCifsInode->clientCanCacheRead = FALSE; - pCifsInode->clientCanCacheAll = FALSE; - } - } - } else - rc = -EBADF; - } + if(inode == NULL) + return -EBADF; + if (file->private_data) { + pCifsFile = (struct cifsFileInfo *) file->private_data; + } else + return -EBADF; - if (buf) - kfree(buf); - if (full_path) - kfree(full_path); - FreeXid(xid); - return rc; -} + xid = GetXid(); + down(&pCifsFile->fh_sem); + if(pCifsFile->invalidHandle == FALSE) { + up(&pCifsFile->fh_sem); + FreeXid(xid); + return 0; + } -/* Try to reopen files that were closed when session to server was lost */ -int reopen_files(struct cifsTconInfo * pTcon, struct nls_table * nlsinfo) -{ - int rc = 0; - struct cifsFileInfo *open_file = NULL; - struct file * file = NULL; - struct list_head invalid_file_list; - struct list_head * tmp; - struct list_head * tmp1; - INIT_LIST_HEAD(&invalid_file_list); + cifs_sb = CIFS_SB(inode->i_sb); + pTcon = cifs_sb->tcon; -/* list all files open on tree connection and mark them invalid */ - write_lock(&GlobalSMBSeslock); - list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { - open_file = list_entry(tmp,struct cifsFileInfo, tlist); - if(open_file) { - open_file->invalidHandle = TRUE; - list_move(&open_file->tlist,&invalid_file_list); - } + full_path = build_path_from_dentry(file->f_dentry); + + cFYI(1, (" inode = 0x%p file flags are 0x%x for %s", inode, file->f_flags,full_path)); + 
if ((file->f_flags & O_ACCMODE) == O_RDONLY) + desiredAccess = GENERIC_READ; + else if ((file->f_flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; + else if ((file->f_flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request */ + /* can cause unnecessary access denied on create */ + /* desiredAccess = GENERIC_ALL; */ + desiredAccess = GENERIC_READ | GENERIC_WRITE; } - /* reopen files */ - list_for_each_safe(tmp,tmp1, &invalid_file_list) { - /* BB need to fix above to check list end and skip entries we do not need to reopen */ - open_file = list_entry(tmp,struct cifsFileInfo, tlist); - if(open_file == NULL) { - break; - } else { - if((open_file->invalidHandle == FALSE) && - (open_file->closePend == FALSE)) { - list_move(&open_file->tlist,&pTcon->openFileList); - continue; - } - file = open_file->pfile; - if(file->f_dentry == 0) { - cFYI(1,("Null dentry for file %p",file)); + if (oplockEnabled) + oplock = REQ_OPLOCK; + else + oplock = FALSE; + + /* BB pass O_SYNC flag through on file attributes .. 
BB */ + + /* Also refresh inode by passing in file_info buf returned by SMBOpen + and calling get_inode_info with returned buf (at least + helps non-Unix server case */ + buf = kmalloc(sizeof(FILE_ALL_INFO),GFP_KERNEL); + if(buf==0) { + up(&pCifsFile->fh_sem); + if (full_path) + kfree(full_path); + FreeXid(xid); + return -ENOMEM; + } + rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess, + CREATE_NOT_DIR, &netfid, &oplock, buf, cifs_sb->local_nls); + if (rc) { + up(&pCifsFile->fh_sem); + cFYI(1, ("cifs_open returned 0x%x ", rc)); + cFYI(1, ("oplock: %d ", oplock)); + } else { + pCifsFile->netfid = netfid; + pCifsFile->invalidHandle = FALSE; + up(&pCifsFile->fh_sem); + pCifsInode = CIFS_I(inode); + if(pCifsInode) { + if (pTcon->ses->capabilities & CAP_UNIX) + rc = cifs_get_inode_info_unix(&inode, + full_path, inode->i_sb); + else + rc = cifs_get_inode_info(&inode, + full_path, buf, inode->i_sb); + + if(oplock == OPLOCK_EXCLUSIVE) { + pCifsInode->clientCanCacheAll = TRUE; + pCifsInode->clientCanCacheRead = TRUE; + cFYI(1,("Exclusive Oplock granted on inode %p",file->f_dentry->d_inode)); + } else if(oplock == OPLOCK_READ) { + pCifsInode->clientCanCacheRead = TRUE; + pCifsInode->clientCanCacheAll = FALSE; } else { - write_unlock(&GlobalSMBSeslock); - rc = cifs_reopen_file(file->f_dentry->d_inode,file); - write_lock(&GlobalSMBSeslock); - if(file->private_data == NULL) { - tmp = invalid_file_list.next; - tmp1 = tmp->next; - continue; - } - - list_move(&open_file->tlist,&pTcon->openFileList); - if(rc) { - cFYI(1,("reconnecting file %s failed with %d", - file->f_dentry->d_name.name,rc)); - } else { - cFYI(1,("reconnection of %s succeeded", - file->f_dentry->d_name.name)); - } + pCifsInode->clientCanCacheRead = FALSE; + pCifsInode->clientCanCacheAll = FALSE; } + cifs_relock_file(pCifsFile); } } - write_unlock(&GlobalSMBSeslock); + + if (buf) + kfree(buf); + if (full_path) + kfree(full_path); + FreeXid(xid); return rc; } @@ -437,6 +431,7 @@ cifs_lock(struct 
file *file, int cmd, st __u32 numLock = 0; __u32 numUnlock = 0; __u64 length; + int wait_flag = FALSE; struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; length = 1 + pfLock->fl_end - pfLock->fl_start; @@ -454,14 +449,16 @@ cifs_lock(struct file *file, int cmd, st cFYI(1, ("Posix ")); if (pfLock->fl_flags & FL_FLOCK) cFYI(1, ("Flock ")); - if (pfLock->fl_flags & FL_SLEEP) + if (pfLock->fl_flags & FL_SLEEP) { cFYI(1, ("Blocking lock ")); + wait_flag = TRUE; + } if (pfLock->fl_flags & FL_ACCESS) - cFYI(1, ("Process suspended by mandatory locking ")); + cFYI(1, ("Process suspended by mandatory locking - not implemented yet ")); if (pfLock->fl_flags & FL_LEASE) - cFYI(1, ("Lease on file ")); - if (pfLock->fl_flags & 0xFFD0) - cFYI(1, ("Unknown lock flags ")); + cFYI(1, ("Lease on file - not implemented yet")); + if (pfLock->fl_flags & (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE))) + cFYI(1, ("Unknown lock flags 0x%x",pfLock->fl_flags)); if (pfLock->fl_type == F_WRLCK) { cFYI(1, ("F_WRLCK ")); @@ -509,7 +506,7 @@ cifs_lock(struct file *file, int cmd, st pfLock->fl_type = F_UNLCK; if (rc != 0) cERROR(1, - ("Error unlocking previously locked range %d during test of lock ", + ("Error unlocking previously locked range %d during test of lock ", rc)); rc = 0; @@ -526,7 +523,7 @@ cifs_lock(struct file *file, int cmd, st ((struct cifsFileInfo *) file->private_data)-> netfid, length, pfLock->fl_start, numUnlock, numLock, lockType, - 0 /* wait flag */ ); + wait_flag); FreeXid(xid); return rc; } @@ -541,6 +538,7 @@ cifs_write(struct file * file, const cha struct cifs_sb_info *cifs_sb; struct cifsTconInfo *pTcon; int xid, long_op; + struct cifsFileInfo * open_file; xid = GetXid(); @@ -555,20 +553,30 @@ cifs_write(struct file * file, const cha FreeXid(xid); return -EBADF; } + open_file = (struct cifsFileInfo *) file->private_data; + if (*poffset > file->f_dentry->d_inode->i_size) - long_op = 2; /* writes past end of file can take a long time */ + long_op = 2; /* 
writes past end of file can take a long time */ else long_op = 1; for (total_written = 0; write_size > total_written; total_written += bytes_written) { - rc = CIFSSMBWrite(xid, pTcon, - ((struct cifsFileInfo *) file-> - private_data)->netfid, + rc = -EAGAIN; + while(rc == -EAGAIN) { + if ((open_file->invalidHandle) && (!open_file->closePend)) { + rc = cifs_reopen_file(file->f_dentry->d_inode,file); + if(rc != 0) + break; + } + + rc = CIFSSMBWrite(xid, pTcon, + open_file->netfid, write_size - total_written, *poffset, &bytes_written, write_data + total_written, long_op); + } if (rc || (bytes_written == 0)) { if (total_written) break; @@ -580,10 +588,11 @@ cifs_write(struct file * file, const cha *poffset += bytes_written; long_op = FALSE; /* subsequent writes fast - 15 seconds is plenty */ } - file->f_dentry->d_inode->i_ctime = file->f_dentry->d_inode->i_mtime = CURRENT_TIME; + file->f_dentry->d_inode->i_ctime = file->f_dentry->d_inode->i_mtime = + CURRENT_TIME; if (bytes_written > 0) { if (*poffset > file->f_dentry->d_inode->i_size) - file->f_dentry->d_inode->i_size = *poffset; + i_size_write(file->f_dentry->d_inode, *poffset); } mark_inode_dirty_sync(file->f_dentry->d_inode); FreeXid(xid); @@ -605,24 +614,18 @@ cifs_partialpagewrite(struct page *page, struct cifsFileInfo *open_file = NULL; struct list_head *tmp; struct list_head *tmp1; - int xid; - - xid = GetXid(); cifs_sb = CIFS_SB(inode->i_sb); pTcon = cifs_sb->tcon; /* figure out which file struct to use if (file->private_data == NULL) { - FreeXid(xid); return -EBADF; } */ if (!mapping) { - FreeXid(xid); return -EFAULT; } else if(!mapping->host) { - FreeXid(xid); return -EFAULT; } @@ -632,14 +635,12 @@ cifs_partialpagewrite(struct page *page, if((to > PAGE_CACHE_SIZE) || (from > to)) { kunmap(page); - FreeXid(xid); return -EIO; } /* racing with truncate? 
*/ if(offset > mapping->host->i_size) { kunmap(page); - FreeXid(xid); return 0; /* don't care */ } @@ -683,7 +684,6 @@ cifs_partialpagewrite(struct page *page, } kunmap(page); - FreeXid(xid); return rc; } @@ -727,23 +727,38 @@ cifs_commit_write(struct file *file, str int rc = 0; struct inode *inode = page->mapping->host; loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - struct cifsFileInfo *open_file; - struct cifs_sb_info *cifs_sb; + /* struct cifsFileInfo *open_file; + struct cifs_sb_info *cifs_sb; */ xid = GetXid(); - + cFYI(1,("commit write for page %p up to position %lld for %d",page,position,to)); if (position > inode->i_size){ - inode->i_size = position; - if (file->private_data == NULL) { + i_size_write(inode, position); + /*if (file->private_data == NULL) { rc = -EBADF; } else { - cifs_sb = CIFS_SB(inode->i_sb); open_file = (struct cifsFileInfo *)file->private_data; - rc = CIFSSMBSetFileSize(xid, cifs_sb->tcon, position, - open_file->netfid,open_file->pid,FALSE); + cifs_sb = CIFS_SB(inode->i_sb); + rc = -EAGAIN; + while(rc == -EAGAIN) { + if((open_file->invalidHandle) && + (!open_file->closePend)) { + rc = cifs_reopen_file(file->f_dentry->d_inode,file); + if(rc != 0) + break; + } + if(!open_file->closePend) { + rc = CIFSSMBSetFileSize(xid, cifs_sb->tcon, + position, open_file->netfid, + open_file->pid,FALSE); + } else { + rc = -EBADF; + break; + } + } cFYI(1,(" SetEOF (commit write) rc = %d",rc)); - } - } + }*/ + } set_page_dirty(page); FreeXid(xid); @@ -769,7 +784,7 @@ cifs_fsync(struct file *file, struct den return rc; } -static int +/* static int cifs_sync_page(struct page *page) { struct address_space *mapping; @@ -784,17 +799,17 @@ cifs_sync_page(struct page *page) return 0; inode = mapping->host; if (!inode) - return 0; + return 0;*/ /* fill in rpages then result = cifs_pagein_inode(inode, index, rpages); *//* BB finish */ - cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index)); +/* cFYI(1, ("rpages is %d for sync 
page of Index %ld ", rpages, index)); if (rc < 0) return rc; return 0; -} +} */ /* * As file closes, flush all cached write data for this inode checking @@ -837,6 +852,7 @@ cifs_read(struct file * file, char *read struct cifsTconInfo *pTcon; int xid; char * current_offset; + struct cifsFileInfo * open_file; xid = GetXid(); cifs_sb = CIFS_SB(file->f_dentry->d_sb); @@ -846,15 +862,24 @@ cifs_read(struct file * file, char *read FreeXid(xid); return -EBADF; } + open_file = (struct cifsFileInfo *)file->private_data; for (total_read = 0,current_offset=read_data; read_size > total_read; total_read += bytes_read,current_offset+=bytes_read) { current_read_size = min_t(const int,read_size - total_read,cifs_sb->rsize); - rc = CIFSSMBRead(xid, pTcon, - ((struct cifsFileInfo *) file-> - private_data)->netfid, + rc = -EAGAIN; + while(rc == -EAGAIN) { + if ((open_file->invalidHandle) && (!open_file->closePend)) { + rc = cifs_reopen_file(file->f_dentry->d_inode,file); + if(rc != 0) + break; + } + + rc = CIFSSMBRead(xid, pTcon, + open_file->netfid, current_read_size, *poffset, &bytes_read, ¤t_offset); + } if (rc || (bytes_read == 0)) { if (total_read) { break; @@ -862,8 +887,9 @@ cifs_read(struct file * file, char *read FreeXid(xid); return rc; } - } else + } else { *poffset += bytes_read; + } } FreeXid(xid); @@ -899,7 +925,6 @@ static void cifs_copy_cache_pages(struct break; page = list_entry(pages->prev, struct page, lru); - list_del(&page->lru); if (add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { @@ -908,24 +933,24 @@ static void cifs_copy_cache_pages(struct continue; } - page_cache_get(page); target = kmap_atomic(page,KM_USER0); if(PAGE_CACHE_SIZE > bytes_read) { memcpy(target,data,bytes_read); + /* zero the tail end of this partial page */ + memset(target+bytes_read,0,PAGE_CACHE_SIZE-bytes_read); bytes_read = 0; } else { memcpy(target,data,PAGE_CACHE_SIZE); bytes_read -= PAGE_CACHE_SIZE; } + kunmap_atomic(target,KM_USER0); - if (!pagevec_add(plru_pvec, page)) - 
__pagevec_lru_add(plru_pvec); flush_dcache_page(page); SetPageUptodate(page); - kunmap_atomic(target,KM_USER0); unlock_page(page); - page_cache_release(page); + if (!pagevec_add(plru_pvec, page)) + __pagevec_lru_add(plru_pvec); data += PAGE_CACHE_SIZE; } return; @@ -947,46 +972,78 @@ cifs_readpages(struct file *file, struct char * smb_read_data = 0; struct smb_com_read_rsp * pSMBr; struct pagevec lru_pvec; + struct cifsFileInfo * open_file; xid = GetXid(); if (file->private_data == NULL) { FreeXid(xid); return -EBADF; } - + open_file = (struct cifsFileInfo *)file->private_data; cifs_sb = CIFS_SB(file->f_dentry->d_sb); pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); for(i = 0;iprev, struct page, lru); offset = (loff_t)page->index << PAGE_CACHE_SHIFT; + /* count adjacent pages that we will read into */ + contig_pages = 0; + expected_index = list_entry(page_list->prev,struct page,lru)->index; + list_for_each_entry_reverse(tmp_page,page_list,lru) { + if(tmp_page->index == expected_index) { + contig_pages++; + expected_index++; + } else { + break; + } + } + if(contig_pages + i > num_pages) { + contig_pages = num_pages - i; + } + /* for reads over a certain size could initiate async read ahead */ - cFYI(0,("Read %d pages into cache at offset %ld ", - num_pages-i, (unsigned long) offset)); - - read_size = (num_pages - i) * PAGE_CACHE_SIZE; + read_size = contig_pages * PAGE_CACHE_SIZE; /* Read size needs to be in multiples of one page */ read_size = min_t(const unsigned int,read_size,cifs_sb->rsize & PAGE_CACHE_MASK); - rc = CIFSSMBRead(xid, pTcon, - ((struct cifsFileInfo *) file-> - private_data)->netfid, - read_size, offset, - &bytes_read, &smb_read_data); + rc = -EAGAIN; + while(rc == -EAGAIN) { + if ((open_file->invalidHandle) && (!open_file->closePend)) { + rc = cifs_reopen_file(file->f_dentry->d_inode,file); + if(rc != 0) + break; + } + + rc = CIFSSMBRead(xid, pTcon, + open_file->netfid, + read_size, offset, + &bytes_read, &smb_read_data); + /* BB need to check 
return code here */ + if(rc== -EAGAIN) { + if(smb_read_data) { + cifs_buf_release(smb_read_data); + smb_read_data = 0; + } + } + } if ((rc < 0) || (smb_read_data == NULL)) { cFYI(1,("Read error in readpages: %d",rc)); /* clean up remaing pages off list */ - while (!list_empty(page_list) && (i < num_pages)) { - page = list_entry(page_list->prev, - struct page, lru); + page = list_entry(page_list->prev, struct page, lru); list_del(&page->lru); + page_cache_release(page); } break; } else if (bytes_read > 0) { @@ -994,24 +1051,37 @@ cifs_readpages(struct file *file, struct cifs_copy_cache_pages(mapping, page_list, bytes_read, smb_read_data + 4 /* RFC1000 hdr */ + le16_to_cpu(pSMBr->DataOffset), &lru_pvec); + i += bytes_read >> PAGE_CACHE_SHIFT; - if((bytes_read & PAGE_CACHE_MASK) != bytes_read) { + + if((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) { cFYI(1,("Partial page %d of %d read to cache",i++,num_pages)); - break; + + i++; /* account for partial page */ + + /* server copy of file can have smaller size than client */ + /* BB do we need to verify this common case ? this case is ok - + if we are at server EOF we will hit it on next read */ + + /* while(!list_empty(page_list) && (i < num_pages)) { + page = list_entry(page_list->prev,struct page, list); + list_del(&page->list); + page_cache_release(page); + } + break; */ } } else { - cFYI(1,("No bytes read cleaning remaining pages off readahead list")); + cFYI(1,("No bytes read (%d) at offset %lld . Cleaning remaining pages from readahead list",bytes_read,offset)); /* BB turn off caching and do new lookup on file size at server? */ while (!list_empty(page_list) && (i < num_pages)) { - page = list_entry(page_list->prev, - struct page, lru); + page = list_entry(page_list->prev, struct page, lru); list_del(&page->lru); + page_cache_release(page); /* BB removeme - replace with zero of page? 
*/ } - break; } if(smb_read_data) { - buf_release(smb_read_data); + cifs_buf_release(smb_read_data); smb_read_data = 0; } bytes_read = 0; @@ -1019,6 +1089,12 @@ cifs_readpages(struct file *file, struct pagevec_lru_add(&lru_pvec); +/* need to free smb_read_data buf before exit */ + if(smb_read_data) { + cifs_buf_release(smb_read_data); + smb_read_data = 0; + } + FreeXid(xid); return rc; } @@ -1122,7 +1198,7 @@ fill_in_inode(struct inode *tmp_inode, }/* could add code here - to validate if device or weird share type? */ /* can not fill in nlink here as in qpathinfo version and Unx search */ - tmp_inode->i_size = pfindData->EndOfFile; + i_size_write(tmp_inode,pfindData->EndOfFile); tmp_inode->i_blocks = (tmp_inode->i_blksize - 1 + pfindData->AllocationSize) >> tmp_inode->i_blkbits; if (pfindData->AllocationSize < pfindData->EndOfFile) @@ -1196,7 +1272,7 @@ unix_fill_in_inode(struct inode *tmp_ino pfindData->NumOfBytes = le64_to_cpu(pfindData->NumOfBytes); pfindData->EndOfFile = le64_to_cpu(pfindData->EndOfFile); - tmp_inode->i_size = pfindData->EndOfFile; + i_size_write(tmp_inode,pfindData->EndOfFile); tmp_inode->i_blocks = (tmp_inode->i_blksize - 1 + pfindData->NumOfBytes) >> tmp_inode->i_blkbits; @@ -1220,7 +1296,7 @@ unix_fill_in_inode(struct inode *tmp_ino } } -void +static void construct_dentry(struct qstr *qstring, struct file *file, struct inode **ptmp_inode, struct dentry **pnew_dentry) { @@ -1244,6 +1320,11 @@ construct_dentry(struct qstr *qstring, s } } else { tmp_dentry = d_alloc(file->f_dentry, qstring); + if(tmp_dentry == NULL) { + cERROR(1,("Failed allocating dentry")); + return; + } + *ptmp_inode = new_inode(file->f_dentry->d_sb); tmp_dentry->d_op = &cifs_dentry_ops; cFYI(0, (" instantiate dentry 0x%p with inode 0x%p ", @@ -1256,13 +1337,44 @@ construct_dentry(struct qstr *qstring, s *pnew_dentry = tmp_dentry; } -void +static void reset_resume_key(struct file * dir_file, + unsigned char * filename, + unsigned int len,int Unicode,struct nls_table * 
nls_tab) { + struct cifsFileInfo *cifsFile; + + cifsFile = (struct cifsFileInfo *)dir_file->private_data; + if(cifsFile == NULL) + return; + if(cifsFile->search_resume_name) { + kfree(cifsFile->search_resume_name); + } + + if(Unicode) + len *= 2; + cifsFile->resume_name_length = len; + + cifsFile->search_resume_name = + kmalloc(cifsFile->resume_name_length, GFP_KERNEL); + + if(Unicode) + cifs_strtoUCS((wchar_t *) cifsFile->search_resume_name, + filename, len, nls_tab); + else + memcpy(cifsFile->search_resume_name, filename, + cifsFile->resume_name_length); + cFYI(1,("Reset resume key to: %s with len %d",filename,len)); + return; +} + + + +static int cifs_filldir(struct qstr *pqstring, FILE_DIRECTORY_INFO * pfindData, struct file *file, filldir_t filldir, void *direntry) { struct inode *tmp_inode; struct dentry *tmp_dentry; - int object_type; + int object_type,rc; pqstring->name = pfindData->FileName; pqstring->len = pfindData->FileNameLength; @@ -1270,19 +1382,25 @@ cifs_filldir(struct qstr *pqstring, FILE construct_dentry(pqstring, file, &tmp_inode, &tmp_dentry); fill_in_inode(tmp_inode, pfindData, &object_type); - filldir(direntry, pfindData->FileName, pqstring->len, file->f_pos, + rc = filldir(direntry, pfindData->FileName, pqstring->len, file->f_pos, tmp_inode->i_ino, object_type); + if(rc) { + /* due to readdir error we need to recalculate resume + key so next readdir will restart on right entry */ + cFYI(1,("Error %d on filldir of %s",rc ,pfindData->FileName)); + } dput(tmp_dentry); + return rc; } -void +static int cifs_filldir_unix(struct qstr *pqstring, FILE_UNIX_INFO * pUnixFindData, struct file *file, filldir_t filldir, void *direntry) { struct inode *tmp_inode; struct dentry *tmp_dentry; - int object_type; + int object_type, rc; pqstring->name = pUnixFindData->FileName; pqstring->len = strnlen(pUnixFindData->FileName, MAX_PATHCONF); @@ -1290,9 +1408,15 @@ cifs_filldir_unix(struct qstr *pqstring, construct_dentry(pqstring, file, &tmp_inode, &tmp_dentry); 
unix_fill_in_inode(tmp_inode, pUnixFindData, &object_type); - filldir(direntry, pUnixFindData->FileName, pqstring->len, + rc = filldir(direntry, pUnixFindData->FileName, pqstring->len, file->f_pos, tmp_inode->i_ino, object_type); + if(rc) { + /* due to readdir error we need to recalculate resume + key so next readdir will restart on right entry */ + cFYI(1,("Error %d on filldir of %s",rc ,pUnixFindData->FileName)); + } dput(tmp_dentry); + return rc; } int @@ -1378,8 +1502,7 @@ cifs_readdir(struct file *file, void *di searchHandle = findParms.SearchHandle; if(file->private_data == NULL) file->private_data = - kmalloc(sizeof(struct cifsFileInfo), - GFP_KERNEL); + kmalloc(sizeof(struct cifsFileInfo),GFP_KERNEL); if (file->private_data) { memset(file->private_data, 0, sizeof (struct cifsFileInfo)); @@ -1387,6 +1510,7 @@ cifs_readdir(struct file *file, void *di (struct cifsFileInfo *) file->private_data; cifsFile->netfid = searchHandle; cifsFile->invalidHandle = FALSE; + init_MUTEX(&cifsFile->fh_sem); } else { rc = -ENOMEM; break; @@ -1471,10 +1595,18 @@ cifs_readdir(struct file *file, void *di FileName[0] != '.') || (pfindData-> FileName[1] != '.'))) { - cifs_filldir(&qstring, + if(cifs_filldir(&qstring, pfindData, file, filldir, - direntry); + direntry)) { + /* do not end search if + kernel not ready to take + remaining entries yet */ + reset_resume_key(file, pfindData->FileName,qstring.len, + Unicode, cifs_sb->local_nls); + findParms.EndofSearch = 0; + break; + } file->f_pos++; } } else { /* UnixSearch */ @@ -1501,11 +1633,19 @@ cifs_readdir(struct file *file, void *di FileName[0] != '.') || (pfindDataUnix-> FileName[1] != '.'))) { - cifs_filldir_unix(&qstring, + if(cifs_filldir_unix(&qstring, pfindDataUnix, file, filldir, - direntry); + direntry)) { + /* do not end search if + kernel not ready to take + remaining entries yet */ + findParms.EndofSearch = 0; + reset_resume_key(file, pfindDataUnix->FileName, + qstring.len,Unicode,cifs_sb->local_nls); + break; + } 
file->f_pos++; } } @@ -1573,6 +1713,11 @@ cifs_readdir(struct file *file, void *di rc = -ENOMEM; break; } + /* Free the memory allocated by previous findfirst + or findnext call - we can not reuse the memory since + the resume name may not be same string length */ + if(cifsFile->search_resume_name) + kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = kmalloc(cifsFile->resume_name_length, GFP_KERNEL); cFYI(1,("Last file: %s with name %d bytes long", @@ -1603,6 +1748,11 @@ cifs_readdir(struct file *file, void *di rc = -ENOMEM; break; } + /* Free the memory allocated by previous findfirst + or findnext call - we can not reuse the memory since + the resume name may not be same string length */ + if(cifsFile->search_resume_name) + kfree(cifsFile->search_resume_name); cifsFile->search_resume_name = kmalloc(cifsFile->resume_name_length, GFP_KERNEL); cFYI(1,("fnext last file: %s with name %d bytes long", @@ -1634,11 +1784,19 @@ cifs_readdir(struct file *file, void *di || (pfindData->FileName[0] != '.') || (pfindData->FileName[1] != '.'))) { - cifs_filldir + if(cifs_filldir (&qstring, pfindData, file, filldir, - direntry); + direntry)) { + /* do not end search if + kernel not ready to take + remaining entries yet */ + findNextParms.EndofSearch = 0; + reset_resume_key(file, pfindData->FileName,qstring.len, + Unicode,cifs_sb->local_nls); + break; + } file->f_pos++; } } else { /* UnixSearch */ @@ -1668,11 +1826,19 @@ cifs_readdir(struct file *file, void *di || (pfindDataUnix-> FileName[1] != '.'))) { - cifs_filldir_unix + if(cifs_filldir_unix (&qstring, pfindDataUnix, file, filldir, - direntry); + direntry)) { + /* do not end search if + kernel not ready to take + remaining entries yet */ + findNextParms.EndofSearch = 0; + reset_resume_key(file, pfindDataUnix->FileName,qstring.len, + Unicode,cifs_sb->local_nls); + break; + } file->f_pos++; } } @@ -1696,34 +1862,31 @@ cifs_readdir(struct file *file, void *di return rc; } +int cifs_prepare_write(struct file 
*file, struct page *page, + unsigned from, unsigned to) +{ + cFYI(1,("prepare write for page %p from %d to %d",page,from,to)); + if (!PageUptodate(page)) { + if (to - from != PAGE_CACHE_SIZE) { + void *kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, from); + memset(kaddr + to, 0, PAGE_CACHE_SIZE - to); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + SetPageUptodate(page); + } + return 0; +} -struct address_space_operations cifs_addr_ops = { - .readpage = cifs_readpage, - .readpages = cifs_readpages, - .writepage = cifs_writepage, - .prepare_write = simple_prepare_write, - .commit_write = cifs_commit_write, - .sync_page = cifs_sync_page, - /*.direct_IO = */ -}; - -struct address_space_operations cifs_addr_ops_writethrough = { - .readpage = cifs_readpage, - .readpages = cifs_readpages, - .writepage = cifs_writepage, - .prepare_write = simple_prepare_write, - .commit_write = cifs_commit_write, - .sync_page = cifs_sync_page, - /*.direct_IO = */ -}; -struct address_space_operations cifs_addr_ops_nocache = { +struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, .readpages = cifs_readpages, .writepage = cifs_writepage, - .prepare_write = simple_prepare_write, + .prepare_write = simple_prepare_write, /* BB fixme BB */ +/* .prepare_write = cifs_prepare_write, */ /* BB removeme BB */ .commit_write = cifs_commit_write, - .sync_page = cifs_sync_page, + /* .sync_page = cifs_sync_page, */ /*.direct_IO = */ }; - --- linux-2.6.6-rc1/fs/cifs/inode.c 2003-09-08 13:58:58.000000000 -0700 +++ 25/fs/cifs/inode.c 2004-04-18 22:25:28.631477832 -0700 @@ -125,7 +125,7 @@ cifs_get_inode_info_unix(struct inode ** inode->i_nlink = le64_to_cpu(findData.Nlinks); findData.NumOfBytes = le64_to_cpu(findData.NumOfBytes); findData.EndOfFile = le64_to_cpu(findData.EndOfFile); - inode->i_size = findData.EndOfFile; + i_size_write(inode,findData.EndOfFile); /* blksize needs to be multiple of two. 
So safer to default to blksize and blkbits set in superblock so 2**blkbits and blksize will match */ /* inode->i_blksize = @@ -204,10 +204,10 @@ cifs_get_inode_info(struct inode **pinod strnlen(search_path, MAX_PATHCONF) + 1, GFP_KERNEL); if (tmp_path == NULL) { - if(buf) - kfree(buf); - FreeXid(xid); - return -ENOMEM; + if(buf) + kfree(buf); + FreeXid(xid); + return -ENOMEM; } strncpy(tmp_path, pTcon->treeName, MAX_TREE_SIZE); @@ -218,10 +218,10 @@ cifs_get_inode_info(struct inode **pinod kfree(tmp_path); /* BB fix up inode etc. */ } else if (rc) { - if(buf) - kfree(buf); - FreeXid(xid); - return rc; + if(buf) + kfree(buf); + FreeXid(xid); + return rc; } } else { struct cifsInodeInfo *cifsInfo; @@ -272,10 +272,10 @@ cifs_get_inode_info(struct inode **pinod inode->i_mode &= ~(S_IWUGO); /* BB add code here - validate if device or weird share or device type? */ } - inode->i_size = le64_to_cpu(pfindData->EndOfFile); + i_size_write(inode,le64_to_cpu(pfindData->EndOfFile)); pfindData->AllocationSize = le64_to_cpu(pfindData->AllocationSize); inode->i_blocks = - (inode->i_blksize - 1 + pfindData->AllocationSize) >> inode->i_blkbits; + (inode->i_blksize - 1 + pfindData->AllocationSize) >> inode->i_blkbits; inode->i_nlink = le32_to_cpu(pfindData->NumberOfLinks); @@ -380,8 +380,8 @@ cifs_unlink(struct inode *inode, struct __u16 netfid; rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, - CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, - &netfid, &oplock, NULL, cifs_sb->local_nls); + CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, + &netfid, &oplock, NULL, cifs_sb->local_nls); if(rc==0) { CIFSSMBRenameOpenFile(xid,pTcon,netfid,NULL,cifs_sb->local_nls); CIFSSMBClose(xid, pTcon, netfid); @@ -426,6 +426,7 @@ cifs_mkdir(struct inode *inode, struct d rc = CIFSSMBMkDir(xid, pTcon, full_path, cifs_sb->local_nls); if (rc) { cFYI(1, ("cifs_mkdir returned 0x%x ", rc)); + d_drop(direntry); } else { inode->i_nlink++; if (pTcon->ses->capabilities & CAP_UNIX) @@ -479,7 +480,7 @@ 
cifs_rmdir(struct inode *inode, struct d if (!rc) { inode->i_nlink--; - direntry->d_inode->i_size = 0; + i_size_write(direntry->d_inode,0); direntry->d_inode->i_nlink = 0; } @@ -530,17 +531,17 @@ cifs_rename(struct inode *source_inode, } if((rc == -EIO)||(rc == -EEXIST)) { - int oplock = FALSE; - __u16 netfid; + int oplock = FALSE; + __u16 netfid; - rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, - CREATE_NOT_DIR, - &netfid, &oplock, NULL, cifs_sb_source->local_nls); - if(rc==0) { - CIFSSMBRenameOpenFile(xid,pTcon,netfid, - toName, cifs_sb_source->local_nls); - CIFSSMBClose(xid, pTcon, netfid); - } + rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, + CREATE_NOT_DIR, + &netfid, &oplock, NULL, cifs_sb_source->local_nls); + if(rc==0) { + CIFSSMBRenameOpenFile(xid,pTcon,netfid, + toName, cifs_sb_source->local_nls); + CIFSSMBClose(xid, pTcon, netfid); + } } if (fromName) kfree(fromName); @@ -559,6 +560,21 @@ cifs_revalidate(struct dentry *direntry) char *full_path; struct cifs_sb_info *cifs_sb; struct cifsInodeInfo *cifsInode; + loff_t local_size; + struct timespec local_mtime; + int invalidate_inode = FALSE; + + if(direntry->d_inode == NULL) + return -ENOENT; + + cifsInode = CIFS_I(direntry->d_inode); + + if(cifsInode == NULL) + return -ENOENT; + + /* no sense revalidating inode info on file that no one can write */ + if(CIFS_I(direntry->d_inode)->clientCanCacheRead) + return rc; xid = GetXid(); @@ -571,10 +587,6 @@ cifs_revalidate(struct dentry *direntry) direntry->d_inode->i_count.counter, direntry, direntry->d_time, jiffies)); - - cifsInode = CIFS_I(direntry->d_inode); - /* BB add check - do not need to revalidate oplocked files */ - if (time_before(jiffies, cifsInode->time + HZ) && lookupCacheEnabled) { if((S_ISREG(direntry->d_inode->i_mode) == 0) || (direntry->d_inode->i_nlink == 1)) { @@ -586,6 +598,10 @@ cifs_revalidate(struct dentry *direntry) cFYI(1,("Have to revalidate file due to hardlinks")); } } + + /* save mtime and size */ 
+ local_mtime = direntry->d_inode->i_mtime; + local_size = direntry->d_inode->i_size; if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) { rc = cifs_get_inode_info_unix(&direntry->d_inode, full_path, @@ -606,8 +622,43 @@ cifs_revalidate(struct dentry *direntry) } /* should we remap certain errors, access denied?, to zero */ - /* BB if not oplocked, invalidate inode pages if mtime has changed */ + /* if not oplocked, we invalidate inode pages if mtime + or file size had changed on server */ + + if(timespec_equal(&local_mtime,&direntry->d_inode->i_mtime) && + (local_size == direntry->d_inode->i_size)) { + cFYI(1,("cifs_revalidate - inode unchanged")); + } else { + /* file may have changed on server */ + if(cifsInode->clientCanCacheRead) { + /* no need to invalidate inode pages since we were + the only ones who could have modified the file and + the server copy is staler than ours */ + } else { + invalidate_inode = TRUE; + } + } + + /* need to write out dirty pages here */ + down(&direntry->d_inode->i_sem); + if(direntry->d_inode->i_mapping) { + /* do we need to lock inode until after invalidate completes below? 
*/ + filemap_fdatawrite(direntry->d_inode->i_mapping); + } + if(invalidate_inode) { + filemap_fdatawait(direntry->d_inode->i_mapping); + /* may eventually have to do this for open files too */ + if(list_empty(&(cifsInode->openFileList))) { + /* Has changed on server - flush read ahead pages */ + cFYI(1,("Invalidating read ahead data on closed file")); + invalidate_remote_inode(direntry->d_inode); + } + } + + + up(&direntry->d_inode->i_sem); + if (full_path) kfree(full_path); FreeXid(xid); @@ -623,78 +674,25 @@ int cifs_getattr(struct vfsmount *mnt, s return err; } -void -cifs_truncate_file(struct inode *inode) -{ /* BB remove - may not need this function after all BB */ - int xid; +static int cifs_truncate_page(struct address_space *mapping, loff_t from) +{ + pgoff_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + struct page *page; + char *kaddr; int rc = 0; - struct cifsFileInfo *open_file = NULL; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; - struct cifsInodeInfo *cifsInode; - struct dentry *dirent; - char *full_path = NULL; - - xid = GetXid(); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - - if (list_empty(&inode->i_dentry)) { - cERROR(1, - ("Can not get pathname from empty dentry in inode 0x%p ", - inode)); - FreeXid(xid); - return; - } - dirent = list_entry(inode->i_dentry.next, struct dentry, d_alias); - if (dirent) { - full_path = build_path_from_dentry(dirent); - rc = CIFSSMBSetEOF(xid, pTcon, full_path, inode->i_size,FALSE, - cifs_sb->local_nls); - cFYI(1,(" SetEOF (truncate) rc = %d",rc)); - if(rc == -ETXTBSY) { - cifsInode = CIFS_I(inode); - if(!list_empty(&(cifsInode->openFileList))) { - open_file = list_entry(cifsInode->openFileList.next, - struct cifsFileInfo, flist); - /* We could check if file is open for writing first */ - rc = CIFSSMBSetFileSize(xid, pTcon, inode->i_size, - open_file->netfid,open_file->pid,FALSE); - } else { - cFYI(1,(" No open files to get file handle from")); - 
} - } - if (!rc) - CIFSSMBSetEOF(xid,pTcon,full_path,inode->i_size,TRUE,cifs_sb->local_nls); - /* allocation size setting seems optional so ignore return code */ - } - if (full_path) - kfree(full_path); - FreeXid(xid); - return; -} - -static int cifs_trunc_page(struct address_space *mapping, loff_t from) -{ - pgoff_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); - struct page *page; - char *kaddr; - int rc = 0; - - page = grab_cache_page(mapping, index); - if (!page) - return -ENOMEM; - - kaddr = kmap_atomic(page, KM_USER0); - memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); - flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); - set_page_dirty(page); - unlock_page(page); - page_cache_release(page); - return rc; + page = grab_cache_page(mapping, index); + if (!page) + return -ENOMEM; + + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + unlock_page(page); + page_cache_release(page); + return rc; } int @@ -705,6 +703,7 @@ cifs_setattr(struct dentry *direntry, st struct cifsTconInfo *pTcon; char *full_path = NULL; int rc = -EACCES; + int found = FALSE; struct cifsFileInfo *open_file = NULL; FILE_BASIC_INFO time_buf; int set_time = FALSE; @@ -712,6 +711,7 @@ cifs_setattr(struct dentry *direntry, st __u64 uid = 0xFFFFFFFFFFFFFFFFULL; __u64 gid = 0xFFFFFFFFFFFFFFFFULL; struct cifsInodeInfo *cifsInode; + struct list_head * tmp; xid = GetXid(); @@ -726,32 +726,63 @@ cifs_setattr(struct dentry *direntry, st /* BB check if we need to refresh inode from server now ? 
BB */ - cFYI(1, (" Changing attributes 0x%x", attrs->ia_valid)); + /* need to flush data before changing file size on server */ + filemap_fdatawrite(direntry->d_inode->i_mapping); + filemap_fdatawait(direntry->d_inode->i_mapping); if (attrs->ia_valid & ATTR_SIZE) { - rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,FALSE, - cifs_sb->local_nls); - cFYI(1,(" SetEOF (setattrs) rc = %d",rc)); + read_lock(&GlobalSMBSeslock); + /* To avoid spurious oplock breaks from server, in the case + of inodes that we already have open, avoid doing path + based setting of file size if we can do it by handle. + This keeps our caching token (oplock) and avoids + timeouts when the local oplock break takes longer to flush + writebehind data than the SMB timeout for the SetPathInfo + request would allow */ + list_for_each(tmp, &cifsInode->openFileList) { + open_file = list_entry(tmp,struct cifsFileInfo, flist); + /* We check if file is open for writing first */ + if((open_file->pfile) && + ((open_file->pfile->f_flags & O_RDWR) || + (open_file->pfile->f_flags & O_WRONLY))) { + if(open_file->invalidHandle == FALSE) { + /* we found a valid, writeable network file + handle to use to try to set the file size */ + __u16 nfid = open_file->netfid; + __u32 npid = open_file->pid; + read_unlock(&GlobalSMBSeslock); + found = TRUE; + rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, + nfid,npid,FALSE); + cFYI(1,("SetFileSize by handle (setattrs) rc = %d",rc)); + /* Do not need reopen and retry on EAGAIN since we will + retry by pathname below */ - if(rc == -ETXTBSY) { - if(!list_empty(&(cifsInode->openFileList))) { - open_file = list_entry(cifsInode->openFileList.next, - struct cifsFileInfo, flist); - /* We could check if file is open for writing first */ - rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, - open_file->netfid,open_file->pid,FALSE); - } else { - cFYI(1,(" No open files to get file handle from")); + break; /* now that we found one valid file handle no + sense continuing to 
loop trying others */ + } } } - /* For Allocation Size - do not need to call the following - it did not hurt if it fails but why bother */ - /* CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, cifs_sb->local_nls);*/ + if(found == FALSE) { + read_unlock(&GlobalSMBSeslock); + } + + + if(rc != 0) { + /* Set file size by pathname rather than by handle either + because no valid, writeable file handle for it was found or + because there was an error setting it by handle */ + rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,FALSE, + cifs_sb->local_nls); + cFYI(1,(" SetEOF by path (setattrs) rc = %d",rc)); + } + + /* Server is ok setting allocation size implicitly - no need to call: */ + /*CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, TRUE, cifs_sb->local_nls);*/ + if (rc == 0) { rc = vmtruncate(direntry->d_inode, attrs->ia_size); - cifs_trunc_page(direntry->d_inode->i_mapping, direntry->d_inode->i_size); - -/* cFYI(1,("truncate_page to 0x%lx \n",direntry->d_inode->i_size)); */ + cifs_truncate_page(direntry->d_inode->i_mapping, direntry->d_inode->i_size); } } if (attrs->ia_valid & ATTR_UID) { @@ -816,6 +847,8 @@ cifs_setattr(struct dentry *direntry, st /* BB what if setting one attribute fails (such as size) but time setting works */ time_buf.CreationTime = 0; /* do not change */ + /* In the future we should experiment - try setting timestamps + via Handle (SetFileInfo) instead of by path */ rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf, cifs_sb->local_nls); } @@ -831,8 +864,7 @@ cifs_setattr(struct dentry *direntry, st void cifs_delete_inode(struct inode *inode) { - /* Note: called without the big kernel filelock - remember spinlocks! */ cFYI(1, ("In cifs_delete_inode, inode = 0x%p ", inode)); - /* may have to add back in when safe distributed caching of - directories via e.g. FindNotify added */ + /* may have to add back in if and when safe distributed caching of + directories added e.g. 
via FindNotify */ } --- linux-2.6.6-rc1/fs/cifs/link.c 2003-09-08 13:58:58.000000000 -0700 +++ 25/fs/cifs/link.c 2004-04-18 22:25:28.632477680 -0700 @@ -96,6 +96,8 @@ cifs_follow_link(struct dentry *direntry pTcon = cifs_sb->tcon; target_path = kmalloc(PATH_MAX, GFP_KERNEL); if(target_path == NULL) { + if (full_path) + kfree(full_path); FreeXid(xid); return -ENOMEM; } @@ -212,6 +214,8 @@ cifs_readlink(struct dentry *direntry, c len = buflen; tmpbuffer = kmalloc(len,GFP_KERNEL); if(tmpbuffer == NULL) { + if (full_path) + kfree(full_path); FreeXid(xid); return -ENOMEM; } @@ -251,10 +255,11 @@ cifs_readlink(struct dentry *direntry, c cFYI(1,("num referral: %d",num_referrals)); if(referrals) { cFYI(1,("referral string: %s ",referrals)); - strncpy(tmpbuffer, referrals, len-1); + strncpy(tmpbuffer, referrals, len-1); } } - + if(referrals) + kfree(referrals); kfree(tmp_path); if(referrals) { kfree(referrals); --- linux-2.6.6-rc1/fs/cifs/md4.c 2003-06-14 12:18:28.000000000 -0700 +++ 25/fs/cifs/md4.c 2004-04-18 22:25:28.633477528 -0700 @@ -3,7 +3,7 @@ Version 1.9. a implementation of MD4 designed for use in the SMB authentication protocol Copyright (C) Andrew Tridgell 1997-1998. - Modified by Steve French (sfrench@us.ibm.com) 2002 + Modified by Steve French (sfrench@us.ibm.com) 2002-2003 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -21,13 +21,7 @@ */ #include #include - -/* NOTE: This code makes no attempt to be fast! - - It assumes that a int is at least 32 bits long -*/ - -static __u32 A, B, C, D; +/* NOTE: This code makes no attempt to be fast! 
*/ static __u32 F(__u32 X, __u32 Y, __u32 Z) @@ -54,25 +48,26 @@ lshift(__u32 x, int s) return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s)); } -#define ROUND1(a,b,c,d,k,s) a = lshift(a + F(b,c,d) + X[k], s) -#define ROUND2(a,b,c,d,k,s) a = lshift(a + G(b,c,d) + X[k] + (__u32)0x5A827999,s) -#define ROUND3(a,b,c,d,k,s) a = lshift(a + H(b,c,d) + X[k] + (__u32)0x6ED9EBA1,s) +#define ROUND1(a,b,c,d,k,s) (*a) = lshift((*a) + F(*b,*c,*d) + X[k], s) +#define ROUND2(a,b,c,d,k,s) (*a) = lshift((*a) + G(*b,*c,*d) + X[k] + (__u32)0x5A827999,s) +#define ROUND3(a,b,c,d,k,s) (*a) = lshift((*a) + H(*b,*c,*d) + X[k] + (__u32)0x6ED9EBA1,s) /* this applies md4 to 64 byte chunks */ static void -mdfour64(__u32 * M) +mdfour64(__u32 * M, __u32 * A, __u32 *B, __u32 * C, __u32 *D) { int j; __u32 AA, BB, CC, DD; __u32 X[16]; + for (j = 0; j < 16; j++) X[j] = M[j]; - AA = A; - BB = B; - CC = C; - DD = D; + AA = *A; + BB = *B; + CC = *C; + DD = *D; ROUND1(A, B, C, D, 0, 3); ROUND1(D, A, B, C, 1, 7); @@ -125,15 +120,15 @@ mdfour64(__u32 * M) ROUND3(C, D, A, B, 7, 11); ROUND3(B, C, D, A, 15, 15); - A += AA; - B += BB; - C += CC; - D += DD; - - A &= 0xFFFFFFFF; - B &= 0xFFFFFFFF; - C &= 0xFFFFFFFF; - D &= 0xFFFFFFFF; + *A += AA; + *B += BB; + *C += CC; + *D += DD; + + *A &= 0xFFFFFFFF; + *B &= 0xFFFFFFFF; + *C &= 0xFFFFFFFF; + *D &= 0xFFFFFFFF; for (j = 0; j < 16; j++) X[j] = 0; @@ -166,15 +161,14 @@ mdfour(unsigned char *out, unsigned char __u32 M[16]; __u32 b = n * 8; int i; - - A = 0x67452301; - B = 0xefcdab89; - C = 0x98badcfe; - D = 0x10325476; + __u32 A = 0x67452301; + __u32 B = 0xefcdab89; + __u32 C = 0x98badcfe; + __u32 D = 0x10325476; while (n > 64) { copy64(M, in); - mdfour64(M); + mdfour64(M,&A,&B, &C, &D); in += 64; n -= 64; } @@ -187,13 +181,13 @@ mdfour(unsigned char *out, unsigned char if (n <= 55) { copy4(buf + 56, b); copy64(M, buf); - mdfour64(M); + mdfour64(M, &A, &B, &C, &D); } else { copy4(buf + 120, b); copy64(M, buf); - mdfour64(M); + mdfour64(M, &A, &B, &C, &D); copy64(M, 
buf + 64); - mdfour64(M); + mdfour64(M, &A, &B, &C, &D); } for (i = 0; i < 128; i++) --- linux-2.6.6-rc1/fs/cifs/misc.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/misc.c 2004-04-18 22:25:28.634477376 -0700 @@ -25,6 +25,8 @@ #include "cifsglob.h" #include "cifsproto.h" #include "cifs_debug.h" +#include "smberr.h" +#include "nterr.h" extern kmem_cache_t *cifs_req_cachep; extern struct task_struct * oplockThread; @@ -99,6 +101,8 @@ sesInfoFree(struct cifsSesInfo *buf_to_f kfree(buf_to_free->serverDomain); if (buf_to_free->serverNOS) kfree(buf_to_free->serverNOS); + if (buf_to_free->password) + kfree(buf_to_free->password); kfree(buf_to_free); } @@ -139,20 +143,10 @@ tconInfoFree(struct cifsTconInfo *buf_to kfree(buf_to_free); } -void * -kcalloc(size_t size, int type) -{ - void *addr; - addr = kmalloc(size, type); - if (addr) - memset(addr, 0, size); - return addr; -} - struct smb_hdr * -buf_get(void) +cifs_buf_get(void) { - struct smb_hdr *ret_buf; + struct smb_hdr *ret_buf = 0; /* We could use negotiated size instead of max_msgsize - but it may be more efficient to always alloc same size @@ -171,11 +165,11 @@ buf_get(void) } void -buf_release(void *buf_to_free) +cifs_buf_release(void *buf_to_free) { if (buf_to_free == NULL) { - cFYI(1, ("Null buffer passed to buf_release")); + cFYI(1, ("Null buffer passed to cifs_buf_release")); return; } kmem_cache_free(cifs_req_cachep, buf_to_free); @@ -267,7 +261,7 @@ header_assemble(struct smb_hdr *buffer, buffer->Uid = ses->Suid; break; } else { - /* BB eventually call setup_session here */ + /* BB eventually call cifs_setup_session here */ cFYI(1,("local UID found but smb sess with this server does not exist")); } } @@ -324,8 +318,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, ("Entering checkSMB with Length: %x, smb_buf_length: %x ", length, ntohl(smb->smb_buf_length))); if (((unsigned int)length < 2 + sizeof (struct smb_hdr)) - || (4 + ntohl(smb->smb_buf_length) > - CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE)) { + || 
(ntohl(smb->smb_buf_length) > + CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE - 4)) { if ((unsigned int)length < 2 + sizeof (struct smb_hdr)) { cERROR(1, ("Length less than 2 + sizeof smb_hdr ")); if (((unsigned int)length >= sizeof (struct smb_hdr) - 1) @@ -333,8 +327,8 @@ checkSMB(struct smb_hdr *smb, __u16 mid, return 0; /* some error cases do not return wct and bcc */ } - if (4 + ntohl(smb->smb_buf_length) > - CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE) + if (ntohl(smb->smb_buf_length) > + CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE - 4) cERROR(1, ("smb_buf_length greater than CIFS_MAX_MSGSIZE ... ")); cERROR(1, @@ -369,8 +363,22 @@ is_valid_oplock_break(struct smb_hdr *bu cFYI(1,("Checking for oplock break")); if(pSMB->hdr.Command != SMB_COM_LOCKING_ANDX) return FALSE; - if(pSMB->hdr.Flags & SMBFLG_RESPONSE) - return FALSE; /* server sends us "request" here */ + if(pSMB->hdr.Flags & SMBFLG_RESPONSE) { + /* no sense logging error on invalid handle on oplock + break - harmless race between close request and oplock + break response is expected from time to time writing out + large dirty files cached on the client */ + if ((NT_STATUS_INVALID_HANDLE) == + le32_to_cpu(pSMB->hdr.Status.CifsError)) { + cFYI(1,("invalid handle on oplock break")); + return TRUE; + } else if (ERRbadfid == + le16_to_cpu(pSMB->hdr.Status.DosError.Error)) { + return TRUE; + } else { + return FALSE; /* on valid oplock brk we get "request" */ + } + } if(pSMB->hdr.WordCount != 8) return FALSE; @@ -387,8 +395,6 @@ is_valid_oplock_break(struct smb_hdr *bu netfile = list_entry(tmp1,struct cifsFileInfo,tlist); if(pSMB->Fid == netfile->netfid) { struct cifsInodeInfo *pCifsInode; - /* BB Add following logic to mark inode for write through - inode->i_data.a_ops = &cifs_addr_ops_writethrough; */ read_unlock(&GlobalSMBSeslock); cFYI(1,("Matching file id, processing oplock break")); pCifsInode = --- linux-2.6.6-rc1/fs/cifs/netmisc.c 2003-06-22 12:04:44.000000000 -0700 +++ 25/fs/cifs/netmisc.c 2004-04-18 22:25:28.635477224 
-0700 @@ -125,10 +125,10 @@ const struct smb_to_posix_error mapping_ /* Convert string containing dotted ip address to binary form */ /* returns 0 if invalid address */ -/* BB add address family, change rc to status flag and return *//* also see inet_pton */ -/* To identify v4 vs. v6 - 1) check for colon (v6 only) 2) then call inet_pton to parse for bad address */ +/* BB add address family, change rc to status flag and return union or for ipv6 */ +/* will need parent to call something like inet_pton to convert ipv6 address BB */ int -inet_addr(char *cp) +cifs_inet_pton(int address_family, char *cp,void *dst) { struct in_addr address; int value; @@ -140,6 +140,9 @@ inet_addr(char *cp) static const int addr_class_max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; + if(address_family != AF_INET) + return -EAFNOSUPPORT; + for (i = 0; i < 4; i++) { bytes[i] = 0; } @@ -166,6 +169,9 @@ inet_addr(char *cp) return 0; *end++ = value; temp = *++cp; + } else if (temp == ':') { + cFYI(1,("IPv6 addresses not supported for CIFS mounts yet")); + return -1; } else break; } @@ -182,8 +188,8 @@ inet_addr(char *cp) return 0; address.s_addr = *((int *) bytes) | htonl(value); - return address.s_addr; - + *((int *)dst) = address.s_addr; + return 1; /* success */ } /***************************************************************************** --- linux-2.6.6-rc1/fs/cifs/README 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/README 2004-04-18 22:25:28.597483000 -0700 @@ -1,30 +1,30 @@ -The CIFS VFS support for Linux supports many advanced network filesystem -features such as heirarchical dfs like namespace, hardlinks, locking and more. 
-It was designed to comply with the SNIA CIFS Technical Reference (which supersedes -the 1992 X/Open SMB Standard) as well as to perform best practice practical -interoperability with Windows 2000, Windows XP, Samba and equivalent +The CIFS VFS support for Linux supports many advanced network filesystem +features such as heirarchical dfs like namespace, hardlinks, locking and more. +It was designed to comply with the SNIA CIFS Technical Reference (which +supersedes the 1992 X/Open SMB Standard) as well as to perform best practice +practical interoperability with Windows 2000, Windows XP, Samba and equivalent servers. -For questions or bug reports please contact sfrench@samba.org (sfrench@us.ibm.com) +For questions or bug reports please contact: + sfrench@samba.org (sfrench@us.ibm.com) Build instructions: ================== For Linux 2.4: -1a) Get the linux kernel source with cifs vfs already in it -from bitkeeper via bk://cifs.bkbits.net/linux-2.4 -or -1b) Get the kernel source (e.g.from http://www.kernel.org) +1) Get the kernel source (e.g.from http://www.kernel.org) and download the cifs vfs source (see the project page at http://us1.samba.org/samba/Linux_CIFS_client.html) and change directory into the top of the kernel directory then patch the kernel (e.g. "patch -p1 < cifs_24.patch") to add the cifs vfs to your kernel configure options if it has not already been added (e.g. current SuSE and UL -users do not need to do not need that patch since the cifs vfs is +users do not need to apply the cifs_24.patch since the cifs vfs is already in the kernel configure menu) and then mkdir linux/fs/cifs and then copy the current cifs vfs files from the cifs download to your kernel build directory e.g. + cp /fs/cifs/* to /fs/cifs + 2) make menuconfig (or make xconfig) 3) select cifs from within the network filesystem choices 4) save and exit @@ -53,56 +53,105 @@ would simply type "make install"). 
If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on the CIFS VFS web site) copy it to the same directory in which mount.smbfs and -similar files reside (usually /sbin). Although the helper software is required, -mount.cifs is recommended. Eventually the Samba 3.0 utility program "net" -may also be helpful since it may someday provide easier mount syntax for users used -to Windows e.g. - net use -Note that running Winbind on all of your Linux clients is useful in -in mapping Uids and Gids consistently to the proper network user. - -Samba Considerations -==================== -To get the maximum benefit from the CIFS VFS, we recommend using a server that -supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or later or -Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers. -Note that uid, gid and file permissions will display default values if you do -not have a server that supports the Unix extensions for CIFS (such as Samba 2.2.3 or -later). To enable the Unix CIFS Extensions in the Samba server, add the line: +similar files reside (usually /sbin). Although the helper software is not +required, mount.cifs is recommended. Eventually the Samba 3.0 utility program +"net" may also be helpful since it may someday provide easier mount syntax for +users who are used to Windows e.g. net use +Note that running the Winbind pam/nss module (logon service) on all of your +Linux clients is useful in mapping Uids and Gids consistently across the +domain to the proper network user. The mount.cifs mount helper can be +trivially built from Samba 3.0 or later source e.g. by executing: + + gcc samba/source/client/mount.cifs.c -o mount.cifs + +Note that when the mount.cifs utility is run suid (allowing user mounts), +in order to reduce risks, the "nosuid" mount flag is passed in on mount to +disallow execution of an suid program mounted on the remote target. 
+When mount is executed as root, nosuid is not passed in by default, +and execution of suid programs on the remote target would be enabled +by default. This can be changed, as with nfs and other filesystems, +by simply specifying "nosuid" among the mount options. For user mounts +though to be able to pass the suid flag to mount requires rebuilding +mount.cifs with the following flag: + + gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs + +There is a corresponding manual page for cifs mounting in the Samba 3.0 and +later source tree in docs/manpages/mount.cifs.8 + +Samba Considerations +==================== +To get the maximum benefit from the CIFS VFS, we recommend using a server that +supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or later or +Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers. +Note that uid, gid and file permissions will display default values if you do +not have a server that supports the Unix extensions for CIFS (such as Samba +2.2.5 or later). To enable the Unix CIFS Extensions in the Samba server, add +the line: + unix extensions = yes -to your smb.conf file on the server. Note that the following smb.conf settings are -also useful (on the Samba server) when the majority of clients are Unix -or Linux: + +to your smb.conf file on the server. Note that the following smb.conf settings +are also useful (on the Samba server) when the majority of clients are Unix or +Linux: + case sensitive = yes - delete readonly = yes -Some administrators also change the "map archive" and the "create mask" parameters -from their default values. Creating special devices (mknod) remotely may require -specifying a mkdev function to Samba. For more information on these see the manual -pages ("man smb.conf") on the Samba server system. 
Note that the cifs vfs, unlike the -smbfs vfs, does not read the smb.conf on the client system (the few optional settings -are passed in on mount via -o parameters instead). Note that Samba 2.2.7 or later -includes a fix that allows the CIFS VFS to delete open files (required for strict -POSIX compliance). Windows Servers already supported this feature. + delete readonly = yes + +Some administrators also change the "map archive" and the "create mask" +parameters from their default values. Creating special devices (mknod) remotely +may require specifying a mkdev function to Samba. For more information on these +see the manual pages ("man smb.conf") on the Samba server system. Note that the +cifs vfs, unlike the smbfs vfs, does not read the smb.conf on the client system +(the few optional settings are passed in on mount via -o parameters instead). +Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete +open files (required for strict POSIX compliance). Windows Servers already +supported this feature. Use instructions: ================ -Once the CIFS VFS support is built into the kernel or installed as a module (cifs.o), -you can use mount syntax like the following to access Samba or Windows servers: +Once the CIFS VFS support is built into the kernel or installed as a module +(cifs.o), you can use mount syntax like the following to access Samba or Windows +servers: + mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword -after -o the following cifs vfs specific options are supported: + +Before -o the option -v may be specified to make the mount.cifs +mount helper display the mount steps more verbosely. +After -o the following commonly used cifs vfs specific options +are supported: + user= pass= domain= -TCP names (in addition to ip addresses) will be available when the mount helper -(mount.cifs) is complete + +Other cifs mount options are described below. 
Use of TCP names (in addition to +ip addresses) is available if the mount helper (mount.cifs) is installed. If +you do not trust the server to which you are mounted, or if you do not have +cifs signing enabled (and the physical network is insecure), consider use +of the standard mount options "noexec" and "nosuid" to reduce the risk of +running an altered binary on your local system (downloaded from a hostile server +or altered by a hostile router). + +When using the mount helper mount.cifs, passwords may be specified via alternate +mechanisms, instead of specifying it after -o using the normal "pass=" syntax +on the command line: +1) By including it in a credential file. Specify credentials=filename as one +of the mount options. Credential files contain two lines + username=someuser + password=your_password +2) By specifying the password in the PASSWD environment variable (similarly +the user name can be taken from the USER environment variable). + +If no password is provided, mount.cifs will prompt for password entry. Restrictions ============ -Servers must support the NTLM SMB dialect (which is the most recent, supported by Samba -and Windows NT, 2000 and XP and many other SMB/CIFS servers) and servers must support -either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC 1001/1002 support for -"Netbios-Over-TCP/IP." Neither of these is likely to be a problem as most servers -support this. IPv6 support is planned for the future. +Servers must support the NTLM SMB dialect (which is the most recent, supported +by Samba and Windows NT, 2000 and XP and many other SMB/CIFS servers) and +servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC +1001/1002 support for "Netbios-Over-TCP/IP." Neither of these is likely to be a +problem as most servers support this. IPv6 support is planned for the future.
CIFS VFS Mount Options ====================== @@ -141,54 +190,91 @@ A partial list of the supported mount op ro mount network share read-only version used to distinguish different versions of the mount helper utility (not typically needed) + sep if first mount option (after the -o), overrides + the comma as the separator between the mount + parms. e.g. + -o user=myname,password=mypassword,domain=mydom + could be passed instead with period as the separator by + -o sep=.user=myname.password=mypassword.domain=mydom + this might be useful when comma is contained within username + or password or domain. This option is less important + when the cifs mount helper mount.cifs (version 1.1 or later) + is used. + nosuid Do not allow remote executables with the suid bit + set to be executed. This is only meaningful for mounts + to servers such as Samba which support the CIFS Unix Extensions. + If you do not trust the servers in your network (your mount + targets) it is recommended that you specify this option for + greater security. + suid Allow remote files on this mountpoint with suid enabled to + be executed (default for mounts when executed as root, + nosuid is default for user mounts). + credentials (only valid when the cifs mount helper, mount.cifs, is + installed). Specifies the name of the credential file which + will be read to obtain the userid and password. Misc /proc/fs/cifs Flags and Debug Info ======================================= Informational pseudo-files: - DebugData Displays information about active CIFS sessions - SimultaneousOps Counter which holds maximum number of +DebugData Displays information about active CIFS sessions +SimultaneousOps Counter which holds maximum number of simultaneous outstanding SMB/CIFS requests.
- Stats Lists summary resource usage information +Stats Lists summary resource usage information Configuration pseudo-files: - MultiuserMount If set to one, more than one CIFS session to +MultiuserMount If set to one, more than one CIFS session to the same server ip address can be established if more than one uid accesses the same mount point and if the uids user/password mapping information is available. (default is 0) - PacketSigningEnabled If set to one, cifs packet signing is enabled +PacketSigningEnabled If set to one, cifs packet signing is enabled and will be used if the server requires it. If set to two, cifs packet signing is required even if the server considers packet signing optional. (default 1) - cifsFYI If set to one, additional debug information is +cifsFYI If set to one, additional debug information is logged to the system error log. (default 0) - ExtendedSecurity If set to one, SPNEGO session establishment +ExtendedSecurity If set to one, SPNEGO session establishment is allowed which enables more advanced secure CIFS session establishment (default 0) - NTLMV2Enabled If set to one, more secure password hashes +NTLMV2Enabled If set to one, more secure password hashes are used when the server supports them and when kerberos is not negotiated (default 0) - traceSMB If set to one, debug information is logged to the +traceSMB If set to one, debug information is logged to the system error log with the start of smb requests and responses (default 0) - LookupCacheEnable If set to one, inode information is kept cached +LookupCacheEnable If set to one, inode information is kept cached for one second improving performance of lookups (default 1) - OplockEnabled If set to one, safe distributed caching enabled. +OplockEnabled If set to one, safe distributed caching enabled. 
(default 1) +LinuxExtensionsEnabled If set to one then the client will attempt to + use the CIFS "UNIX" extensions which are optional + protocol enhancements that allow CIFS servers + to return accurate UID/GID information as well + as support symbolic links. If you use servers + such as Samba that support the CIFS Unix + extensions but do not want to use symbolic link + support and want to map the uid and gid fields + to values supplied at mount (rather than the + actual values), then set this to zero. (default 1) + +These experimental features and tracing can be enabled by changing flags in +/proc/fs/cifs (after the cifs module has been installed or built into the +kernel, e.g. insmod cifs). To enable a feature set it to 1 e.g. to enable +tracing to the kernel message log type: -These experimental features and tracing can be enabled by changing flags in /proc/fs/cifs -(after the cifs module has been installed or built into the kernel, e.g. insmod cifs). -To enable a feature set it to 1 e.g. to enable tracing to the kernel message log -type: echo 1 > /proc/fs/cifs/cifsFYI + and for more extensive tracing including the start of smb requests and responses + echo 1 > /proc/fs/cifs/traceSMB -Also note that "cat /proc/fs/cifs/DebugData" will display some information about the -active sessions and the shares that are mounted. NTLMv2 enablement and packet -signing will not work since they the implementation is not quite complete. Do not enable -these flags unless you are doing specific testing. Enabling extended security works to -Windows 2000 Workstations and XP but not to Windows 2000 server or Samba since it does not -usually send "raw NTLMSSP" (instead it sends NTLMSSP encapsulated in SPNEGO/GSSAPI, which -support is not complete in the CIFS VFS yet). + +Also note that "cat /proc/fs/cifs/DebugData" will display some information about +the active sessions and the shares that are mounted.
Note: NTLMv2 enablement +will not work since its implementation is not quite complete yet. +Do not alter these configuration values unless you are doing specific testing. +Enabling extended security works to Windows 2000 Workstations and XP but not to +Windows 2000 server or Samba since it does not usually send "raw NTLMSSP" +(instead it sends NTLMSSP encapsulated in SPNEGO/GSSAPI, which support is not +complete in the CIFS VFS yet). --- linux-2.6.6-rc1/fs/cifs/smbencrypt.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/smbencrypt.c 2004-04-18 22:25:28.636477072 -0700 @@ -23,8 +23,6 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -extern int DEBUGLEVEL; - #include #include #include @@ -45,9 +43,7 @@ extern int DEBUGLEVEL; /* following came from the other byteorder.h to avoid include conflicts */ #define CVAL(buf,pos) (((unsigned char *)(buf))[pos]) #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) -#define SIVALX(buf,pos,val) (SSVALX(buf,pos,val&0xFFFF),SSVALX(buf,pos+2,val>>16)) #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) -#define SIVAL(buf,pos,val) SIVALX((buf),(pos),((__u32)(val))) /*The following definitions come from lib/md4.c */ @@ -96,12 +92,6 @@ SMBencrypt(unsigned char *passwd, unsign SMBOWFencrypt(p21, c8, p24); -#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBencrypt: lm#, challenge, response\n")); - dump_data(100, (char *) p21, 16); - dump_data(100, (char *) c8, 8); - dump_data(100, (char *) p24, 24); -#endif memset(p14,0,15); memset(p21,0,21); } @@ -151,12 +141,17 @@ E_md4hash(const unsigned char *passwd, u __u16 wpwd[129]; /* Password cannot be longer than 128 characters */ - len = strlen((char *) passwd); - if (len > 128) - len = 128; - /* Password must be converted to NT unicode */ - _my_mbstowcs(wpwd, passwd, len); - wpwd[len] = 0; /* Ensure string is null terminated */ + if(passwd) { + len = strlen((char *) passwd); + if (len > 128) { + len = 128; + } + /* Password must be converted
to NT unicode */ + _my_mbstowcs(wpwd, passwd, len); + } else + len = 0; + + wpwd[len] = 0; /* Ensure string is null terminated */ /* Calculate length in bytes */ len = _my_wcslen(wpwd) * sizeof (__u16); @@ -179,12 +174,6 @@ nt_lm_owf_gen(char *pwd, unsigned char n memset(nt_p16, '\0', 16); E_md4hash(passwd, nt_p16); -#ifdef DEBUG_PASSWORD - DEBUG(100, ("nt_lm_owf_gen: pwd, nt#\n")); - dump_data(120, passwd, strlen(passwd)); - dump_data(100, (char *) nt_p16, 16); -#endif - /* Mangle the passwords into Lanman format */ passwd[14] = '\0'; /* strupper(passwd); */ @@ -194,11 +183,6 @@ nt_lm_owf_gen(char *pwd, unsigned char n memset(p16, '\0', 16); E_P16((unsigned char *) passwd, (unsigned char *) p16); -#ifdef DEBUG_PASSWORD - DEBUG(100, ("nt_lm_owf_gen: pwd, lm#\n")); - dump_data(120, passwd, strlen(passwd)); - dump_data(100, (char *) p16, 16); -#endif /* clear out local copy of user's password (just being paranoid). */ memset(passwd, '\0', sizeof (passwd)); } @@ -235,13 +219,6 @@ ntv2_owf_gen(const unsigned char owf[16] hmac_md5_update((const unsigned char *) dom_u, domain_l * 2, &ctx); hmac_md5_final(kr_buf, &ctx); -#ifdef DEBUG_PASSWORD - DEBUG(100, ("ntv2_owf_gen: user, domain, owfkey, kr\n")); - dump_data(100, user_u, user_l * 2); - dump_data(100, dom_u, domain_l * 2); - dump_data(100, owf, 16); - dump_data(100, kr_buf, 16); -#endif kfree(user_u); } @@ -270,12 +247,6 @@ NTLMSSPOWFencrypt(unsigned char passwd[8 memset(p21 + 8, 0xbd, 8); E_P24(p21, ntlmchalresp, p24); -#ifdef DEBUG_PASSWORD - DEBUG(100, ("NTLMSSPOWFencrypt: p21, c8, p24\n")); - dump_data(100, (char *) p21, 21); - dump_data(100, (char *) ntlmchalresp, 8); - dump_data(100, (char *) p24, 24); -#endif } /* Does the NT MD4 hash then des encryption. 
*/ @@ -289,13 +260,6 @@ SMBNTencrypt(unsigned char *passwd, unsi E_md4hash(passwd, p21); SMBOWFencrypt(p21, c8, p24); - -#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBNTencrypt: nt#, challenge, response\n")); - dump_data(100, (char *) p21, 16); - dump_data(100, (char *) c8, 8); - dump_data(100, (char *) p24, 24); -#endif } /* Does the md5 encryption from the NT hash for NTLMv2. */ @@ -310,37 +274,6 @@ SMBOWFencrypt_ntv2(const unsigned char k hmac_md5_update(srv_chal->data, srv_chal->length, &ctx); hmac_md5_update(cli_chal->data, cli_chal->length, &ctx); hmac_md5_final(resp_buf, &ctx); - -#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBOWFencrypt_ntv2: srv_chal, cli_chal, resp_buf\n")); - dump_data(100, srv_chal->data, srv_chal->length); - dump_data(100, cli_chal->data, cli_chal->length); - dump_data(100, resp_buf, 16); -#endif -} - -static struct data_blob LMv2_generate_response(const unsigned char ntlm_v2_hash[16], - const struct data_blob * server_chal) -{ - unsigned char lmv2_response[16]; - struct data_blob lmv2_client_data/* = data_blob(NULL, 8)*/; /* BB Fix BB */ - struct data_blob final_response /* = data_blob(NULL, 24)*/; /* BB Fix BB */ - - /* LMv2 */ - /* client-supplied random data */ - get_random_bytes(lmv2_client_data.data, lmv2_client_data.length); - /* Given that data, and the challenge from the server, generate a response */ - SMBOWFencrypt_ntv2(ntlm_v2_hash, server_chal, &lmv2_client_data, lmv2_response); - memcpy(final_response.data, lmv2_response, sizeof(lmv2_response)); - - /* after the first 16 bytes is the random data we generated above, - so the server can verify us with it */ - memcpy(final_response.data+sizeof(lmv2_response), - lmv2_client_data.data, lmv2_client_data.length); - -/* data_blob_free(&lmv2_client_data); */ /* BB fix BB */ - - return final_response; } void @@ -352,11 +285,6 @@ SMBsesskeygen_ntv2(const unsigned char k hmac_md5_init_limK_to_64(kr, 16, &ctx); hmac_md5_update(nt_resp, 16, &ctx); hmac_md5_final((unsigned char *) sess_key, &ctx); - 
-#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBsesskeygen_ntv2:\n")); - dump_data(100, sess_key, 16); -#endif } void @@ -364,66 +292,4 @@ SMBsesskeygen_ntv1(const unsigned char k const unsigned char *nt_resp, __u8 sess_key[16]) { mdfour((unsigned char *) sess_key, (unsigned char *) kr, 16); - -#ifdef DEBUG_PASSWORD - DEBUG(100, ("SMBsesskeygen_ntv1:\n")); - dump_data(100, sess_key, 16); -#endif -} - -/*********************************************************** - encode a password buffer. The caller gets to figure out - what to put in it. -************************************************************/ -int -encode_pw_buffer(char buffer[516], char *new_pw, int new_pw_length) -{ - get_random_bytes(buffer, sizeof (buffer)); - - memcpy(&buffer[512 - new_pw_length], new_pw, new_pw_length); - - /* - * The length of the new password is in the last 4 bytes of - * the data buffer. - */ - SIVAL(buffer, 512, new_pw_length); - - return TRUE; -} - -int SMBNTLMv2encrypt(const char *user, const char *domain, const char *password, - const struct data_blob *server_chal, - const struct data_blob *names_blob, - struct data_blob *lm_response, struct data_blob *nt_response, - struct data_blob *nt_session_key,struct nls_table * nls_codepage) -{ - unsigned char nt_hash[16]; - unsigned char ntlm_v2_hash[16]; - E_md4hash(password, nt_hash); - - /* We don't use the NT# directly. Instead we use it mashed up with - the username and domain. 
- This prevents username swapping during the auth exchange - */ - ntv2_owf_gen(nt_hash, user, domain, ntlm_v2_hash,nls_codepage); - - if (nt_response) { -/* *nt_response = NTLMv2_generate_response(ntlm_v2_hash, server_chal, - names_blob); */ /* BB fix BB */ - if (nt_session_key) { -/* *nt_session_key = data_blob(NULL, 16); */ /* BB fix BB */ - - /* The NTLMv2 calculations also provide a session key, for signing etc later */ - /* use only the first 16 bytes of nt_response for session key */ - SMBsesskeygen_ntv2(ntlm_v2_hash, nt_response->data, nt_session_key->data); - } - } - - /* LMv2 */ - - if (lm_response) { - *lm_response = LMv2_generate_response(ntlm_v2_hash, server_chal); - } - - return TRUE; } --- linux-2.6.6-rc1/fs/cifs/TODO 2003-07-27 12:14:40.000000000 -0700 +++ 25/fs/cifs/TODO 2004-04-18 22:25:28.597483000 -0700 @@ -1,4 +1,4 @@ -version 0.8.1 July 4th, 2003 +version 1.0.2 January 29, 2004 A Partial List of Known Problems and Missing Features ===================================================== @@ -27,8 +27,8 @@ used (Kerberos or NTLMSSP). Signing alre f) Directory entry caching relies on a 1 second timer, rather than using FindNotify or equivalent. - (started) -g) There may be a few additional changes that could be done to take advantage -of recent 2.5 kernel improvements in byte-range locking +g) A few byte range testcases fail due to POSIX vs. Windows/CIFS +style byte range lock differences h) quota support @@ -36,8 +36,6 @@ i) support for the Linux 2.5 kernel new which will allow us to expose dos attributes as well as real ACLs -j) finish off the mount helper, mount.cifs - (started) - k) finish writepages support (multi-page write behind for improved performance) and syncpage @@ -56,17 +54,15 @@ p) Improve performance of readpages by s at a time when 8 pages or more are requested. 
-KNOWN BUGS (updated July 4th, 2003) +KNOWN BUGS (updated January 30, 2004) ==================================== 1) existing symbolic links (Windows reparse points) are recognized but can not be created remotely. They are implemented for Samba and those that support the CIFS Unix extensions but Samba has a bug currently handling symlink text beginning with slash -2) delete of file with read-only attribute set will fail (may be ok) -3) mount helper syntax not quite matching man page -4) follow_link and readdir code does not follow dfs junctions +2) follow_link and readdir code does not follow dfs junctions but recognizes them -5) create of new files to FAT partitions on Windows servers can +3) create of new files to FAT partitions on Windows servers can succeed but still return access denied (appears to be Windows not client problem). NTFS partitions do not have this problem. --- linux-2.6.6-rc1/fs/cifs/transport.c 2003-10-25 14:45:46.000000000 -0700 +++ 25/fs/cifs/transport.c 2004-04-18 22:25:28.638476768 -0700 @@ -37,7 +37,6 @@ struct mid_q_entry * AllocMidQEntry(struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) { struct mid_q_entry *temp; - int timeout = 10 * HZ; if (ses == NULL) { cERROR(1, ("Null session passed in to AllocMidQEntry ")); @@ -63,25 +62,11 @@ AllocMidQEntry(struct smb_hdr *smb_buffe temp->tsk = current; } - while ((ses->server->tcpStatus != CifsGood) && (timeout > 0)){ - /* Give the tcp thread up to 10 seconds to reconnect */ - /* Should we wake up tcp thread first? 
BB */ - timeout = wait_event_interruptible_timeout(ses->server->response_q, - (ses->server->tcpStatus == CifsGood), timeout); - } - - if (ses->server->tcpStatus == CifsGood) { - spin_lock(&GlobalMid_Lock); - list_add_tail(&temp->qhead, &ses->server->pending_mid_q); - atomic_inc(&midCount); - temp->midState = MID_REQUEST_ALLOCATED; - spin_unlock(&GlobalMid_Lock); - } else { - cERROR(1,("Need to reconnect after session died to server")); - if (temp) - kmem_cache_free(cifs_mid_cachep, temp); - return NULL; - } + spin_lock(&GlobalMid_Lock); + list_add_tail(&temp->qhead, &ses->server->pending_mid_q); + atomic_inc(&midCount); + temp->midState = MID_REQUEST_ALLOCATED; + spin_unlock(&GlobalMid_Lock); return temp; } @@ -93,7 +78,7 @@ DeleteMidQEntry(struct mid_q_entry *midE list_del(&midEntry->qhead); atomic_dec(&midCount); spin_unlock(&GlobalMid_Lock); - buf_release(midEntry->resp_buf); + cifs_buf_release(midEntry->resp_buf); kmem_cache_free(cifs_mid_cachep, midEntry); } @@ -190,10 +175,35 @@ SendReceive(const unsigned int xid, stru long timeout; struct mid_q_entry *midQ; + if ((ses == NULL) || (ses->server == NULL)) { + cERROR(1,("Null tcp session or smb session: %p",ses)); + return -EIO; + } + + if (ses->server->tcpStatus == CifsExiting) { + return -ENOENT; + } else if (ses->server->tcpStatus == CifsNeedReconnect) { + cFYI(1,("tcp session dead - return to caller to retry")); + return -EAGAIN; + } else if (ses->status != CifsGood) { + /* check if SMB session is bad because we are setting it up */ + if((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && + (in_buf->Command != SMB_COM_NEGOTIATE)) { + return -EAGAIN; + } /* else ok - we are setting up session */ + } + /* make sure that we sign in the same order that we send on this socket + and avoid races inside tcp sendmsg code that could cause corruption + of smb data */ + down(&ses->server->tcpSem); midQ = AllocMidQEntry(in_buf, ses); - if (midQ == NULL) + if (midQ == NULL) { + up(&ses->server->tcpSem); return -EIO; + } + 
if (in_buf->smb_buf_length > CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE - 4) { + up(&ses->server->tcpSem); cERROR(1, ("Illegal length, greater than maximum frame, %d ", in_buf->smb_buf_length)); @@ -201,23 +211,25 @@ SendReceive(const unsigned int xid, stru return -EIO; } - if (in_buf->smb_buf_length > 12) - in_buf->Flags2 = cpu_to_le16(in_buf->Flags2); + if (in_buf->smb_buf_length > 12) + in_buf->Flags2 = cpu_to_le16(in_buf->Flags2); - rc = cifs_sign_smb(in_buf, ses, &midQ->sequence_number); + rc = cifs_sign_smb(in_buf, ses, &midQ->sequence_number); midQ->midState = MID_REQUEST_SUBMITTED; rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->sockAddr)); - + (struct sockaddr *) &(ses->server->addr.sockAddr)); + up(&ses->server->tcpSem); if (long_op == -1) goto cifs_no_response_exit; - if (long_op > 1) /* writes past end of file can take looooong time */ + else if (long_op == 2) /* writes past end of file can take looooong time */ timeout = 300 * HZ; else if (long_op == 1) timeout = 45 * HZ; /* should be greater than servers oplock break timeout (about 43 seconds) */ - else + else if (long_op > 2) { + timeout = MAX_SCHEDULE_TIMEOUT; + } else timeout = 15 * HZ; /* wait for 15 seconds or until woken up due to response arriving or due to last connection to this server being unmounted */ @@ -227,26 +239,39 @@ SendReceive(const unsigned int xid, stru midState & MID_RESPONSE_RECEIVED, timeout); if (signal_pending(current)) { - cERROR(1, ("CIFS: caught signal")); + cFYI(1, ("CIFS: caught signal")); DeleteMidQEntry(midQ); return -EINTR; - } else { - if (midQ->resp_buf) + } else { /* BB spinlock protect this against races with demux thread */ + spin_lock(&GlobalMid_Lock); + if (midQ->resp_buf) { + spin_unlock(&GlobalMid_Lock); receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length); - else { + } else { cFYI(1,("No response buffer")); + if(midQ->midState == MID_REQUEST_SUBMITTED) { + if(ses->server->tcpStatus == CifsExiting) + rc 
= -EHOSTDOWN; + else { + ses->server->tcpStatus = CifsNeedReconnect; + midQ->midState = MID_RETRY_NEEDED; + } + } + + if(midQ->midState == MID_RETRY_NEEDED) { + rc = -EAGAIN; + cFYI(1,("marking request for retry")); + } else { + rc = -EIO; + } + spin_unlock(&GlobalMid_Lock); DeleteMidQEntry(midQ); - ses->server->tcpStatus = CifsNeedReconnect; - return -EIO; + return rc; } } - if (timeout == 0) { - cFYI(1, - ("Timeout on receive. Assume response SMB is invalid.")); - rc = -ETIMEDOUT; - } else if (receive_len > CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE) { + if (receive_len > CIFS_MAX_MSGSIZE + MAX_CIFS_HDR_SIZE) { cERROR(1, ("Frame too large received. Length: %d Xid: %d", receive_len, xid)); --- linux-2.6.6-rc1/fs/coda/inode.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/coda/inode.c 2004-04-18 22:25:24.911043424 -0700 @@ -82,6 +82,12 @@ void coda_destroy_inodecache(void) printk(KERN_INFO "coda_inode_cache: not all structures were freed\n"); } +static int coda_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + /* exported operations */ struct super_operations coda_super_operations = { @@ -90,6 +96,7 @@ struct super_operations coda_super_opera .clear_inode = coda_clear_inode, .put_super = coda_put_super, .statfs = coda_statfs, + .remount_fs = coda_remount, }; static int get_device_index(struct coda_mount_data *data) --- linux-2.6.6-rc1/fs/coda/psdev.c 2003-09-27 18:57:46.000000000 -0700 +++ 25/fs/coda/psdev.c 2004-04-18 22:25:24.912043272 -0700 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +62,7 @@ unsigned long coda_timeout = 30; /* .. 
s struct venus_comm coda_comms[MAX_CODADEVS]; +static struct class_simple *coda_psdev_class; /* * Device operations @@ -358,20 +360,38 @@ static struct file_operations coda_psdev static int init_coda_psdev(void) { - int i; + int i, err = 0; if (register_chrdev(CODA_PSDEV_MAJOR,"coda_psdev", &coda_psdev_fops)) { printk(KERN_ERR "coda_psdev: unable to get major %d\n", CODA_PSDEV_MAJOR); return -EIO; } + coda_psdev_class = class_simple_create(THIS_MODULE, "coda_psdev"); + if (IS_ERR(coda_psdev_class)) { + err = PTR_ERR(coda_psdev_class); + goto out_chrdev; + } devfs_mk_dir ("coda"); for (i = 0; i < MAX_CODADEVS; i++) { - devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i), + class_simple_device_add(coda_psdev_class, MKDEV(CODA_PSDEV_MAJOR,i), + NULL, "cfs%d", i); + err = devfs_mk_cdev(MKDEV(CODA_PSDEV_MAJOR, i), S_IFCHR|S_IRUSR|S_IWUSR, "coda/%d", i); + if (err) + goto out_class; } coda_sysctl_init(); - return 0; + goto out; + +out_class: + for (i = 0; i < MAX_CODADEVS; i++) + class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); + class_simple_destroy(coda_psdev_class); +out_chrdev: + unregister_chrdev(CODA_PSDEV_MAJOR, "coda_psdev"); +out: + return err; } @@ -408,8 +428,11 @@ static int __init init_coda(void) } return 0; out: - for (i = 0; i < MAX_CODADEVS; i++) + for (i = 0; i < MAX_CODADEVS; i++) { + class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); + } + class_simple_destroy(coda_psdev_class); devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR,"coda_psdev"); coda_sysctl_clean(); @@ -427,8 +450,11 @@ static void __exit exit_coda(void) if ( err != 0 ) { printk("coda: failed to unregister filesystem\n"); } - for (i = 0; i < MAX_CODADEVS; i++) + for (i = 0; i < MAX_CODADEVS; i++) { + class_simple_device_remove(MKDEV(CODA_PSDEV_MAJOR, i)); devfs_remove("coda/%d", i); + } + class_simple_destroy(coda_psdev_class); devfs_remove("coda"); unregister_chrdev(CODA_PSDEV_MAJOR, "coda_psdev"); coda_sysctl_clean(); --- linux-2.6.6-rc1/fs/compat.c 
2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/compat.c 2004-04-18 22:25:59.141839552 -0700 @@ -34,9 +34,17 @@ #include #include #include +#include +#include +#include +#include +#include +#include + #include /* siocdevprivate_ioctl */ #include +#include /* * Not all architectures have sys_utime, so implement this in terms @@ -794,3 +802,839 @@ asmlinkage int compat_sys_mount(char __u return retval; } +static ssize_t compat_do_readv_writev(int type, struct file *file, + const struct compat_iovec __user *uvector, + unsigned long nr_segs, loff_t *pos) +{ + typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); + typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); + + compat_ssize_t tot_len; + struct iovec iovstack[UIO_FASTIOV]; + struct iovec *iov=iovstack, *vector; + ssize_t ret; + int seg; + io_fn_t fn; + iov_fn_t fnv; + struct inode *inode; + + /* + * SuS says "The readv() function *may* fail if the iovcnt argument + * was less than or equal to 0, or greater than {IOV_MAX}. Linux has + * traditionally returned zero for zero segments, so... + */ + ret = 0; + if (nr_segs == 0) + goto out; + + /* + * First get the "struct iovec" from user memory and + * verify all the pointers + */ + ret = -EINVAL; + if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) + goto out; + if (!file->f_op) + goto out; + if (nr_segs > UIO_FASTIOV) { + ret = -ENOMEM; + iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); + if (!iov) + goto out; + } + ret = -EFAULT; + if (verify_area(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) + goto out; + + /* + * Single unix specification: + * We should -EINVAL if an element length is not >= 0 and fitting an + * ssize_t. 
The total length is fitting an ssize_t + * + * Be careful here because iov_len is a size_t not an ssize_t + */ + tot_len = 0; + vector = iov; + ret = -EINVAL; + for (seg = 0 ; seg < nr_segs; seg++) { + compat_ssize_t tmp = tot_len; + compat_ssize_t len; + compat_uptr_t buf; + + if (__get_user(len, &uvector->iov_len) || + __get_user(buf, &uvector->iov_base)) { + ret = -EFAULT; + goto out; + } + if (len < 0) /* size_t not fitting an compat_ssize_t .. */ + goto out; + tot_len += len; + if (tot_len < tmp) /* maths overflow on the compat_ssize_t */ + goto out; + vector->iov_base = compat_ptr(buf); + vector->iov_len = (compat_size_t) len; + uvector++; + vector++; + } + if (tot_len == 0) { + ret = 0; + goto out; + } + + inode = file->f_dentry->d_inode; + /* VERIFY_WRITE actually means a read, as we write to user space */ + ret = locks_verify_area((type == READ + ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), + inode, file, *pos, tot_len); + if (ret) + goto out; + + fnv = NULL; + if (type == READ) { + fn = file->f_op->read; + fnv = file->f_op->readv; + } else { + fn = (io_fn_t)file->f_op->write; + fnv = file->f_op->writev; + } + if (fnv) { + ret = fnv(file, iov, nr_segs, pos); + goto out; + } + + /* Do it by hand, with file-ops */ + ret = 0; + vector = iov; + while (nr_segs > 0) { + void __user * base; + size_t len; + ssize_t nr; + + base = vector->iov_base; + len = vector->iov_len; + vector++; + nr_segs--; + + nr = fn(file, base, len, pos); + + if (nr < 0) { + if (!ret) ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } +out: + if (iov != iovstack) + kfree(iov); + if ((ret + (type == READ)) > 0) + dnotify_parent(file->f_dentry, + (type == READ) ? 
DN_ACCESS : DN_MODIFY); + return ret; +} + +asmlinkage ssize_t +compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) +{ + struct file *file; + ssize_t ret = -EBADF; + + file = fget(fd); + if (!file) + return -EBADF; + + if (!(file->f_mode & FMODE_READ)) + goto out; + + ret = -EINVAL; + if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) + goto out; + + ret = compat_do_readv_writev(READ, file, vec, vlen, &file->f_pos); + +out: + fput(file); + return ret; +} + +asmlinkage ssize_t +compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, unsigned long vlen) +{ + struct file *file; + ssize_t ret = -EBADF; + + file = fget(fd); + if (!file) + return -EBADF; + if (!(file->f_mode & FMODE_WRITE)) + goto out; + + ret = -EINVAL; + if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) + goto out; + + ret = compat_do_readv_writev(WRITE, file, vec, vlen, &file->f_pos); + +out: + fput(file); + return ret; +} + +/* + * compat_count() counts the number of arguments/envelopes. It is basically + * a copy of count() from fs/exec.c, except that it works with 32 bit argv + * and envp pointers. + */ +static int compat_count(compat_uptr_t *argv, int max) +{ + int i = 0; + + if (argv != NULL) { + for (;;) { + compat_uptr_t p; + + if (get_user(p, argv)) + return -EFAULT; + if (!p) + break; + argv++; + if(++i > max) + return -E2BIG; + } + } + return i; +} + +/* + * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c + * except that it works with 32 bit argv and envp pointers. 
+ */ +static int compat_copy_strings(int argc, compat_uptr_t __user *argv, + struct linux_binprm *bprm) +{ + struct page *kmapped_page = NULL; + char *kaddr = NULL; + int ret; + + while (argc-- > 0) { + compat_uptr_t str; + int len; + unsigned long pos; + + if (get_user(str, argv+argc) || + !(len = strnlen_user(compat_ptr(str), bprm->p))) { + ret = -EFAULT; + goto out; + } + + if (bprm->p < len) { + ret = -E2BIG; + goto out; + } + + bprm->p -= len; + /* XXX: add architecture specific overflow check here. */ + pos = bprm->p; + + while (len > 0) { + int i, new, err; + int offset, bytes_to_copy; + struct page *page; + + offset = pos % PAGE_SIZE; + i = pos/PAGE_SIZE; + page = bprm->page[i]; + new = 0; + if (!page) { + page = alloc_page(GFP_HIGHUSER); + bprm->page[i] = page; + if (!page) { + ret = -ENOMEM; + goto out; + } + new = 1; + } + + if (page != kmapped_page) { + if (kmapped_page) + kunmap(kmapped_page); + kmapped_page = page; + kaddr = kmap(kmapped_page); + } + if (new && offset) + memset(kaddr, 0, offset); + bytes_to_copy = PAGE_SIZE - offset; + if (bytes_to_copy > len) { + bytes_to_copy = len; + if (new) + memset(kaddr+offset+len, 0, + PAGE_SIZE-offset-len); + } + err = copy_from_user(kaddr+offset, compat_ptr(str), + bytes_to_copy); + if (err) { + ret = -EFAULT; + goto out; + } + + pos += bytes_to_copy; + str += bytes_to_copy; + len -= bytes_to_copy; + } + } + ret = 0; +out: + if (kmapped_page) + kunmap(kmapped_page); + return ret; +} + +#ifdef CONFIG_MMU + +#define free_arg_pages(bprm) do { } while (0) + +#else + +static inline void free_arg_pages(struct linux_binprm *bprm) +{ + int i; + + for (i = 0; i < MAX_ARG_PAGES; i++) { + if (bprm->page[i]) + __free_page(bprm->page[i]); + bprm->page[i] = NULL; + } +} + +#endif /* CONFIG_MMU */ + +/* + * compat_do_execve() is mostly a copy of do_execve(), with the exception + * that it processes 32 bit argv and envp pointers. 
+ */ +int compat_do_execve(char * filename, + compat_uptr_t __user *argv, + compat_uptr_t __user *envp, + struct pt_regs * regs) +{ + struct linux_binprm bprm; + struct file *file; + int retval; + int i; + + sched_balance_exec(); + + file = open_exec(filename); + + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); + memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); + + bprm.file = file; + bprm.filename = filename; + bprm.interp = filename; + bprm.sh_bang = 0; + bprm.loader = 0; + bprm.exec = 0; + bprm.security = NULL; + bprm.mm = mm_alloc(); + retval = -ENOMEM; + if (!bprm.mm) + goto out_file; + + retval = init_new_context(current, bprm.mm); + if (retval < 0) + goto out_mm; + + bprm.argc = compat_count(argv, bprm.p / sizeof(compat_uptr_t)); + if ((retval = bprm.argc) < 0) + goto out_mm; + + bprm.envc = compat_count(envp, bprm.p / sizeof(compat_uptr_t)); + if ((retval = bprm.envc) < 0) + goto out_mm; + + retval = security_bprm_alloc(&bprm); + if (retval) + goto out; + + retval = prepare_binprm(&bprm); + if (retval < 0) + goto out; + + retval = copy_strings_kernel(1, &bprm.filename, &bprm); + if (retval < 0) + goto out; + + bprm.exec = bprm.p; + retval = compat_copy_strings(bprm.envc, envp, &bprm); + if (retval < 0) + goto out; + + retval = compat_copy_strings(bprm.argc, argv, &bprm); + if (retval < 0) + goto out; + + retval = search_binary_handler(&bprm,regs); + if (retval >= 0) { + free_arg_pages(&bprm); + + /* execve success */ + security_bprm_free(&bprm); + return retval; + } + +out: + /* Something went wrong, return the inode and free the argument pages*/ + for (i = 0 ; i < MAX_ARG_PAGES ; i++) { + struct page * page = bprm.page[i]; + if (page) + __free_page(page); + } + + if (bprm.security) + security_bprm_free(&bprm); + +out_mm: + if (bprm.mm) + mmdrop(bprm.mm); + +out_file: + if (bprm.file) { + allow_write_access(bprm.file); + fput(bprm.file); + } + + return retval; +} + +#define 
__COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) + +#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) + +/* + * Ooo, nasty. We need here to frob 32-bit unsigned longs to + * 64-bit unsigned longs. + */ +static inline +int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, + unsigned long *fdset) +{ + nr = ROUND_UP(nr, __COMPAT_NFDBITS); + if (ufdset) { + unsigned long odd; + + if (verify_area(VERIFY_WRITE, ufdset, nr*sizeof(compat_ulong_t))) + return -EFAULT; + + odd = nr & 1UL; + nr &= ~1UL; + while (nr) { + unsigned long h, l; + __get_user(l, ufdset); + __get_user(h, ufdset+1); + ufdset += 2; + *fdset++ = h << 32 | l; + nr -= 2; + } + if (odd) + __get_user(*fdset, ufdset); + } else { + /* Tricky, must clear full unsigned long in the + * kernel fdset at the end, this makes sure that + * actually happens. + */ + memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); + } + return 0; +} + +static inline +void compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, + unsigned long *fdset) +{ + unsigned long odd; + nr = ROUND_UP(nr, __COMPAT_NFDBITS); + + if (!ufdset) + return; + + odd = nr & 1UL; + nr &= ~1UL; + while (nr) { + unsigned long h, l; + l = *fdset++; + h = l >> 32; + __put_user(l, ufdset); + __put_user(h, ufdset+1); + ufdset += 2; + nr -= 2; + } + if (odd) + __put_user(*fdset, ufdset); +} + + +/* + * This is a virtual copy of sys_select from fs/select.c and probably + * should be compared to it from time to time + */ +static void *select_bits_alloc(int size) +{ + return kmalloc(6 * size, GFP_KERNEL); +} + +static void select_bits_free(void *bits, int size) +{ + kfree(bits); +} + +/* + * We can actually return ERESTARTSYS instead of EINTR, but I'd + * like to be certain this leads to no problems. So I return + * EINTR just for safety. + * + * Update: ERESTARTSYS breaks at least the xview clock binary, so + * I'm trying ERESTARTNOHAND which restart only when you want to. 
+ */ +#define MAX_SELECT_SECONDS \ + ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) + +asmlinkage long +compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, + compat_ulong_t __user *exp, struct compat_timeval __user *tvp) +{ + fd_set_bits fds; + char *bits; + long timeout; + int ret, size, max_fdset; + + timeout = MAX_SCHEDULE_TIMEOUT; + if (tvp) { + time_t sec, usec; + + if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp))) + || (ret = __get_user(sec, &tvp->tv_sec)) + || (ret = __get_user(usec, &tvp->tv_usec))) + goto out_nofds; + + ret = -EINVAL; + if (sec < 0 || usec < 0) + goto out_nofds; + + if ((unsigned long) sec < MAX_SELECT_SECONDS) { + timeout = ROUND_UP(usec, 1000000/HZ); + timeout += sec * (unsigned long) HZ; + } + } + + ret = -EINVAL; + if (n < 0) + goto out_nofds; + + /* max_fdset can increase, so grab it once to avoid race */ + max_fdset = current->files->max_fdset; + if (n > max_fdset) + n = max_fdset; + + /* + * We need 6 bitmaps (in/out/ex for both incoming and outgoing), + * since we used fdset we need to allocate memory in units of + * long-words. 
+ */ + ret = -ENOMEM; + size = FDS_BYTES(n); + bits = select_bits_alloc(size); + if (!bits) + goto out_nofds; + fds.in = (unsigned long *) bits; + fds.out = (unsigned long *) (bits + size); + fds.ex = (unsigned long *) (bits + 2*size); + fds.res_in = (unsigned long *) (bits + 3*size); + fds.res_out = (unsigned long *) (bits + 4*size); + fds.res_ex = (unsigned long *) (bits + 5*size); + + if ((ret = compat_get_fd_set(n, inp, fds.in)) || + (ret = compat_get_fd_set(n, outp, fds.out)) || + (ret = compat_get_fd_set(n, exp, fds.ex))) + goto out; + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); + + ret = do_select(n, &fds, &timeout); + + if (tvp && !(current->personality & STICKY_TIMEOUTS)) { + time_t sec = 0, usec = 0; + if (timeout) { + sec = timeout / HZ; + usec = timeout % HZ; + usec *= (1000000/HZ); + } + if (put_user(sec, &tvp->tv_sec) || + put_user(usec, &tvp->tv_usec)) + ret = -EFAULT; + } + + if (ret < 0) + goto out; + if (!ret) { + ret = -ERESTARTNOHAND; + if (signal_pending(current)) + goto out; + ret = 0; + } + + compat_set_fd_set(n, inp, fds.res_in); + compat_set_fd_set(n, outp, fds.res_out); + compat_set_fd_set(n, exp, fds.res_ex); + +out: + select_bits_free(bits, size); +out_nofds: + return ret; +} + +#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) +/* Stuff for NFS server syscalls... 
*/ +struct compat_nfsctl_svc { + u16 svc32_port; + s32 svc32_nthreads; +}; + +struct compat_nfsctl_client { + s8 cl32_ident[NFSCLNT_IDMAX+1]; + s32 cl32_naddr; + struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; + s32 cl32_fhkeytype; + s32 cl32_fhkeylen; + u8 cl32_fhkey[NFSCLNT_KEYMAX]; +}; + +struct compat_nfsctl_export { + char ex32_client[NFSCLNT_IDMAX+1]; + char ex32_path[NFS_MAXPATHLEN+1]; + compat_dev_t ex32_dev; + compat_ino_t ex32_ino; + compat_int_t ex32_flags; + compat_uid_t ex32_anon_uid; + compat_gid_t ex32_anon_gid; +}; + +struct compat_nfsctl_fdparm { + struct sockaddr gd32_addr; + s8 gd32_path[NFS_MAXPATHLEN+1]; + compat_int_t gd32_version; +}; + +struct compat_nfsctl_fsparm { + struct sockaddr gd32_addr; + s8 gd32_path[NFS_MAXPATHLEN+1]; + compat_int_t gd32_maxlen; +}; + +struct compat_nfsctl_arg { + compat_int_t ca32_version; /* safeguard */ + union { + struct compat_nfsctl_svc u32_svc; + struct compat_nfsctl_client u32_client; + struct compat_nfsctl_export u32_export; + struct compat_nfsctl_fdparm u32_getfd; + struct compat_nfsctl_fsparm u32_getfs; + } u; +#define ca32_svc u.u32_svc +#define ca32_client u.u32_client +#define ca32_export u.u32_export +#define ca32_getfd u.u32_getfd +#define ca32_getfs u.u32_getfs +}; + +union compat_nfsctl_res { + __u8 cr32_getfh[NFS_FHSIZE]; + struct knfsd_fh cr32_getfs; +}; + +static int compat_nfs_svc_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg *arg) +{ + int err; /* nonzero once any user-space access has failed */ + + err = !access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)); /* access_ok() is nonzero on success */ + err |= get_user(karg->ca_version, &arg->ca32_version); + err |= __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port); + err |= __get_user(karg->ca_svc.svc_nthreads, &arg->ca32_svc.svc32_nthreads); + return (err) ? 
-EFAULT : 0; +} + +static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg *arg) +{ + int err; + + err = !access_ok(VERIFY_READ, &arg->ca32_client, sizeof(arg->ca32_client)); /* access_ok() is nonzero on success */ + err |= get_user(karg->ca_version, &arg->ca32_version); + err |= __copy_from_user(&karg->ca_client.cl_ident[0], + &arg->ca32_client.cl32_ident[0], + NFSCLNT_IDMAX); + err |= __get_user(karg->ca_client.cl_naddr, &arg->ca32_client.cl32_naddr); + err |= __copy_from_user(&karg->ca_client.cl_addrlist[0], + &arg->ca32_client.cl32_addrlist[0], + (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)); + err |= __get_user(karg->ca_client.cl_fhkeytype, + &arg->ca32_client.cl32_fhkeytype); + err |= __get_user(karg->ca_client.cl_fhkeylen, + &arg->ca32_client.cl32_fhkeylen); + err |= __copy_from_user(&karg->ca_client.cl_fhkey[0], + &arg->ca32_client.cl32_fhkey[0], + NFSCLNT_KEYMAX); + + return (err) ? -EFAULT : 0; +} + +static int compat_nfs_exp_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg *arg) +{ + int err; + + err = !access_ok(VERIFY_READ, &arg->ca32_export, sizeof(arg->ca32_export)); /* access_ok() is nonzero on success */ + err |= get_user(karg->ca_version, &arg->ca32_version); + err |= __copy_from_user(&karg->ca_export.ex_client[0], + &arg->ca32_export.ex32_client[0], + NFSCLNT_IDMAX); + err |= __copy_from_user(&karg->ca_export.ex_path[0], + &arg->ca32_export.ex32_path[0], + NFS_MAXPATHLEN); + err |= __get_user(karg->ca_export.ex_dev, + &arg->ca32_export.ex32_dev); + err |= __get_user(karg->ca_export.ex_ino, + &arg->ca32_export.ex32_ino); + err |= __get_user(karg->ca_export.ex_flags, + &arg->ca32_export.ex32_flags); + err |= __get_user(karg->ca_export.ex_anon_uid, + &arg->ca32_export.ex32_anon_uid); + err |= __get_user(karg->ca_export.ex_anon_gid, + &arg->ca32_export.ex32_anon_gid); + SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); + SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); + + return (err) ? 
-EFAULT : 0; +} + +static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg *arg) +{ + int err; + + err = !access_ok(VERIFY_READ, &arg->ca32_getfd, sizeof(arg->ca32_getfd)); /* access_ok() is nonzero on success */ + err |= get_user(karg->ca_version, &arg->ca32_version); + err |= __copy_from_user(&karg->ca_getfd.gd_addr, + &arg->ca32_getfd.gd32_addr, + (sizeof(struct sockaddr))); + err |= __copy_from_user(&karg->ca_getfd.gd_path, + &arg->ca32_getfd.gd32_path, + (NFS_MAXPATHLEN+1)); + err |= __get_user(karg->ca_getfd.gd_version, + &arg->ca32_getfd.gd32_version); + + return (err) ? -EFAULT : 0; +} + +static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, struct compat_nfsctl_arg *arg) +{ + int err; + + err = !access_ok(VERIFY_READ, &arg->ca32_getfs, sizeof(arg->ca32_getfs)); /* access_ok() is nonzero on success */ + err |= get_user(karg->ca_version, &arg->ca32_version); + err |= __copy_from_user(&karg->ca_getfs.gd_addr, + &arg->ca32_getfs.gd32_addr, + (sizeof(struct sockaddr))); + err |= __copy_from_user(&karg->ca_getfs.gd_path, + &arg->ca32_getfs.gd32_path, + (NFS_MAXPATHLEN+1)); + err |= __get_user(karg->ca_getfs.gd_maxlen, + &arg->ca32_getfs.gd32_maxlen); + + return (err) ? -EFAULT : 0; +} + +/* This really doesn't need translations, we are only passing + * back a union which contains opaque nfs file handle data. + */ +static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, union compat_nfsctl_res *res) +{ + int err; + + err = copy_to_user(res, kres, sizeof(*res)); + + return (err) ? 
-EFAULT : 0; +} + +asmlinkage long compat_sys_nfsservctl(int cmd, struct compat_nfsctl_arg *arg, + union compat_nfsctl_res *res) +{ + struct nfsctl_arg *karg; + union nfsctl_res *kres; + mm_segment_t oldfs; + int err; + + karg = kmalloc(sizeof(*karg), GFP_USER); + kres = kmalloc(sizeof(*kres), GFP_USER); + if(!karg || !kres) { + err = -ENOMEM; + goto done; + } + + switch(cmd) { + case NFSCTL_SVC: + err = compat_nfs_svc_trans(karg, arg); + break; + + case NFSCTL_ADDCLIENT: + err = compat_nfs_clnt_trans(karg, arg); + break; + + case NFSCTL_DELCLIENT: + err = compat_nfs_clnt_trans(karg, arg); + break; + + case NFSCTL_EXPORT: + case NFSCTL_UNEXPORT: + err = compat_nfs_exp_trans(karg, arg); + break; + + case NFSCTL_GETFD: + err = compat_nfs_getfd_trans(karg, arg); + break; + + case NFSCTL_GETFS: + err = compat_nfs_getfs_trans(karg, arg); + break; + + default: + err = -EINVAL; + goto done; + } + if (err) /* argument translation failed: never pass a half-filled karg on */ + goto done; + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = sys_nfsservctl(cmd, karg, kres); + set_fs(oldfs); + if (err) + goto done; + + if((cmd == NFSCTL_GETFD) || + (cmd == NFSCTL_GETFS)) + err = compat_nfs_getfh_res_trans(kres, res); + +done: + kfree(karg); + kfree(kres); + return err; +} +#else /* !NFSD */ +long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) +{ + return sys_ni_syscall(); +} +#endif --- linux-2.6.6-rc1/fs/cramfs/inode.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/cramfs/inode.c 2004-04-18 22:25:24.912043272 -0700 @@ -193,6 +193,12 @@ static void cramfs_put_super(struct supe sb->s_fs_info = NULL; } +static int cramfs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + static int cramfs_fill_super(struct super_block *sb, void *data, int silent) { int i; @@ -483,6 +489,7 @@ static struct inode_operations cramfs_di static struct super_operations cramfs_ops = { .put_super = cramfs_put_super, + .remount_fs = cramfs_remount, 
.statfs = cramfs_statfs, }; --- linux-2.6.6-rc1/fs/dcache.c 2004-04-14 
23:14:49.000000000 -0700 +++ 25/fs/dcache.c 2004-04-18 22:25:32.688861016 -0700 @@ -1557,13 +1557,9 @@ static void __init dcache_init(unsigned * flag could be removed here, to hint to the allocator that * it should not try to get multiple page regions. */ - dentry_cache = kmem_cache_create("dentry_cache", - sizeof(struct dentry), - 0, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, - NULL, NULL); - if (!dentry_cache) - panic("Cannot create dentry cache"); + dentry_cache = kmem_cache_create("dentry_cache", sizeof(struct dentry), + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dcache_memory); @@ -1628,17 +1624,11 @@ void __init vfs_caches_init(unsigned lon reserve = (mempages - nr_free_pages()) * 3/2; mempages -= reserve; - names_cachep = kmem_cache_create("names_cache", - PATH_MAX, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!names_cachep) - panic("Cannot create names SLAB cache"); - - filp_cachep = kmem_cache_create("filp", - sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor); - if(!filp_cachep) - panic("Cannot create filp SLAB cache"); + names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + + filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); dcache_init(mempages); inode_init(mempages); --- linux-2.6.6-rc1/fs/direct-io.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/direct-io.c 2004-04-18 22:25:57.540083056 -0700 @@ -125,7 +125,7 @@ struct dio { /* AIO related stuff */ struct kiocb *iocb; /* kiocb */ int is_async; /* is IO async ? 
*/ - int result; /* IO result */ + ssize_t result; /* IO result */ }; /* @@ -898,7 +898,7 @@ out: /* * Releases both i_sem and i_alloc_sem */ -static int +static ssize_t direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs, unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io, @@ -906,8 +906,8 @@ direct_io_worker(int rw, struct kiocb *i { unsigned long user_addr; int seg; - int ret = 0; - int ret2; + ssize_t ret = 0; + ssize_t ret2; size_t bytes; dio->bio = NULL; @@ -1096,7 +1096,7 @@ direct_io_worker(int rw, struct kiocb *i * * For writes to S_ISBLK files, i_sem is not held on entry; it is never taken. */ -int +ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, --- linux-2.6.6-rc1/fs/dnotify.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/dnotify.c 2004-04-18 22:25:32.688861016 -0700 @@ -173,9 +173,7 @@ EXPORT_SYMBOL_GPL(dnotify_parent); static int __init dnotify_init(void) { dn_cache = kmem_cache_create("dnotify_cache", - sizeof(struct dnotify_struct), 0, 0, NULL, NULL); - if (!dn_cache) - panic("cannot create dnotify slab cache"); + sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL, NULL); return 0; } --- linux-2.6.6-rc1/fs/dquot.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/dquot.c 2004-04-18 22:25:32.690860712 -0700 @@ -1,16 +1,13 @@ /* - * Implementation of the diskquota system for the LINUX operating - * system. QUOTA is implemented using the BSD system call interface as - * the means of communication with the user level. Currently only the - * ext2 filesystem has support for disk quotas. Other filesystems may - * be added in the future. This file contains the generic routines - * called by the different filesystems on allocation of an inode or - * block. 
These routines take care of the administration needed to - * have a consistent diskquota tracking system. The ideas of both - * user and group quotas are based on the Melbourne quota system as - * used on BSD derived systems. The internal implementation is - * based on one of the several variants of the LINUX inode-subsystem - * with added complexity of the diskquota system. + * Implementation of the diskquota system for the LINUX operating system. QUOTA + * is implemented using the BSD system call interface as the means of + * communication with the user level. This file contains the generic routines + * called by the different filesystems on allocation of an inode or block. + * These routines take care of the administration needed to have a consistent + * diskquota tracking system. The ideas of both user and group quotas are based + * on the Melbourne quota system as used on BSD derived systems. The internal + * implementation is based on one of the several variants of the LINUX + * inode-subsystem with added complexity of the diskquota system. * * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $ * @@ -52,6 +49,9 @@ * New SMP locking. * Jan Kara, , 10/2002 * + * Added journalled quota support + * Jan Kara, , 2003,2004 + * * (C) Copyright 1994 - 1997 Marco van Wieringen */ @@ -104,13 +104,17 @@ * * Each dquot has its dq_lock semaphore. Locked dquots might not be referenced * from inodes (dquot_alloc_space() and such don't check the dq_lock). - * Currently dquot is locked only when it is being read to memory on the first - * dqget(). Write operations on dquots don't hold dq_lock as they copy data - * under dq_data_lock spinlock to internal buffers before writing. + * Currently dquot is locked only when it is being read to memory (or space for + * it is being allocated) on the first dqget() and when it is being released on + * the last dqput(). 
The allocation and release operations are serialized by + * the dq_lock and by checking the use count in dquot_release(). Write + * operations on dquots don't hold dq_lock as they copy data under dq_data_lock + * spinlock to internal buffers before writing. * * Lock ordering (including journal_lock) is following: * dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_lock > dqio_sem */ + spinlock_t dq_list_lock = SPIN_LOCK_UNLOCKED; spinlock_t dq_data_lock = SPIN_LOCK_UNLOCKED; @@ -256,6 +260,9 @@ static inline void remove_inuse(struct d dqstats.allocated_dquots--; list_del(&dquot->dq_inuse); } +/* + * End of list functions needing dq_list_lock + */ static void wait_on_dquot(struct dquot *dquot) { @@ -263,34 +270,98 @@ static void wait_on_dquot(struct dquot * up(&dquot->dq_lock); } -static int read_dqblk(struct dquot *dquot) +#define mark_dquot_dirty(dquot) ((dquot)->dq_sb->dq_op->mark_dirty(dquot)) + +/* No locks needed here as ANY_DQUOT_DIRTY is used just by sync and so the + * worst that can happen is that dquot is not written by concurrent sync... 
*/ +int dquot_mark_dquot_dirty(struct dquot *dquot) +{ + set_bit(DQ_MOD_B, &(dquot)->dq_flags); + set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)-> + info[(dquot)->dq_type].dqi_flags)); + return 0; +} + +void mark_info_dirty(struct super_block *sb, int type) { - int ret; + set_bit(DQF_INFO_DIRTY_B, &sb_dqopt(sb)->info[type].dqi_flags); +} +EXPORT_SYMBOL(mark_info_dirty); + +/* + * Read dquot from disk and alloc space for it + */ + +int dquot_acquire(struct dquot *dquot) +{ + int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); down(&dquot->dq_lock); down(&dqopt->dqio_sem); - ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + if (!test_bit(DQ_READ_B, &dquot->dq_flags)) + ret = dqopt->ops[dquot->dq_type]->read_dqblk(dquot); + if (ret < 0) + goto out_iolock; + set_bit(DQ_READ_B, &dquot->dq_flags); + /* Instantiate dquot if needed */ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) { + ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + if (ret < 0) + goto out_iolock; + } + set_bit(DQ_ACTIVE_B, &dquot->dq_flags); +out_iolock: up(&dqopt->dqio_sem); up(&dquot->dq_lock); return ret; } -static int commit_dqblk(struct dquot *dquot) +/* + * Write dquot to disk + */ +int dquot_commit(struct dquot *dquot) { - int ret; + int ret = 0; struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); down(&dqopt->dqio_sem); - ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); + clear_bit(DQ_MOD_B, &dquot->dq_flags); + /* Inactive dquot can be only if there was error during read/init + * => we have better not writing it */ + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + ret = dqopt->ops[dquot->dq_type]->commit_dqblk(dquot); up(&dqopt->dqio_sem); + if (info_dirty(&dqopt->info[dquot->dq_type])) + dquot->dq_sb->dq_op->write_info(dquot->dq_sb, dquot->dq_type); + return ret; +} + +/* + * Release dquot + */ +int dquot_release(struct dquot *dquot) +{ + int ret = 0; + struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); + + down(&dquot->dq_lock); + /* 
Check whether we are not racing with some other dqget() */ + if (atomic_read(&dquot->dq_count) > 1) + goto out_dqlock; + down(&dqopt->dqio_sem); + ret = dqopt->ops[dquot->dq_type]->release_dqblk(dquot); + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); + up(&dqopt->dqio_sem); +out_dqlock: + up(&dquot->dq_lock); return ret; } /* Invalidate all dquots on the list. Note that this function is called after - * quota is disabled so no new quota might be created. Because we hold - * dqonoff_sem and pointers were already removed from inodes we actually know - * that no quota for this sb+type should be held. */ + * quota is disabled and pointers from inodes removed so there cannot be new + * quota users. Also because we hold dqonoff_sem there can be no quota users + * for this sb+type at all. */ static void invalidate_dquots(struct super_block *sb, int type) { struct dquot *dquot; @@ -317,7 +388,7 @@ static void invalidate_dquots(struct sup spin_unlock(&dq_list_lock); } -static int vfs_quota_sync(struct super_block *sb, int type) +int vfs_quota_sync(struct super_block *sb, int type) { struct list_head *head; struct dquot *dquot; @@ -328,9 +399,11 @@ static int vfs_quota_sync(struct super_b restart: /* At this point any dirty dquot will definitely be written so we can clear dirty flag from info */ + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt)) clear_bit(DQF_ANY_DQUOT_DIRTY_B, &dqopt->info[cnt].dqi_flags); + spin_unlock(&dq_data_lock); spin_lock(&dq_list_lock); list_for_each(head, &inuse_list) { dquot = list_entry(head, struct dquot, dq_inuse); @@ -338,10 +411,13 @@ restart: continue; if (type != -1 && dquot->dq_type != type) continue; - if (!dquot->dq_sb) /* Invalidated? */ - continue; if (!dquot_dirty(dquot)) continue; + /* Dirty and inactive can be only bad dquot... 
*/ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) + continue; + /* Now we have active dquot from which someone is holding reference so we + * can safely just increase use count */ atomic_inc(&dquot->dq_count); dqstats.lookups++; spin_unlock(&dq_list_lock); @@ -352,11 +428,9 @@ restart: spin_unlock(&dq_list_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) - if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) && info_dirty(&dqopt->info[cnt])) { - down(&dqopt->dqio_sem); - dqopt->ops[cnt]->write_file_info(sb, cnt); - up(&dqopt->dqio_sem); - } + if ((cnt == type || type == -1) && sb_has_quota_enabled(sb, cnt) + && info_dirty(&dqopt->info[cnt])) + sb->dq_op->write_info(sb, cnt); spin_lock(&dq_list_lock); dqstats.syncs++; spin_unlock(&dq_list_lock); @@ -431,11 +505,20 @@ we_slept: spin_unlock(&dq_list_lock); return; } - if (dquot_dirty(dquot)) { + /* Need to release dquot? */ + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); + /* Commit dquot before releasing */ dquot->dq_sb->dq_op->write_dquot(dquot); goto we_slept; } + /* Clear flag in case dquot was inactive (something bad happened) */ + clear_bit(DQ_MOD_B, &dquot->dq_flags); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + dquot_release(dquot); + goto we_slept; + } atomic_dec(&dquot->dq_count); #ifdef __DQUOT_PARANOIA /* sanity check */ @@ -494,7 +577,6 @@ we_slept: insert_dquot_hash(dquot); dqstats.lookups++; spin_unlock(&dq_list_lock); - read_dqblk(dquot); } else { if (!atomic_read(&dquot->dq_count)) remove_free_dquot(dquot); @@ -502,11 +584,17 @@ we_slept: dqstats.cache_hits++; dqstats.lookups++; spin_unlock(&dq_list_lock); - wait_on_dquot(dquot); if (empty) kmem_cache_free(dquot_cachep, empty); } - + /* Wait for dq_lock - after this we know that either dquot_release() is already + * finished or it will be canceled due to dq_count > 1 test */ + wait_on_dquot(dquot); + /* Read the dquot and instantiate it (everything done 
only if needed) */ + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_acquire(dquot) < 0) { + dqput(dquot); + return NODQUOT; + } #ifdef __DQUOT_PARANOIA if (!dquot->dq_sb) /* Has somebody invalidated entry under us? */ BUG(); @@ -540,12 +628,10 @@ restart: struct file *filp = list_entry(p, struct file, f_list); struct inode *inode = filp->f_dentry->d_inode; if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) { - struct vfsmount *mnt = mntget(filp->f_vfsmnt); struct dentry *dentry = dget(filp->f_dentry); file_list_unlock(); sb->dq_op->initialize(inode, type); dput(dentry); - mntput(mnt); /* As we may have blocked we had better restart... */ goto restart; } @@ -627,13 +713,11 @@ static void drop_dquot_ref(struct super_ static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number) { dquot->dq_dqb.dqb_curinodes += number; - mark_dquot_dirty(dquot); } static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) { dquot->dq_dqb.dqb_curspace += number; - mark_dquot_dirty(dquot); } static inline void dquot_decr_inodes(struct dquot *dquot, unsigned long number) @@ -645,7 +729,6 @@ static inline void dquot_decr_inodes(str if (dquot->dq_dqb.dqb_curinodes < dquot->dq_dqb.dqb_isoftlimit) dquot->dq_dqb.dqb_itime = (time_t) 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); } static inline void dquot_decr_space(struct dquot *dquot, qsize_t number) @@ -657,7 +740,6 @@ static inline void dquot_decr_space(stru if (toqb(dquot->dq_dqb.dqb_curspace) < dquot->dq_dqb.dqb_bsoftlimit) dquot->dq_dqb.dqb_btime = (time_t) 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); } static inline int need_print_warning(struct dquot *dquot) @@ -810,25 +892,22 @@ static int check_bdq(struct dquot *dquot } /* - * Externally referenced functions through dquot_operations in inode. - * - * Note: this is a blocking operation. 
+ * Initialize quota pointers in inode + * Transaction must be started at entry */ -void dquot_initialize(struct inode *inode, int type) +int dquot_initialize(struct inode *inode, int type) { unsigned int id = 0; - int cnt; + int cnt, ret = 0; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) - return; + return 0; down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ - if (IS_NOQUOTA(inode)) { - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - return; - } - /* Build list of quotas to initialize... */ + if (IS_NOQUOTA(inode)) + goto out_err; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -846,14 +925,16 @@ void dquot_initialize(struct inode *inod inode->i_flags |= S_QUOTA; } } +out_err: up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return ret; } /* * Release all quotas referenced by inode * Transaction must be started at an entry */ -void dquot_drop(struct inode *inode) +int dquot_drop(struct inode *inode) { int cnt; @@ -866,9 +947,19 @@ void dquot_drop(struct inode *inode) } } up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return 0; } /* + * Following four functions update i_blocks+i_bytes fields and + * quota information (together with appropriate checks) + * NOTE: We absolutely rely on the fact that caller dirties + * the inode (usually macros in quotaops.h care about this) and + * holds a handle for the current transaction so that dquot write and + * inode write go into the same transaction. 
+ */ + +/* * This operation can block, but only after everything is updated */ int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) @@ -876,8 +967,10 @@ int dquot_alloc_space(struct inode *inod int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) { +out_add: inode_add_bytes(inode, number); return QUOTA_OK; } @@ -885,10 +978,11 @@ int dquot_alloc_space(struct inode *inod warntype[cnt] = NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + if (IS_NOQUOTA(inode)) { /* Now we can do reliable test... */ + up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + goto out_add; + } spin_lock(&dq_data_lock); - /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) - goto add_bytes; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; @@ -900,11 +994,15 @@ int dquot_alloc_space(struct inode *inod continue; dquot_incr_space(inode->i_dquot[cnt], number); } -add_bytes: inode_add_bytes(inode, number); ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); + if (ret == QUOTA_OK) + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings(inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; @@ -918,13 +1016,13 @@ int dquot_alloc_inode(const struct inode int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return QUOTA_OK; for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; @@ -945,6 +1043,11 @@ int dquot_alloc_inode(const struct inode ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); + if (ret == QUOTA_OK) + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); flush_warnings((struct dquot **)inode->i_dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; @@ -953,46 +1056,55 @@ warn_put_all: /* * This is a non-blocking operation. */ -void dquot_free_space(struct inode *inode, qsize_t number) +int dquot_free_space(struct inode *inode, qsize_t number) { unsigned int cnt; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) { +out_sub: inode_sub_bytes(inode, number); - return; + return QUOTA_OK; } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); /* Now recheck reliably when holding dqptr_sem */ - if (IS_NOQUOTA(inode)) - goto sub_bytes; + if (IS_NOQUOTA(inode)) { + up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + goto out_sub; + } + spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt] == NODQUOT) continue; dquot_decr_space(inode->i_dquot[cnt], number); } -sub_bytes: inode_sub_bytes(inode, number); spin_unlock(&dq_data_lock); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + return QUOTA_OK; } /* * This is a non-blocking operation. */ -void dquot_free_inode(const struct inode *inode, unsigned long number) +int dquot_free_inode(const struct inode *inode, unsigned long number) { unsigned int cnt; - /* Solve deadlock when we recurse when holding dqptr_sem... 
*/ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) - return; + return QUOTA_OK; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); - return; + return QUOTA_OK; } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1001,7 +1113,12 @@ void dquot_free_inode(const struct inode dquot_decr_inodes(inode->i_dquot[cnt], number); } spin_unlock(&dq_data_lock); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (inode->i_dquot[cnt]) + mark_dquot_dirty(inode->i_dquot[cnt]); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); + return QUOTA_OK; } /* @@ -1018,7 +1135,8 @@ int dquot_transfer(struct inode *inode, chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; char warntype[MAXQUOTAS]; - /* Solve deadlock when we recurse when holding dqptr_sem... */ + /* First test before acquiring semaphore - solves deadlocks when we + * re-enter the quota code and are already holding the semaphore */ if (IS_NOQUOTA(inode)) return QUOTA_OK; /* Clear the arrays */ @@ -1026,15 +1144,15 @@ int dquot_transfer(struct inode *inode, transfer_to[cnt] = transfer_from[cnt] = NODQUOT; warntype[cnt] = NOWARN; } - down(&sb_dqopt(inode->i_sb)->dqonoff_sem); down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); /* Now recheck reliably when holding dqptr_sem */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); - up(&sb_dqopt(inode->i_sb)->dqonoff_sem); return QUOTA_OK; } - /* First build the transfer_to list - here we can block on reading of dquots... */ + /* First build the transfer_to list - here we can block on + * reading/instantiating of dquots. 
We know that the transaction for + * us was already started so we don't violate lock ranking here */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { switch (cnt) { case USRQUOTA: @@ -1082,7 +1200,13 @@ int dquot_transfer(struct inode *inode, ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); - up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + /* Dirtify all the dquots - this can block when journalling */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (transfer_from[cnt]) + mark_dquot_dirty(transfer_from[cnt]); + if (transfer_to[cnt]) + mark_dquot_dirty(transfer_to[cnt]); + } flush_warnings(transfer_to, warntype); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1091,7 +1215,21 @@ warn_put_all: if (ret == NO_QUOTA && transfer_to[cnt] != NODQUOT) dqput(transfer_to[cnt]); } - up(&sb_dqopt(inode->i_sb)->dqonoff_sem); + up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); + return ret; +} + +/* + * Write info of quota file to disk + */ +int dquot_commit_info(struct super_block *sb, int type) +{ + int ret; + struct quota_info *dqopt = sb_dqopt(sb); + + down(&dqopt->dqio_sem); + ret = dqopt->ops[type]->write_file_info(sb, type); + up(&dqopt->dqio_sem); return ret; } @@ -1099,22 +1237,18 @@ warn_put_all: * Definitions of diskquota operations. 
*/ struct dquot_operations dquot_operations = { - .initialize = dquot_initialize, /* mandatory */ - .drop = dquot_drop, /* mandatory */ + .initialize = dquot_initialize, + .drop = dquot_drop, .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, - .write_dquot = commit_dqblk + .write_dquot = dquot_commit, + .mark_dirty = dquot_mark_dquot_dirty, + .write_info = dquot_commit_info }; -/* Function used by filesystems for initializing the dquot_operations structure */ -void init_dquot_operations(struct dquot_operations *fsdqops) -{ - memcpy(fsdqops, &dquot_operations, sizeof(dquot_operations)); -} - static inline void set_enable_flags(struct quota_info *dqopt, int type) { switch (type) { @@ -1166,17 +1300,14 @@ int vfs_quota_off(struct super_block *sb * Now all dquots should be invalidated, all writes done so we should be only * users of the info. No locks needed. */ - if (info_dirty(&dqopt->info[cnt])) { - down(&dqopt->dqio_sem); - dqopt->ops[cnt]->write_file_info(sb, cnt); - up(&dqopt->dqio_sem); - } + if (info_dirty(&dqopt->info[cnt])) + sb->dq_op->write_info(sb, cnt); if (dqopt->ops[cnt]->free_file_info) dqopt->ops[cnt]->free_file_info(sb, cnt); put_quota_format(dqopt->info[cnt].dqi_format); fput(dqopt->files[cnt]); - dqopt->files[cnt] = (struct file *)NULL; + dqopt->files[cnt] = NULL; dqopt->info[cnt].dqi_flags = 0; dqopt->info[cnt].dqi_igrace = 0; dqopt->info[cnt].dqi_bgrace = 0; @@ -1187,33 +1318,30 @@ out: return 0; } -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +/* + * Turn quotas on on a device + */ + +/* Helper function when we already have file open */ +static int vfs_quota_on_file(struct file *f, int type, int format_id) { - struct file *f; + struct quota_format_type *fmt = find_quota_format(format_id); struct inode *inode; + struct super_block *sb = f->f_dentry->d_sb; struct quota_info *dqopt = sb_dqopt(sb); - struct 
quota_format_type *fmt = find_quota_format(format_id); - int error, cnt; struct dquot *to_drop[MAXQUOTAS]; + int error, cnt; unsigned int oldflags; if (!fmt) return -ESRCH; - f = filp_open(path, O_RDWR, 0600); - if (IS_ERR(f)) { - error = PTR_ERR(f); - goto out_fmt; - } error = -EIO; if (!f->f_op || !f->f_op->read || !f->f_op->write) - goto out_f; - error = security_quota_on(f); - if (error) - goto out_f; + goto out_fmt; inode = f->f_dentry->d_inode; error = -EACCES; if (!S_ISREG(inode->i_mode)) - goto out_f; + goto out_fmt; down(&dqopt->dqonoff_sem); if (sb_has_quota_enabled(sb, type)) { @@ -1235,7 +1363,7 @@ int vfs_quota_on(struct super_block *sb, inode->i_flags &= ~S_QUOTA; up_write(&dqopt->dqptr_sem); /* We must put dquots outside of dqptr_sem because we may need to - * start transaction for write */ + * start transaction for dquot_release() */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (to_drop[cnt]) dqput(to_drop[cnt]); @@ -1262,14 +1390,58 @@ out_file_init: out_lock: up_write(&dqopt->dqptr_sem); up(&dqopt->dqonoff_sem); -out_f: - filp_close(f, NULL); out_fmt: put_quota_format(fmt); return error; } +/* Actual function called from quotactl() */ +int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path) +{ + struct file *f; + int error; + + f = filp_open(path, O_RDWR, 0600); + if (IS_ERR(f)) + return PTR_ERR(f); + error = security_quota_on(f); + if (error) + goto out_f; + error = vfs_quota_on_file(f, type, format_id); + if (!error) + return 0; +out_f: + filp_close(f, NULL); + return error; +} + +/* + * Function used by filesystems when filp_open() would fail (filesystem is + * being mounted now). We will use a private file structure. Caller is + * responsible that its IO functions won't need vfsmnt structure or + * some dentry tricks... 
+ */ +int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) +{ + struct file *f; + int error; + + dget(dentry); /* Get a reference for struct file */ + f = dentry_open(dentry, NULL, O_RDWR); + if (IS_ERR(f)) { + error = PTR_ERR(f); + goto out_dentry; + } + error = vfs_quota_on_file(f, type, format_id); + if (!error) + return 0; + fput(f); +out_dentry: + dput(dentry); + return error; +} + /* Generic routine for getting common part of quota structure */ static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) { @@ -1353,8 +1525,8 @@ static void do_set_dqblk(struct dquot *d clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); - mark_dquot_dirty(dquot); spin_unlock(&dq_data_lock); + mark_dquot_dirty(dquot); } int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di) @@ -1411,8 +1583,10 @@ int vfs_set_dqinfo(struct super_block *s mi->dqi_igrace = ii->dqi_igrace; if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); - mark_info_dirty(mi); spin_unlock(&dq_data_lock); + mark_info_dirty(sb, type); + /* Force write to disk */ + sb->dq_op->write_info(sb, type); up(&sb_dqopt(sb)->dqonoff_sem); return 0; } @@ -1529,12 +1703,9 @@ static int __init dquot_init(void) dquot_cachep = kmem_cache_create("dquot", sizeof(struct dquot), sizeof(unsigned long) * 4, - SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, NULL, NULL); - if (!dquot_cachep) - panic("Cannot create dquot SLAB cache"); - + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, + NULL, NULL); set_shrinker(DEFAULT_SEEKS, shrink_dqcache_memory); - return 0; } module_init(dquot_init); @@ -1544,4 +1715,21 @@ EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_list_lock); EXPORT_SYMBOL(dq_data_lock); -EXPORT_SYMBOL(init_dquot_operations); +EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(vfs_quota_on_mount); +EXPORT_SYMBOL(vfs_quota_off); +EXPORT_SYMBOL(vfs_quota_sync); 
+EXPORT_SYMBOL(vfs_get_dqinfo); +EXPORT_SYMBOL(vfs_set_dqinfo); +EXPORT_SYMBOL(vfs_get_dqblk); +EXPORT_SYMBOL(vfs_set_dqblk); +EXPORT_SYMBOL(dquot_commit); +EXPORT_SYMBOL(dquot_commit_info); +EXPORT_SYMBOL(dquot_mark_dquot_dirty); +EXPORT_SYMBOL(dquot_initialize); +EXPORT_SYMBOL(dquot_drop); +EXPORT_SYMBOL(dquot_alloc_space); +EXPORT_SYMBOL(dquot_alloc_inode); +EXPORT_SYMBOL(dquot_free_space); +EXPORT_SYMBOL(dquot_free_inode); +EXPORT_SYMBOL(dquot_transfer); --- linux-2.6.6-rc1/fs/efs/super.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/efs/super.c 2004-04-18 22:25:24.918042360 -0700 @@ -77,12 +77,19 @@ void efs_put_super(struct super_block *s s->s_fs_info = NULL; } +static int efs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + static struct super_operations efs_superblock_operations = { .alloc_inode = efs_alloc_inode, .destroy_inode = efs_destroy_inode, .read_inode = efs_read_inode, .put_super = efs_put_super, .statfs = efs_statfs, + .remount_fs = efs_remount, }; static int __init init_efs_fs(void) { --- linux-2.6.6-rc1/fs/eventpoll.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/eventpoll.c 2004-04-18 22:25:32.691860560 -0700 @@ -1695,22 +1695,14 @@ static int __init eventpoll_init(void) ep_poll_safewake_init(&psw); /* Allocates slab cache used to allocate "struct epitem" items */ - error = -ENOMEM; - epi_cache = kmem_cache_create("eventpoll_epi", - sizeof(struct epitem), - 0, - SLAB_HWCACHE_ALIGN | EPI_SLAB_DEBUG, NULL, NULL); - if (!epi_cache) - goto eexit_1; + epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), + 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, + NULL, NULL); /* Allocates slab cache used to allocate "struct eppoll_entry" */ - error = -ENOMEM; pwq_cache = kmem_cache_create("eventpoll_pwq", - sizeof(struct eppoll_entry), - 0, - EPI_SLAB_DEBUG, NULL, NULL); - if (!pwq_cache) - goto eexit_2; + sizeof(struct eppoll_entry), 0, + EPI_SLAB_DEBUG|SLAB_PANIC, NULL, NULL); /* * 
Register the virtual file system that will be the source of inodes @@ -1718,27 +1710,20 @@ static int __init eventpoll_init(void) */ error = register_filesystem(&eventpoll_fs_type); if (error) - goto eexit_3; + goto epanic; /* Mount the above commented virtual file system */ eventpoll_mnt = kern_mount(&eventpoll_fs_type); error = PTR_ERR(eventpoll_mnt); if (IS_ERR(eventpoll_mnt)) - goto eexit_4; - - DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n", current)); + goto epanic; + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n", + current)); return 0; -eexit_4: - unregister_filesystem(&eventpoll_fs_type); -eexit_3: - kmem_cache_destroy(pwq_cache); -eexit_2: - kmem_cache_destroy(epi_cache); -eexit_1: - - return error; +epanic: + panic("eventpoll_init() failed\n"); } @@ -1755,4 +1740,3 @@ module_init(eventpoll_init); module_exit(eventpoll_exit); MODULE_LICENSE("GPL"); - --- linux-2.6.6-rc1/fs/exec.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/exec.c 2004-04-18 22:26:02.146382792 -0700 @@ -48,7 +48,6 @@ #include #include -#include #include #ifdef CONFIG_KMOD @@ -293,53 +292,42 @@ EXPORT_SYMBOL(copy_strings_kernel); * This routine is used to map in a page into an address space: needed by * execve() for the initial stack and environment pages. * - * tsk->mmap_sem is held for writing. + * tsk->mm->mmap_sem is held for writing. 
*/ void put_dirty_page(struct task_struct *tsk, struct page *page, unsigned long address, pgprot_t prot) { + struct mm_struct *mm = tsk->mm; pgd_t * pgd; pmd_t * pmd; pte_t * pte; - struct pte_chain *pte_chain; - if (page_count(page) != 1) - printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", - page, address); - - pgd = pgd_offset(tsk->mm, address); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto out_sig; - spin_lock(&tsk->mm->page_table_lock); - pmd = pmd_alloc(tsk->mm, pgd, address); + pgd = pgd_offset(mm, address); + spin_lock(&mm->page_table_lock); + pmd = pmd_alloc(mm, pgd, address); if (!pmd) goto out; - pte = pte_alloc_map(tsk->mm, pmd, address); + pte = pte_alloc_map(mm, pmd, address); if (!pte) goto out; if (!pte_none(*pte)) { pte_unmap(pte); goto out; } + mm->rss++; lru_cache_add_active(page); flush_dcache_page(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot)))); - pte_chain = page_add_rmap(page, pte, pte_chain); + page_add_anon_rmap(page, mm, address); pte_unmap(pte); - tsk->mm->rss++; - spin_unlock(&tsk->mm->page_table_lock); + spin_unlock(&mm->page_table_lock); /* no need for flush_tlb */ - pte_chain_free(pte_chain); return; out: - spin_unlock(&tsk->mm->page_table_lock); -out_sig: + spin_unlock(&mm->page_table_lock); __free_page(page); force_sig(SIGKILL, tsk); - pte_chain_free(pte_chain); - return; } int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) @@ -438,6 +426,7 @@ int setup_arg_pages(struct linux_binprm mpnt->vm_ops = NULL; mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; + mpol_set_vma_default(mpnt); INIT_LIST_HEAD(&mpnt->shared); mpnt->vm_private_data = (void *) 0; insert_vm_struct(mm, mpnt); @@ -609,7 +598,9 @@ static inline int de_thread(struct task_ newsig->group_stop_count = 0; newsig->curr_target = NULL; init_sigpending(&newsig->shared_pending); + INIT_LIST_HEAD(&newsig->posix_timers); + newsig->tty = oldsig->tty; newsig->pgrp = oldsig->pgrp; newsig->session = oldsig->session; newsig->leader 
= oldsig->leader; @@ -846,7 +837,8 @@ int flush_old_exec(struct linux_binprm * flush_thread(); if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || - permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL)) + permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || + (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) current->mm->dumpable = 0; /* An exec changes our domain. We are no longer part of the thread @@ -868,15 +860,6 @@ out: EXPORT_SYMBOL(flush_old_exec); -/* - * We mustn't allow tracing of suid binaries, unless - * the tracer has the capability to trace anything.. - */ -static inline int must_not_trace_exec(struct task_struct * p) -{ - return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP); -} - /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes @@ -943,27 +926,7 @@ EXPORT_SYMBOL(prepare_binprm); void compute_creds(struct linux_binprm *bprm) { - task_lock(current); - if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { - current->mm->dumpable = 0; - - if (must_not_trace_exec(current) - || atomic_read(&current->fs->count) > 1 - || atomic_read(&current->files->count) > 1 - || atomic_read(&current->sighand->count) > 1) { - if(!capable(CAP_SETUID)) { - bprm->e_uid = current->uid; - bprm->e_gid = current->gid; - } - } - } - - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; - - task_unlock(current); - - security_bprm_compute_creds(bprm); + security_bprm_apply_creds(bprm); } EXPORT_SYMBOL(compute_creds); @@ -1118,6 +1081,7 @@ int do_execve(char * filename, bprm.file = file; bprm.filename = filename; bprm.interp = filename; + bprm.interp_flags = 0; bprm.sh_bang = 0; bprm.loader = 0; bprm.exec = 0; --- linux-2.6.6-rc1/fs/ext2/inode.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/ext2/inode.c 2004-04-18 22:25:57.345112696 -0700 @@ -655,7 +655,7 @@ ext2_get_blocks(struct inode *inode, sec return ret; 
} -static int +static ssize_t ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { --- linux-2.6.6-rc1/fs/ext3/balloc.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/ext3/balloc.c 2004-04-18 22:25:53.617679352 -0700 @@ -96,9 +96,87 @@ read_block_bitmap(struct super_block *sb error_out: return bh; } +/* + * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use sorted double linked list for the per-filesystem reservation + * window list. (like in vm_region). + * + * Initially, we keep those small operations in the abstract functions, + * so later if we need a better searching tree than double linked-list, + * we could easily switch to that without changing too much + * code. + */ +static inline void rsv_window_dump(struct reserve_window *head, char *fn) +{ + struct reserve_window *rsv; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + list_for_each_entry(rsv, &head->rsv_list, rsv_list) { + printk("reservation window 0x%p start: %d, end: %d\n", + rsv, rsv->rsv_start, rsv->rsv_end); + } +} + +static int +goal_in_my_reservation(struct reserve_window *rsv, int goal, + unsigned int group, struct super_block * sb) +{ + unsigned long group_first_block, group_last_block; + + group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + + group * EXT3_BLOCKS_PER_GROUP(sb); + group_last_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; + + if ((rsv->rsv_start > group_last_block) || + (rsv->rsv_end < group_first_block)) + return 0; + if ((goal >= 0) && ((goal + group_first_block < rsv->rsv_start) + || (goal + group_first_block > rsv->rsv_end))) + return 0; + return 1; +} + +static inline void rsv_window_add(struct reserve_window *rsv, + struct reserve_window *prev) +{ + /* insert the new reservation window after the head */ + 
list_add(&rsv->rsv_list, &prev->rsv_list); +} + +static inline void rsv_window_remove(struct reserve_window *rsv) +{ + rsv->rsv_start = 0; + rsv->rsv_end = 0; + rsv->rsv_alloc_hit = 0; + list_del(&rsv->rsv_list); + INIT_LIST_HEAD(&rsv->rsv_list); +} + +static inline int rsv_is_empty(struct reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return (rsv->rsv_end == 0); +} + +void ext3_discard_reservation(struct inode *inode) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + struct reserve_window *rsv = &ei->i_rsv_window; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + + if (!rsv_is_empty(rsv)) { + spin_lock(rsv_lock); + rsv_window_remove(rsv); + spin_unlock(rsv_lock); + } +} /* Free given blocks, update quota and i_blocks field */ -void ext3_free_blocks (handle_t *handle, struct inode * inode, +void ext3_free_blocks(handle_t *handle, struct inode *inode, unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; @@ -296,7 +374,7 @@ error_return: * data-writes at some point, and disable it for metadata allocations or * sync-data inodes. 
*/ -static inline int ext3_test_allocatable(int nr, struct buffer_head *bh) +static int ext3_test_allocatable(int nr, struct buffer_head *bh) { int ret; struct journal_head *jh = bh2jh(bh); @@ -313,6 +391,33 @@ static inline int ext3_test_allocatable( return ret; } +static int +bitmap_search_next_usable_block(int start, struct buffer_head *bh, + int maxblocks) +{ + int next; + struct journal_head *jh = bh2jh(bh); + + /* + * The bitmap search --- search forward alternately through the actual + * bitmap and the last-committed copy until we find a bit free in + * both + */ + while (start < maxblocks) { + next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); + if (next >= maxblocks) + return -1; + if (ext3_test_allocatable(next, bh)) + return next; + jbd_lock_bh_state(bh); + if (jh->b_committed_data) + start = ext3_find_next_zero_bit(jh->b_committed_data, + maxblocks, next); + jbd_unlock_bh_state(bh); + } + return -1; +} + /* * Find an allocatable block in a bitmap. We honour both the bitmap and * its last-committed copy (if that exists), and perform the "most @@ -325,7 +430,6 @@ find_next_usable_block(int start, struct { int here, next; char *p, *r; - struct journal_head *jh = bh2jh(bh); if (start > 0) { /* @@ -337,6 +441,8 @@ find_next_usable_block(int start, struct * next 64-bit boundary is simple.. 
*/ int end_goal = (start + 63) & ~63; + if (end_goal > maxblocks) + end_goal = maxblocks; here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); if (here < end_goal && ext3_test_allocatable(here, bh)) return here; @@ -359,19 +465,8 @@ find_next_usable_block(int start, struct * bitmap and the last-committed copy until we find a bit free in * both */ - while (here < maxblocks) { - next = ext3_find_next_zero_bit(bh->b_data, maxblocks, here); - if (next >= maxblocks) - return -1; - if (ext3_test_allocatable(next, bh)) - return next; - jbd_lock_bh_state(bh); - if (jh->b_committed_data) - here = ext3_find_next_zero_bit(jh->b_committed_data, - maxblocks, next); - jbd_unlock_bh_state(bh); - } - return -1; + here = bitmap_search_next_usable_block(here, bh, maxblocks); + return here; } /* @@ -407,62 +502,445 @@ claim_block(spinlock_t *lock, int block, */ static int ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, int goal, int *errp) + struct buffer_head *bitmap_bh, int goal, struct reserve_window *my_rsv) { - int i; - int fatal; - int credits = 0; - - *errp = 0; + int group_first_block, start, end; - /* - * Make sure we use undo access for the bitmap, because it is critical - * that we do the frozen_data COW on bitmap buffers in all cases even - * if the buffer is in BJ_Forget state in the committing transaction. 
- */ - BUFFER_TRACE(bitmap_bh, "get undo access for new block"); - fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits); - if (fatal) { - *errp = fatal; - goto fail; + /* we do allocation within the reservation window if we have a window */ + if (my_rsv) { + group_first_block = + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + + group * EXT3_BLOCKS_PER_GROUP(sb); + if (my_rsv->rsv_start >= group_first_block) + start = my_rsv->rsv_start - group_first_block; + else + /* reservation window cross group boundary */ + start = 0; + end = my_rsv->rsv_end - group_first_block + 1; + if (end > EXT3_BLOCKS_PER_GROUP(sb)) + /* reservation window crosses group boundary */ + end = EXT3_BLOCKS_PER_GROUP(sb); + if ((start <= goal) && (goal < end)) + start = goal; + else + goal = -1; + } else { + if (goal > 0) + start = goal; + else + start = 0; + end = EXT3_BLOCKS_PER_GROUP(sb); } + BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); + repeat: if (goal < 0 || !ext3_test_allocatable(goal, bitmap_bh)) { - goal = find_next_usable_block(goal, bitmap_bh, - EXT3_BLOCKS_PER_GROUP(sb)); + goal = find_next_usable_block(start, bitmap_bh, end); if (goal < 0) goto fail_access; - - for (i = 0; i < 7 && goal > 0 && - ext3_test_allocatable(goal - 1, bitmap_bh); - i++, goal--); } + start = goal; if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) { /* * The block was allocated by another thread, or it was * allocated and then freed by another thread */ + start++; goal++; - if (goal >= EXT3_BLOCKS_PER_GROUP(sb)) + if (start >= end) goto fail_access; goto repeat; } + if (my_rsv) + my_rsv->rsv_alloc_hit++; + return goal; +fail_access: + return -1; +} + +/** + * find_next_reservable_window(): + * find a reservable space within the given range + * It does not allocate the reservation window for now + * alloc_new_reservation() will do the work later. 
+ * + * @search_head: the head of the searching list; + * This is not necessarily the list head of the whole filesystem + * + * we have both head and start_block to assist the search + * for the reservable space. The list starts from head, + * but we will shift to the place where start_block is, + * then start from there, we look for a reservable space. + * + * @fs_rsv_head: per-filesystem reservation list head. + * + * @size: the target new reservation window size + * @start_block: the first block we consider to start + * the real search from + * + * @last_block: + * the maximum block number that our goal reservable space + * could start from. This is normally the last block in this + * group. The search will end when we found the start of next + * possible reservable space is out of this boundary. + * This could handle the cross boundary reservation window request. + * + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. + * + * on succeed, it returns the reservation window to be appended to. + * failed, return NULL. + */ +static inline +struct reserve_window *find_next_reservable_window( + struct reserve_window *search_head, + struct reserve_window *fs_rsv_head, + unsigned long size, int *start_block, + int last_block) +{ + struct reserve_window *rsv; + int cur; + + /* TODO:make the start of the reservation window byte aligned */ + /*cur = *start_block & 8;*/ + cur = *start_block; + rsv = list_entry(search_head->rsv_list.next, + struct reserve_window, rsv_list); + while (rsv != fs_rsv_head) { + if (cur + size <= rsv->rsv_start) { + /* + * Found a reservable space big enough. We could + * have a reservation across the group boundary here + */ + break; + } + if (cur <= rsv->rsv_end) + cur = rsv->rsv_end + 1; + + /* TODO? 
+ * in the case we could not find a reservable space + * that is what is expected, during the re-search, we could + * remember what's the largest reservable space we could have + * and return that one. + * + * For now it will fail if we could not find the reservable + * space with expected-size (or more)... + */ + rsv = list_entry(rsv->rsv_list.next, + struct reserve_window, rsv_list); + if (cur > last_block) + return NULL; /* fail */ + } + /* + * we come here either : + * when we rearch to the end of the whole list, + * and there is empty reservable space after last entry in the list. + * append it to the end of the list. + * + * or we found one reservable space in the middle of the list, + * return the reservation window that we could append to. + * succeed. + */ + *start_block = cur; + return list_entry(rsv->rsv_list.prev, struct reserve_window, rsv_list); +} + +/** + * alloc_new_reservation()--allocate a new reservation window + * if there is an existing reservation, discard it first + * then allocate the new one from there + * otherwise allocate the new reservation from the given + * start block, or the beginning of the group, if a goal + * is not given. + * + * To make a new reservation, we search part of the filesystem + * reservation list(the list that inside the group). + * + * If we have a old reservation, the search goal is the end of + * last reservation. If we do not have a old reservatio, then we + * start from a given goal, or the first block of the group, if + * the goal is not given. + * + * We first find a reservable space after the goal, then from + * there,we check the bitmap for the first free block after + * it. If there is no free block until the end of group, then the + * whole group is full, we failed. Otherwise, check if the free + * block is inside the expected reservable space, if so, we + * succeed. 
+ * If the first free block is outside the reseravle space, then + * start from the first free block, we search for next avalibale + * space, and go on. + * + * on succeed, a new reservation will be found and inserted into the list + * It contains at least one free block, and it is not overlap with other + * reservation window. + * + * failed: we failed to found a reservation window in this group + * + * @rsv: the reservation + * + * @goal: The goal. It is where the search for a + * free reservable space should start from. + * if we have a old reservation, start_block is the end of + * old reservation. Otherwise, + * if we have a goal(goal >0 ), then start from there, + * no goal(goal = -1), we start from the first block + * of the group. + * + * @sb: the super block + * @group: the group we are trying to do allocate in + * @bitmap_bh: the block group block bitmap + */ +static int alloc_new_reservation(struct reserve_window *my_rsv, + int goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) +{ + struct reserve_window *search_head; + int group_first_block, group_end_block, start_block; + int first_free_block; + int reservable_space_start; + struct reserve_window *prev_rsv; + struct reserve_window *fs_rsv_head = &EXT3_SB(sb)->s_rsv_window_head; + unsigned long size; + + group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + + group * EXT3_BLOCKS_PER_GROUP(sb); + group_end_block = group_first_block + EXT3_BLOCKS_PER_GROUP(sb) - 1; + + if (goal < 0) + start_block = group_first_block; + else + start_block = goal + group_first_block; + + size = atomic_read(&my_rsv->rsv_goal_size); + /* if we have a old reservation, discard it first */ + if (!rsv_is_empty(my_rsv)) { + /* + * if the old reservation is cross group boundary + * we will come here when we just failed to allocate from + * the first part of the window. We still have another part + * that belongs to the next group. 
In this case, there is no + * point to discard our window and try to allocate a new one + * in this group(which will fail). we should + * keep the reservation window, just simply move on. + * + * Maybe we could shift the start block of the reservation + * window to the first block of next group. + */ + + if ((my_rsv->rsv_start <= group_end_block) && + (my_rsv->rsv_end > group_end_block)) + return -1; + + /* remember where we are before we discard the old one */ + if (my_rsv->rsv_end + 1 > start_block) + start_block = my_rsv->rsv_end + 1; + search_head = list_entry(my_rsv->rsv_list.prev, + struct reserve_window, rsv_list); + if ((my_rsv->rsv_alloc_hit > (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { + /* + * if we previously allocation hit ration is greater than half + * we double the size of reservation window next time + * otherwise keep the same + */ + size = size * 2; + atomic_set(&my_rsv->rsv_goal_size, size); + } + rsv_window_remove(my_rsv); + } + else { + /* + * we don't have a reservation, + * we set our goal(start_block) and + * the list head for the search + */ + search_head = fs_rsv_head; + } - BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block"); - fatal = ext3_journal_dirty_metadata(handle, bitmap_bh); + /* + * find_next_reservable_window() simply find a reservable window + * inside the given range(start_block, group_end_block). + * + * To make sure the reservation window has a free bit inside it, we + * need to check the bitmap after we found a reservable window. + */ +retry: + prev_rsv = find_next_reservable_window(search_head, fs_rsv_head, size, + &start_block, group_end_block); + if (prev_rsv == NULL) + goto failed; + reservable_space_start = start_block; + /* + * On success, find_next_reservable_window() returns the + * reservation window where there is a reservable space after it. + * Before we reserve this reservable space, we need + * to make sure there is at least a free block inside this region. 
+ * + * searching the first free bit on the block bitmap and copy of + * last committed bitmap alternatively, until we found a allocatable + * block. Search start from the start block of the reservable space + * we just found. + */ + first_free_block = bitmap_search_next_usable_block( + reservable_space_start - group_first_block, + bitmap_bh, group_end_block - group_first_block + 1); + + if (first_free_block < 0) { + /* + * no free block left on the bitmap, no point + * to reserve the space. return failed. + */ + goto failed; + } + start_block = first_free_block + group_first_block; + /* + * check if the first free block is within the + * free space we just found + */ + if ((start_block >= reservable_space_start) && + (start_block < reservable_space_start + size)) + goto found_rsv_window; + /* + * if the first free bit we found is out of the reservable space + * this means there is no free block on the reservable space + * we should continue search for next reservable space, + * start from where the free block is, + * we also shift the list head to where we stopped last time + */ + search_head = prev_rsv; + goto retry; + +found_rsv_window: + /* + * great! the reservable space contains some free blocks. + * Insert it to the list. + */ + rsv_window_add(my_rsv, prev_rsv); + my_rsv->rsv_start = reservable_space_start; + my_rsv->rsv_end = my_rsv->rsv_start + size - 1; + return 0; /* succeed */ +failed: + return -1; /* failed */ +} + +/* + * This is the main function used to allocate a new block and its reservation + * window. + * + * Each time when a new block allocation is need, first try to allocate from + * its own reservation. If it does not have a reservation window, instead of + * looking for a free bit on bitmap first, then look up the reservation list to + * see if it is inside somebody else's reservation window, we try to allocate a + * reservation window for it start from the goal first. Then do the block + * allocation within the reservation window. 
+ * + * This will aviod keep searching the reservation list again and again when + * someboday is looking for a free block(without reservation), and there are + * lots of free blocks, but they are all being reserved + * + * We use a sorted double linked list for the per-filesystem reservation list. + * The insert, remove and find a free space(non-reserved) operations for the + * sorted double linked list should be fast. + * + */ +static int +ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, + unsigned int group, struct buffer_head *bitmap_bh, + int goal, struct reserve_window * my_rsv, + int *errp) +{ + spinlock_t *rsv_lock; + unsigned long group_first_block; + int ret = 0; + int fatal; + int credits = 0; + + *errp = 0; + + /* + * Make sure we use undo access for the bitmap, because it is critical + * that we do the frozen_data COW on bitmap buffers in all cases even + * if the buffer is in BJ_Forget state in the committing transaction. + */ + BUFFER_TRACE(bitmap_bh, "get undo access for new block"); + fatal = ext3_journal_get_undo_access(handle, bitmap_bh, &credits); if (fatal) { *errp = fatal; - goto fail; + return -1; + } + + /* + * we don't deal with reservation when + * filesystem is mounted without reservation + * or the file is not a regular file + * of last attemp of allocating a block with reservation turn on failed + */ + if (my_rsv == NULL ) { + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, NULL); + goto out; + } + rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + /* + * goal is a group relative block number (if there is a goal) + * 0 < goal < EXT3_BLOCKS_PER_GROUP(sb) + * first block is a filesystem wide block number + * first block is the block number of the first block in this group + */ + group_first_block = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + + group * EXT3_BLOCKS_PER_GROUP(sb); + + /* + * Basically we will allocate a new block from inode's reservation + * window. 
+ * + * We need to allocate a new reservation window, if: + * a) inode does not have a reservation window; or + * b) last attemp of allocating a block from existing reservation + * failed; or + * c) we come here with a goal and with a reservation window + * + * We do not need to allocate a new reservation window if we come here + * at the beginning with a goal and the goal is inside the window, or + * or we don't have a goal but already have a reservation window. + * then we could go to allocate from the reservation window directly. + */ + while (1) { + if (rsv_is_empty(my_rsv) || (ret < 0) || + !goal_in_my_reservation(my_rsv, goal, group, sb)) { + spin_lock(rsv_lock); + ret = alloc_new_reservation(my_rsv, goal, sb, + group, bitmap_bh); + spin_unlock(rsv_lock); + if (ret < 0) + break; /* failed */ + + if (!goal_in_my_reservation(my_rsv, goal, group, sb)) + goal = -1; + } + ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, goal, + my_rsv); + if (ret >= 0) + break; /* succeed */ + } +out: + if (ret >= 0) { + BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " + "bitmap block"); + fatal = ext3_journal_dirty_metadata(handle, bitmap_bh); + if (fatal) { + *errp = fatal; + return -1; + } + return ret; } - return goal; -fail_access: BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); ext3_journal_release_buffer(handle, bitmap_bh, credits); -fail: - return -1; + return ret; } /* @@ -473,16 +951,16 @@ fail: * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ -int -ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, - u32 *prealloc_count, u32 *prealloc_block, int *errp) -{ - struct buffer_head *bitmap_bh = NULL; /* bh */ - struct buffer_head *gdp_bh; /* bh2 */ - int group_no; /* i */ - int ret_block; /* j */ - int bgi; /* blockgroup iteration index */ - int target_block; /* tmp */ +int ext3_new_block(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gdp_bh; + int group_no; + int goal_group; + int ret_block; + int bgi; /* blockgroup iteration index */ + int target_block; int fatal = 0, err; int performed_allocation = 0; int free_blocks, root_blocks; @@ -490,6 +968,7 @@ ext3_new_block(handle_t *handle, struct struct ext3_group_desc *gdp; struct ext3_super_block *es; struct ext3_sb_info *sbi; + struct reserve_window *my_rsv = NULL; #ifdef EXT3FS_DEBUG static int goal_hits, goal_attempts; #endif @@ -511,7 +990,10 @@ ext3_new_block(handle_t *handle, struct sbi = EXT3_SB(sb); es = EXT3_SB(sb)->s_es; ext3_debug("goal=%lu.\n", goal); - +#ifdef EXT3_RESERVATION + if (test_opt(sb, RESERVATION) && S_ISREG(inode->i_mode)) + my_rsv = &EXT3_I(inode)->i_rsv_window; +#endif free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); root_blocks = le32_to_cpu(es->s_r_blocks_count); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && @@ -533,6 +1015,8 @@ ext3_new_block(handle_t *handle, struct if (!gdp) goto io_error; + goal_group = group_no; +retry: free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); if (free_blocks > 0) { ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % @@ -540,8 +1024,8 @@ ext3_new_block(handle_t *handle, struct bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate(sb, handle, group_no, - bitmap_bh, ret_block, &fatal); + ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, + bitmap_bh, ret_block, 
my_rsv, &fatal); if (fatal) goto out; if (ret_block >= 0) @@ -569,14 +1053,33 @@ ext3_new_block(handle_t *handle, struct bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - ret_block = ext3_try_to_allocate(sb, handle, group_no, - bitmap_bh, -1, &fatal); + ret_block = ext3_try_to_allocate_with_rsv(sb, handle, group_no, + bitmap_bh, -1, my_rsv, &fatal); if (fatal) goto out; if (ret_block >= 0) goto allocated; } - +#ifdef EXT3_RESERVATION + /* + * We may end up a bogus ealier ENOSPC error due to + * filesystem is "full" of reservations, but + * there maybe indeed free blocks avaliable on disk + * In this case, we just forget about the reservations + * just do block allocation as without reservations. + */ + if (my_rsv) { +#ifdef EXT3_RESERVATION_DEBUG + printk("filesystem is fully reserved. Actual free blocks: %d. " + "Try to do allocation without reservation, goal_group " + "is %d\n", + free_blocks, goal_group); +#endif + my_rsv = NULL; + group_no = goal_group; + goto retry; + } +#endif /* No space left on the device */ *errp = -ENOSPC; goto out; --- linux-2.6.6-rc1/fs/ext3/file.c 2003-10-08 15:07:09.000000000 -0700 +++ 25/fs/ext3/file.c 2004-04-18 22:25:53.175746536 -0700 @@ -34,7 +34,7 @@ static int ext3_release_file (struct inode * inode, struct file * filp) { if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); + ext3_discard_reservation(inode); if (is_dx(inode) && filp->private_data) ext3_htree_free_dir_info(filp->private_data); --- linux-2.6.6-rc1/fs/ext3/ialloc.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/ext3/ialloc.c 2004-04-18 22:25:53.618679200 -0700 @@ -581,10 +581,11 @@ got: ei->i_file_acl = 0; ei->i_dir_acl = 0; ei->i_dtime = 0; -#ifdef EXT3_PREALLOCATE - ei->i_prealloc_block = 0; - ei->i_prealloc_count = 0; -#endif + ei->i_rsv_window.rsv_start = 0; + ei->i_rsv_window.rsv_end = 0; + atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS); + ei->i_rsv_window.rsv_alloc_hit = 0; + 
INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list); ei->i_block_group = group; ext3_set_inode_flags(inode); --- linux-2.6.6-rc1/fs/ext3/inode.c 2004-02-03 20:42:38.000000000 -0800 +++ 25/fs/ext3/inode.c 2004-04-18 22:25:57.348112240 -0700 @@ -186,7 +186,7 @@ static int ext3_journal_test_restart(han void ext3_put_inode(struct inode *inode) { if (!is_bad_inode(inode)) - ext3_discard_prealloc(inode); + ext3_discard_reservation(inode); } /* @@ -244,62 +244,12 @@ no_delete: clear_inode(inode); /* We must guarantee clearing of inode... */ } -void ext3_discard_prealloc (struct inode * inode) -{ -#ifdef EXT3_PREALLOCATE - struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_prealloc* */ - if (ei->i_prealloc_count) { - unsigned short total = ei->i_prealloc_count; - unsigned long block = ei->i_prealloc_block; - ei->i_prealloc_count = 0; - ei->i_prealloc_block = 0; - /* Writer: end */ - ext3_free_blocks (inode, block, total); - } -#endif -} - static int ext3_alloc_block (handle_t *handle, struct inode * inode, unsigned long goal, int *err) { unsigned long result; -#ifdef EXT3_PREALLOCATE -#ifdef EXT3FS_DEBUG - static unsigned long alloc_hits, alloc_attempts; -#endif - struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_prealloc* */ - if (ei->i_prealloc_count && - (goal == ei->i_prealloc_block || - goal + 1 == ei->i_prealloc_block)) - { - result = ei->i_prealloc_block++; - ei->i_prealloc_count--; - /* Writer: end */ - ext3_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); - } else { - ext3_discard_prealloc (inode); - ext3_debug ("preallocation miss (%lu/%lu).\n", - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext3_new_block (inode, goal, - &ei->i_prealloc_count, - &ei->i_prealloc_block, err); - else - result = ext3_new_block (inode, goal, 0, 0, err); - /* - * AKPM: this is somewhat sticky. I'm not surprised it was - * disabled in 2.2's ext3. 
Need to integrate b_committed_data - * guarding with preallocation, if indeed preallocation is - * effective. - */ - } -#else - result = ext3_new_block (handle, inode, goal, 0, 0, err); -#endif + result = ext3_new_block (handle, inode, goal, err); return result; } @@ -966,38 +916,6 @@ struct buffer_head *ext3_bread(handle_t bh = ext3_getblk (handle, inode, block, create, err); if (!bh) return bh; -#ifdef EXT3_PREALLOCATE - /* - * If the inode has grown, and this is a directory, then use a few - * more of the preallocated blocks to keep directory fragmentation - * down. The preallocated blocks are guaranteed to be contiguous. - */ - if (create && - S_ISDIR(inode->i_mode) && - inode->i_blocks > prev_blocks && - EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_PREALLOC)) { - int i; - struct buffer_head *tmp_bh; - - for (i = 1; - EXT3_I(inode)->i_prealloc_count && - i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; - i++) { - /* - * ext3_getblk will zero out the contents of the - * directory for us - */ - tmp_bh = ext3_getblk(handle, inode, - block+i, create, err); - if (!tmp_bh) { - brelse (bh); - return 0; - } - brelse (tmp_bh); - } - } -#endif if (buffer_uptodate(bh)) return bh; ll_rw_block (READ, 1, &bh); @@ -1358,8 +1276,6 @@ static int ext3_ordered_writepage(struct } if (!page_has_buffers(page)) { - if (!PageUptodate(page)) - buffer_error(); create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); } @@ -1527,7 +1443,7 @@ static int ext3_releasepage(struct page * If the O_DIRECT write is intantiating holes inside i_size and the machine * crashes then stale disk data _may_ be exposed inside the file. 
*/ -static int ext3_direct_IO(int rw, struct kiocb *iocb, +static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -1535,7 +1451,7 @@ static int ext3_direct_IO(int rw, struct struct inode *inode = file->f_mapping->host; struct ext3_inode_info *ei = EXT3_I(inode); handle_t *handle = NULL; - int ret; + ssize_t ret; int orphan = 0; size_t count = iov_length(iov, nr_segs); @@ -2138,7 +2054,7 @@ void ext3_truncate(struct inode * inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - ext3_discard_prealloc(inode); + ext3_discard_reservation(inode); /* * We have to lock the EOF page here, because lock_page() nests @@ -2531,11 +2447,11 @@ void ext3_read_inode(struct inode * inod } ei->i_disksize = inode->i_size; inode->i_generation = le32_to_cpu(raw_inode->i_generation); -#ifdef EXT3_PREALLOCATE - ei->i_prealloc_count = 0; -#endif ei->i_block_group = iloc.block_group; - + ei->i_rsv_window.rsv_start = 0; + ei->i_rsv_window.rsv_end= 0; + atomic_set(&ei->i_rsv_window.rsv_goal_size, EXT3_DEFAULT_RESERVE_BLOCKS); + INIT_LIST_HEAD(&ei->i_rsv_window.rsv_list); /* * NOTE! The in-memory inode i_data array is in little-endian order * even on big-endian machines: we do NOT byteswap the block numbers! @@ -2772,9 +2688,28 @@ int ext3_setattr(struct dentry *dentry, if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, + * inode block, ? - but truncate inode update has it) */ + handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } error = DQUOT_TRANSFER(inode, attr) ? 
-EDQUOT : 0; - if (error) + if (error) { + ext3_journal_stop(handle); return error; + } + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + error = ext3_mark_inode_dirty(handle, inode); + ext3_journal_stop(handle); } if (S_ISREG(inode->i_mode) && @@ -2853,7 +2788,9 @@ int ext3_writepage_trans_blocks(struct i ret = 2 * (bpp + indirects) + 2; #ifdef CONFIG_QUOTA - ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; + /* We know that structure was already allocated during DQUOT_INIT so + * we will be updating only the data blocks + inodes */ + ret += 2*EXT3_QUOTA_TRANS_BLOCKS; #endif return ret; --- linux-2.6.6-rc1/fs/ext3/ioctl.c 2003-06-22 12:04:44.000000000 -0700 +++ 25/fs/ext3/ioctl.c 2004-04-18 22:25:53.418709600 -0700 @@ -20,6 +20,7 @@ int ext3_ioctl (struct inode * inode, st { struct ext3_inode_info *ei = EXT3_I(inode); unsigned int flags; + unsigned short rsv_window_size; ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); @@ -151,6 +152,25 @@ flags_err: return ret; } #endif +#ifdef EXT3_RESERVATION + case EXT3_IOC_GETRSVSZ: + rsv_window_size = atomic_read(&ei->i_rsv_window.rsv_goal_size); + return put_user(rsv_window_size, (int *)arg); + case EXT3_IOC_SETRSVSZ: + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(rsv_window_size, (int *)arg)) + return -EFAULT; + + if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; + atomic_set(&ei->i_rsv_window.rsv_goal_size, rsv_window_size); + return 0; +#endif default: return -ENOTTY; } --- linux-2.6.6-rc1/fs/ext3/namei.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/ext3/namei.c 2004-04-18 22:25:24.923041600 -0700 @@ -1631,7 +1631,8 @@ static int ext3_create (struct inode * d int err; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - 
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1661,7 +1662,8 @@ static int ext3_mknod (struct inode * di return -EINVAL; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1693,7 +1695,8 @@ static int ext3_mkdir(struct inode * dir return -EMLINK; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1972,6 +1975,9 @@ static int ext3_rmdir (struct inode * di struct ext3_dir_entry_2 * de; handle_t *handle; + /* Initialize quotas before so that eventual writes go in + * separate transaction */ + DQUOT_INIT(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -1985,7 +1991,6 @@ static int ext3_rmdir (struct inode * di handle->h_sync = 1; inode = dentry->d_inode; - DQUOT_INIT(inode); retval = -EIO; if (le32_to_cpu(de->inode) != inode->i_ino) @@ -2029,6 +2034,9 @@ static int ext3_unlink(struct inode * di struct ext3_dir_entry_2 * de; handle_t *handle; + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + DQUOT_INIT(dentry->d_inode); handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2042,7 +2050,6 @@ static int ext3_unlink(struct inode * di goto end_unlink; inode = dentry->d_inode; - DQUOT_INIT(inode); retval = -EIO; if (le32_to_cpu(de->inode) != inode->i_ino) @@ -2085,7 +2092,8 @@ static int ext3_symlink (struct inode * return -ENAMETOOLONG; handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + + 
2*EXT3_QUOTA_INIT_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2170,6 +2178,10 @@ static int ext3_rename (struct inode * o old_bh = new_bh = dir_bh = NULL; + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + if (new_dentry->d_inode) + DQUOT_INIT(new_dentry->d_inode); handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) @@ -2196,8 +2208,6 @@ static int ext3_rename (struct inode * o if (!new_inode) { brelse (new_bh); new_bh = NULL; - } else { - DQUOT_INIT(new_inode); } } if (S_ISDIR(old_inode->i_mode)) { --- linux-2.6.6-rc1/fs/ext3/super.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/ext3/super.c 2004-04-18 22:25:53.838645760 -0700 @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include "xattr.h" #include "acl.h" @@ -504,7 +507,43 @@ static void ext3_clear_inode(struct inod # define ext3_clear_inode NULL #endif -static struct dquot_operations ext3_qops; +#ifdef CONFIG_QUOTA + +#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) + +static int ext3_dquot_initialize(struct inode *inode, int type); +static int ext3_dquot_drop(struct inode *inode); +static int ext3_write_dquot(struct dquot *dquot); +static int ext3_mark_dquot_dirty(struct dquot *dquot); +static int ext3_write_info(struct super_block *sb, int type); +static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); +static int ext3_quota_on_mount(struct super_block *sb, int type); +static int ext3_quota_off_mount(struct super_block *sb, int type); + +static struct dquot_operations ext3_quota_operations = { + .initialize = ext3_dquot_initialize, + .drop = ext3_dquot_drop, + .alloc_space = dquot_alloc_space, + .alloc_inode = dquot_alloc_inode, + .free_space = dquot_free_space, + .free_inode = dquot_free_inode, + .transfer = dquot_transfer, + .write_dquot 
= ext3_write_dquot, + .mark_dirty = ext3_mark_dquot_dirty, + .write_info = ext3_write_info +}; + +static struct quotactl_ops ext3_qctl_operations = { + .quota_on = ext3_quota_on, + .quota_off = vfs_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk +}; +#endif static struct super_operations ext3_sops = { .alloc_inode = ext3_alloc_inode, @@ -533,9 +572,12 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_ignore, Opt_err, }; @@ -563,6 +605,8 @@ static match_table_t tokens = { {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, {Opt_noload, "noload"}, {Opt_commit, "commit=%u"}, {Opt_journal_update, "journal=update"}, @@ -571,6 +615,12 @@ static match_table_t tokens = { {Opt_data_journal, "data=journal"}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, {Opt_ignore, "grpquota"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, @@ -598,13 +648,17 @@ static unsigned long get_sb_block(void * return sb_block; } -static int parse_options (char * options, struct ext3_sb_info *sbi, 
+static int parse_options (char * options, struct super_block *sb, unsigned long * inum, int is_remount) { + struct ext3_sb_info *sbi = EXT3_SB(sb); char * p; substring_t args[MAX_OPT_ARGS]; int data_opt = 0; int option; +#ifdef CONFIG_QUOTA + int qtype; +#endif if (!options) return 1; @@ -706,6 +760,19 @@ static int parse_options (char * options printk("EXT3 (no)acl options not supported\n"); break; #endif +#ifdef EXT3_RESERVATION + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_noreservation: + clear_opt(sbi->s_mount_opt, RESERVATION); + break; +#else + case Opt_reservation: + case Opt_noreservation: + printk("EXT3 block reservation options not supported\n"); + break; +#endif case Opt_journal_update: /* @@@ FIXME */ /* Eventually we will want to be able to create @@ -763,6 +830,76 @@ static int parse_options (char * options sbi->s_mount_opt |= data_opt; } break; +#ifdef CONFIG_QUOTA + case Opt_usrjquota: + qtype = USRQUOTA; + goto set_qf_name; + case Opt_grpjquota: + qtype = GRPQUOTA; +set_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR + "EXT3-fs: Cannot change journalled " + "quota options when quota turned on.\n"); + return 0; + } + if (sbi->s_qf_names[qtype]) { + printk(KERN_ERR + "EXT3-fs: %s quota file already " + "specified.\n", QTYPE2NAME(qtype)); + return 0; + } + sbi->s_qf_names[qtype] = match_strdup(&args[0]); + if (!sbi->s_qf_names[qtype]) { + printk(KERN_ERR + "EXT3-fs: not enough memory for " + "storing quotafile name.\n"); + return 0; + } + if (strchr(sbi->s_qf_names[qtype], '/')) { + printk(KERN_ERR + "EXT3-fs: quotafile must be on " + "filesystem root.\n"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + break; + case Opt_offusrjquota: + qtype = USRQUOTA; + goto clear_qf_name; + case Opt_offgrpjquota: + qtype = GRPQUOTA; +clear_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT3-fs: Cannot change " + "journalled quota options when " + "quota 
turned on.\n"); + return 0; + } + if (sbi->s_qf_names[qtype]) { + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + } + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; +#else + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + printk(KERN_ERR + "EXT3-fs: journalled quota options not " + "supported.\n"); + break; +#endif case Opt_abort: set_opt(sbi->s_mount_opt, ABORT); break; @@ -775,6 +912,13 @@ static int parse_options (char * options return 0; } } +#ifdef CONFIG_QUOTA + if (!sbi->s_jquota_fmt && (sbi->s_qf_names[0] || sbi->s_qf_names[1])) { + printk(KERN_ERR + "EXT3-fs: journalled quota format not specified.\n"); + return 0; + } +#endif return 1; } @@ -934,6 +1078,9 @@ static void ext3_orphan_cleanup (struct { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; +#ifdef CONFIG_QUOTA + int i; +#endif if (!es->s_last_orphan) { jbd_debug(4, "no orphan inodes to clean up\n"); return; @@ -953,6 +1100,20 @@ static void ext3_orphan_cleanup (struct sb->s_id); sb->s_flags &= ~MS_RDONLY; } +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (EXT3_SB(sb)->s_qf_names[i]) { + int ret = ext3_quota_on_mount(sb, i); + if (ret < 0) + printk(KERN_ERR + "EXT3-fs: Cannot turn on journalled " + "quota: error %d\n", ret); + } + } +#endif while (es->s_last_orphan) { struct inode *inode; @@ -964,6 +1125,7 @@ static void ext3_orphan_cleanup (struct } list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + DQUOT_INIT(inode); if (inode->i_nlink) { printk(KERN_DEBUG "%s: truncating inode %ld to %Ld bytes\n", @@ -991,6 +1153,13 @@ static void ext3_orphan_cleanup (struct if (nr_truncates) printk(KERN_INFO 
"EXT3-fs: %s: %d truncate%s cleaned up\n", sb->s_id, PLURAL(nr_truncates)); +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + ext3_quota_off_mount(sb, i); + } +#endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } @@ -1124,7 +1293,9 @@ static int ext3_fill_super (struct super sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid); - if (!parse_options ((char *) data, sbi, &journal_inum, 0)) + set_opt(sbi->s_mount_opt, RESERVATION); + + if (!parse_options ((char *) data, sb, &journal_inum, 0)) goto failed_mount; sb->s_flags |= MS_ONE_SECOND; @@ -1298,12 +1469,23 @@ static int ext3_fill_super (struct super sbi->s_gdb_count = db_count; get_random_bytes(&sbi->s_next_generation, sizeof(u32)); spin_lock_init(&sbi->s_next_gen_lock); + /* per filesystem reservation list head & lock */ + spin_lock_init(&sbi->s_rsv_window_lock); + INIT_LIST_HEAD(&sbi->s_rsv_window_head.rsv_list); + sbi->s_rsv_window_head.rsv_start = 0; + sbi->s_rsv_window_head.rsv_end = 0; + sbi->s_rsv_window_head.rsv_alloc_hit = 0; + atomic_set(&sbi->s_rsv_window_head.rsv_goal_size, 0); + /* * set up enough so that it can read an inode */ sb->s_op = &ext3_sops; sb->s_export_op = &ext3_export_ops; - sb->dq_op = &ext3_qops; +#ifdef CONFIG_QUOTA + sb->s_qcop = &ext3_qctl_operations; + sb->dq_op = &ext3_quota_operations; +#endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ sb->s_root = 0; @@ -1413,6 +1595,12 @@ failed_mount2: brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); failed_mount: +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i]) + kfree(sbi->s_qf_names[i]); + } +#endif ext3_blkdev_remove(sbi); brelse(bh); out_fail: @@ -1839,7 +2027,7 @@ int ext3_remount (struct super_block * s /* * Allow the "check" option to be passed as a remount option.
*/ - if (!parse_options(data, sbi, &tmp, 1)) + if (!parse_options(data, sb, &tmp, 1)) return -EINVAL; if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) @@ -1959,70 +2147,152 @@ int ext3_statfs (struct super_block * sb #ifdef CONFIG_QUOTA -/* Blocks: (2 data blocks) * (3 indirect + 1 descriptor + 1 bitmap) + superblock */ -#define EXT3_OLD_QFMT_BLOCKS 11 -/* Blocks: quota info + (4 pointer blocks + 1 entry block) * (3 indirect + 1 descriptor + 1 bitmap) + superblock */ -#define EXT3_V0_QFMT_BLOCKS 27 - -static int (*old_write_dquot)(struct dquot *dquot); -static void (*old_drop_dquot)(struct inode *inode); - -static int fmt_to_blocks(int fmt) -{ - switch (fmt) { - case QFMT_VFS_OLD: - return EXT3_OLD_QFMT_BLOCKS; - case QFMT_VFS_V0: - return EXT3_V0_QFMT_BLOCKS; - } - return EXT3_MAX_TRANS_DATA; +static inline struct inode *dquot_to_inode(struct dquot *dquot) +{ + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]->f_dentry->d_inode; } -static int ext3_write_dquot(struct dquot *dquot) +static int ext3_dquot_initialize(struct inode *inode, int type) { - int nblocks; - int ret; - int err; handle_t *handle; - struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); - struct inode *qinode; + int ret, err; - nblocks = fmt_to_blocks(dqopt->info[dquot->dq_type].dqi_format->qf_fmt_id); - qinode = dqopt->files[dquot->dq_type]->f_dentry->d_inode; - handle = ext3_journal_start(qinode, nblocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = old_write_dquot(dquot); + /* We may create quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_initialize(inode, type); err = ext3_journal_stop(handle); - if (ret == 0) + if (!ret) ret = err; -out: return ret; } -static void ext3_drop_dquot(struct inode *inode) +static int ext3_dquot_drop(struct inode *inode) { - int nblocks, type; - struct quota_info *dqopt = sb_dqopt(inode->i_sb); handle_t *handle; 
+ int ret, err; - for (type = 0; type < MAXQUOTAS; type++) { - if (sb_has_quota_enabled(inode->i_sb, type)) - break; - } - if (type < MAXQUOTAS) - nblocks = fmt_to_blocks(dqopt->info[type].dqi_format->qf_fmt_id); + /* We may delete quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_drop(inode); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_write_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext3_journal_start(dquot_to_inode(dquot), + EXT3_QUOTA_TRANS_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit(dquot); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_mark_dquot_dirty(struct dquot * dquot) +{ + /* Are we journalling quotas? */ + if (EXT3_SB(dquot->dq_sb)->s_qf_names[0] || + EXT3_SB(dquot->dq_sb)->s_qf_names[1]) + return ext3_write_dquot(dquot); else - nblocks = 0; /* No quota => no drop */ - handle = ext3_journal_start(inode, 2*nblocks); + return dquot_mark_dquot_dirty(dquot); +} + +static int ext3_write_info(struct super_block *sb, int type) +{ + int ret, err; + handle_t *handle; + + /* Data block + inode block */ + handle = ext3_journal_start(sb->s_root->d_inode, 2); if (IS_ERR(handle)) - return; - old_drop_dquot(inode); - ext3_journal_stop(handle); - return; + return PTR_ERR(handle); + ret = dquot_commit_info(sb, type); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +/* + * Turn on quotas during mount time - we need to find + * the quota file and such... 
+ */ +static int ext3_quota_on_mount(struct super_block *sb, int type) +{ + int err; + struct dentry *dentry; + struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type], + .hash = 0, + .len = strlen(EXT3_SB(sb)->s_qf_names[type])}; + + dentry = lookup_hash(&name, sb->s_root); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry); + if (err) + dput(dentry); + /* We keep the dentry reference if everything went ok - we drop it + * on quota_off time */ + return err; +} + +/* Turn quotas off during mount time */ +static int ext3_quota_off_mount(struct super_block *sb, int type) +{ + int err; + struct dentry *dentry; + + dentry = sb_dqopt(sb)->files[type]->f_dentry; + err = vfs_quota_off_mount(sb, type); + /* We invalidate dentry - it has at least wrong hash... */ + d_invalidate(dentry); + dput(dentry); + return err; } + +/* + * Standard function to be called on quota_on + */ +static int ext3_quota_on(struct super_block *sb, int type, int format_id, + char *path) +{ + int err; + struct nameidata nd; + + /* Not journalling quota? */ + if (!EXT3_SB(sb)->s_qf_names[0] && !EXT3_SB(sb)->s_qf_names[1]) + return vfs_quota_on(sb, type, format_id, path); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + return err; + /* Quotafile not on the same filesystem? */ + if (nd.mnt->mnt_sb != sb) + return -EXDEV; + /* Quotafile not of fs root? */ + if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT3-fs: Quota file not on filesystem root. " + "Journalled quota will not work.\n"); + if (!ext3_should_journal_data(nd.dentry->d_inode)) + printk(KERN_WARNING "EXT3-fs: Quota file does not have " + "data-journalling. 
Journalled quota will not work.\n"); + path_release(&nd); + return vfs_quota_on(sb, type, format_id, path); +} + #endif static struct super_block *ext3_get_sb(struct file_system_type *fs_type, @@ -2047,13 +2317,6 @@ static int __init init_ext3_fs(void) err = init_inodecache(); if (err) goto out1; -#ifdef CONFIG_QUOTA - init_dquot_operations(&ext3_qops); - old_write_dquot = ext3_qops.write_dquot; - old_drop_dquot = ext3_qops.drop; - ext3_qops.write_dquot = ext3_write_dquot; - ext3_qops.drop = ext3_drop_dquot; -#endif err = register_filesystem(&ext3_fs_type); if (err) goto out; --- linux-2.6.6-rc1/fs/ext3/xattr.c 2004-02-17 20:48:45.000000000 -0800 +++ 25/fs/ext3/xattr.c 2004-04-18 22:25:52.937782712 -0700 @@ -787,7 +787,7 @@ ext3_xattr_set_handle2(handle_t *handle, EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); int block = ext3_new_block(handle, - inode, goal, 0, 0, &error); + inode, goal, &error); if (error) goto cleanup; ea_idebug(inode, "creating block %d", block); --- linux-2.6.6-rc1/fs/fat/inode.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/fat/inode.c 2004-04-18 22:25:24.928040840 -0700 @@ -734,6 +734,12 @@ void __exit fat_destroy_inodecache(void) printk(KERN_INFO "fat_inode_cache: not all structures were freed\n"); } +static int fat_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + static struct super_operations fat_sops = { .alloc_inode = fat_alloc_inode, .destroy_inode = fat_destroy_inode, @@ -742,6 +748,7 @@ static struct super_operations fat_sops .put_super = fat_put_super, .statfs = fat_statfs, .clear_inode = fat_clear_inode, + .remount_fs = fat_remount, .read_inode = make_bad_inode, --- linux-2.6.6-rc1/fs/fcntl.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/fcntl.c 2004-04-18 22:25:32.692860408 -0700 @@ -627,15 +627,12 @@ void kill_fasync(struct fasync_struct ** read_unlock(&fasync_lock); } } - EXPORT_SYMBOL(kill_fasync); static int __init fasync_init(void) { fasync_cache = 
kmem_cache_create("fasync_cache", - sizeof(struct fasync_struct), 0, 0, NULL, NULL); - if (!fasync_cache) - panic("cannot create fasync slab cache"); + sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL); return 0; } --- linux-2.6.6-rc1/fs/freevxfs/vxfs_super.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/freevxfs/vxfs_super.c 2004-04-18 22:25:24.928040840 -0700 @@ -56,12 +56,14 @@ MODULE_ALIAS("vxfs"); /* makes mount -t static void vxfs_put_super(struct super_block *); static int vxfs_statfs(struct super_block *, struct kstatfs *); +static int vxfs_remount(struct super_block *, int *, char *); static struct super_operations vxfs_super_ops = { .read_inode = vxfs_read_inode, .put_inode = vxfs_put_inode, .put_super = vxfs_put_super, .statfs = vxfs_statfs, + .remount_fs = vxfs_remount, }; /** @@ -121,6 +123,12 @@ vxfs_statfs(struct super_block *sbp, str return 0; } +static int vxfs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + /** * vxfs_read_super - read superblock into memory and initalize filesystem * @sbp: VFS superblock (to fill) --- linux-2.6.6-rc1/fs/hfs/inode.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/hfs/inode.c 2004-04-18 22:25:57.348112240 -0700 @@ -114,8 +114,8 @@ static int hfs_get_blocks(struct inode * return ret; } -static int hfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode->i_mapping->host; --- linux-2.6.6-rc1/fs/hfsplus/inode.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/hfsplus/inode.c 2004-04-18 22:25:57.349112088 -0700 @@ -114,8 +114,8 @@ static int hfsplus_get_blocks(struct ino return ret; } -static int hfsplus_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +static 
ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode->i_mapping->host; --- linux-2.6.6-rc1/fs/hfs/super.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/hfs/super.c 2004-04-18 22:25:24.929040688 -0700 @@ -94,6 +94,7 @@ static int hfs_statfs(struct super_block int hfs_remount(struct super_block *sb, int *flags, char *data) { + *flags |= MS_NODIRATIME; if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (!(*flags & MS_RDONLY)) { --- linux-2.6.6-rc1/fs/hugetlbfs/inode.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/hugetlbfs/inode.c 2004-04-18 22:25:50.165204208 -0700 @@ -194,6 +194,7 @@ static void hugetlbfs_delete_inode(struc hlist_del_init(&inode->i_hash); list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -236,6 +237,7 @@ static void hugetlbfs_forget_inode(struc hlist_del_init(&inode->i_hash); out_truncate: list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state |= I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -375,6 +377,7 @@ static struct inode *hugetlbfs_get_inode inode = new_inode(sb); if (inode) { + struct hugetlbfs_inode_info *info; inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; @@ -383,6 +386,8 @@ static struct inode *hugetlbfs_get_inode inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + info = HUGETLBFS_I(inode); + mpol_shared_policy_init(&info->policy); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -510,6 +515,33 @@ static void hugetlbfs_put_super(struct s } } +static kmem_cache_t *hugetlbfs_inode_cachep; + +static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) +{ + struct 
hugetlbfs_inode_info *p; + + p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); + if (!p) + return NULL; + return &p->vfs_inode; +} + +static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +{ + struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&ei->vfs_inode); +} + +static void hugetlbfs_destroy_inode(struct inode *inode) +{ + mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); + kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); +} + static struct address_space_operations hugetlbfs_aops = { .readpage = hugetlbfs_readpage, .prepare_write = hugetlbfs_prepare_write, @@ -541,6 +573,8 @@ static struct inode_operations hugetlbfs }; static struct super_operations hugetlbfs_ops = { + .alloc_inode = hugetlbfs_alloc_inode, + .destroy_inode = hugetlbfs_destroy_inode, .statfs = hugetlbfs_statfs, .drop_inode = hugetlbfs_drop_inode, .put_super = hugetlbfs_put_super, @@ -755,9 +789,16 @@ static int __init init_hugetlbfs_fs(void int error; struct vfsmount *vfsmount; + hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", + sizeof(struct hugetlbfs_inode_info), + 0, SLAB_RECLAIM_ACCOUNT, + init_once, NULL); + if (hugetlbfs_inode_cachep == NULL) + return -ENOMEM; + error = register_filesystem(&hugetlbfs_fs_type); if (error) - return error; + goto out; vfsmount = kern_mount(&hugetlbfs_fs_type); @@ -767,11 +808,16 @@ static int __init init_hugetlbfs_fs(void } error = PTR_ERR(vfsmount); + + out: + if (error) + kmem_cache_destroy(hugetlbfs_inode_cachep); return error; } static void __exit exit_hugetlbfs_fs(void) { + kmem_cache_destroy(hugetlbfs_inode_cachep); unregister_filesystem(&hugetlbfs_fs_type); } --- linux-2.6.6-rc1/fs/inode.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/inode.c 2004-04-18 22:25:34.595571152 -0700 @@ -282,7 +282,7 @@ static void dispose_list(struct list_hea /* * Invalidate all inodes 
for a device. */ -static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) +static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; int busy = 0, count = 0; @@ -295,12 +295,11 @@ static int invalidate_list(struct list_h next = next->next; if (tmp == head) break; - inode = list_entry(tmp, struct inode, i_list); - if (inode->i_sb != sb) - continue; + inode = list_entry(tmp, struct inode, i_sb_list); invalidate_inode_buffers(inode); if (!atomic_read(&inode->i_count)) { hlist_del_init(&inode->i_hash); + list_del(&inode->i_sb_list); list_move(&inode->i_list, dispose); inode->i_state |= I_FREEING; count++; @@ -336,10 +335,7 @@ int invalidate_inodes(struct super_block down(&iprune_sem); spin_lock(&inode_lock); - busy = invalidate_list(&inode_in_use, sb, &throw_away); - busy |= invalidate_list(&inode_unused, sb, &throw_away); - busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); - busy |= invalidate_list(&sb->s_io, sb, &throw_away); + busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); dispose_list(&throw_away); @@ -439,6 +435,7 @@ static void prune_icache(int nr_to_scan) continue; } hlist_del_init(&inode->i_hash); + list_del_init(&inode->i_sb_list); list_move(&inode->i_list, &freeable); inode->i_state |= I_FREEING; nr_pruned++; @@ -549,6 +546,7 @@ struct inode *new_inode(struct super_blo spin_lock(&inode_lock); inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -597,6 +595,7 @@ static struct inode * get_new_inode(stru inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -645,6 +644,7 @@ static struct inode * get_new_inode_fast inode->i_ino = ino; 
inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); hlist_add_head(&inode->i_hash, head); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -980,6 +980,7 @@ void generic_delete_inode(struct inode * struct super_operations *op = inode->i_sb->s_op; list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1025,6 +1026,7 @@ static void generic_forget_inode(struct hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); + list_del_init(&inode->i_sb_list); inode->i_state|=I_FREEING; inodes_stat.nr_inodes--; spin_unlock(&inode_lock); @@ -1213,36 +1215,23 @@ EXPORT_SYMBOL(inode_needs_sync); /* Function back in dquot.c */ int remove_inode_dquot_ref(struct inode *, int, struct list_head *); -void remove_dquot_ref(struct super_block *sb, int type, struct list_head *tofree_head) +void remove_dquot_ref(struct super_block *sb, int type, + struct list_head *tofree_head) { struct inode *inode; - struct list_head *act_head; if (!sb->dq_op) return; /* nothing to do */ spin_lock(&inode_lock); /* This lock is for inodes code */ - /* We don't have to lock against quota code - test IS_QUOTAINIT is just for speedup... 
*/ - - list_for_each(act_head, &inode_in_use) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); - } - list_for_each(act_head, &inode_unused) { - inode = list_entry(act_head, struct inode, i_list); - if (inode->i_sb == sb && IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); - } - list_for_each(act_head, &sb->s_dirty) { - inode = list_entry(act_head, struct inode, i_list); - if (IS_QUOTAINIT(inode)) - remove_inode_dquot_ref(inode, type, tofree_head); - } - list_for_each(act_head, &sb->s_io) { - inode = list_entry(act_head, struct inode, i_list); + /* + * We don't have to lock against quota code - test IS_QUOTAINIT is + * just for speedup... + */ + + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) if (IS_QUOTAINIT(inode)) remove_inode_dquot_ref(inode, type, tofree_head); - } + spin_unlock(&inode_lock); } @@ -1383,11 +1372,8 @@ void __init inode_init(unsigned long mem /* inode slab cache */ inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode), - 0, SLAB_HWCACHE_ALIGN, init_once, - NULL); - if (!inode_cachep) - panic("cannot create inode slab cache"); - + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, init_once, + NULL); set_shrinker(DEFAULT_SEEKS, shrink_icache_memory); } @@ -1408,5 +1394,4 @@ void init_special_inode(struct inode *in printk(KERN_DEBUG "init_special_inode: bogus i_mode (%o)\n", mode); } - EXPORT_SYMBOL(init_special_inode); --- linux-2.6.6-rc1/fs/isofs/inode.c 2004-01-09 00:04:32.000000000 -0800 +++ 25/fs/isofs/inode.c 2004-04-18 22:25:24.930040536 -0700 @@ -119,12 +119,20 @@ static void destroy_inodecache(void) printk(KERN_INFO "iso_inode_cache: not all structures were freed\n"); } +static int isofs_remount(struct super_block *sb, int *flags, char *data) +{ + /* we probably want a lot more here */ + *flags |= MS_RDONLY; + return 0; +} + static struct super_operations isofs_sops = { .alloc_inode = isofs_alloc_inode, 
.destroy_inode = isofs_destroy_inode, .read_inode = isofs_read_inode, .put_super = isofs_put_super, .statfs = isofs_statfs, + .remount_fs = isofs_remount, }; /* the export_operations structure for describing --- linux-2.6.6-rc1/fs/jbd/journal.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/jbd/journal.c 2004-04-18 22:25:57.059156168 -0700 @@ -599,6 +599,7 @@ struct journal_head * journal_get_descri return NULL; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + memset(bh->b_data, 0, journal->j_blocksize); bh->b_state |= (1 << BH_Dirty); BUFFER_TRACE(bh, "return this buffer"); return journal_add_journal_head(bh); @@ -1673,9 +1674,17 @@ repeat: if (buffer_jbd(bh)) { jh = bh2jh(bh); } else { - J_ASSERT_BH(bh, - (atomic_read(&bh->b_count) > 0) || - (bh->b_page && bh->b_page->mapping)); + if (!(atomic_read(&bh->b_count) > 0 || + (bh->b_page && bh->b_page->mapping))) { + printk(KERN_EMERG "%s: bh->b_count=%d\n", + __FUNCTION__, atomic_read(&bh->b_count)); + printk(KERN_EMERG "%s: bh->b_page=%p\n", + __FUNCTION__, bh->b_page); + if (bh->b_page) + printk(KERN_EMERG "%s: " + "bh->b_page->mapping=%p\n", + __FUNCTION__, bh->b_page->mapping); + } if (!new_jh) { jbd_unlock_bh_journal_head(bh); --- linux-2.6.6-rc1/fs/jbd/transaction.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/jbd/transaction.c 2004-04-18 22:25:44.898004944 -0700 @@ -931,7 +931,6 @@ out: int journal_dirty_data(handle_t *handle, struct buffer_head *bh) { journal_t *journal = handle->h_transaction->t_journal; - int need_brelse = 0; struct journal_head *jh; if (is_handle_aborted(handle)) @@ -1016,24 +1015,6 @@ int journal_dirty_data(handle_t *handle, goto no_journal; } - /* - * This buffer may be undergoing writeout in commit. We - * can't return from here and let the caller dirty it - * again because that can cause the write-out loop in - * commit to never terminate. 
- */ - if (buffer_dirty(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - need_brelse = 1; - sync_dirty_buffer(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - /* The buffer may become locked again at any - time if it is redirtied */ - } - /* journal_clean_data_list() may have got there first */ if (jh->b_transaction != NULL) { JBUFFER_TRACE(jh, "unfile from commit"); @@ -1063,10 +1044,6 @@ int journal_dirty_data(handle_t *handle, no_journal: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - if (need_brelse) { - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - } JBUFFER_TRACE(jh, "exit"); journal_put_journal_head(jh); return 0; @@ -1111,26 +1088,21 @@ int journal_dirty_metadata(handle_t *han * I _think_ we're OK here with SMP barriers - a mistaken decision will * result in this test being false, so we go in and take the locks. */ - if (jh->b_transaction == handle->h_transaction && - jh->b_jlist == BJ_Metadata) { + if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { JBUFFER_TRACE(jh, "fastpath"); J_ASSERT_JH(jh, jh->b_transaction == journal->j_running_transaction); goto out_unlock_bh; } - spin_lock(&journal->j_list_lock); set_buffer_jbddirty(bh); - J_ASSERT_JH(jh, jh->b_transaction != NULL); - /* * Metadata already on the current transaction list doesn't * need to be filed. Metadata on another transaction's list must * be committing, and will be refiled once the commit completes: * leave it alone for now. */ - if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); J_ASSERT_JH(jh, jh->b_transaction == @@ -1138,17 +1110,15 @@ int journal_dirty_metadata(handle_t *han J_ASSERT_JH(jh, jh->b_next_transaction == transaction); /* And this case is illegal: we can't reuse another * transaction's data buffer, ever. 
*/ - /* FIXME: writepage() should be journalled */ - goto out_unlock_list; + goto out_unlock_bh; } /* That test should have eliminated the following case: */ J_ASSERT_JH(jh, jh->b_frozen_data == 0); JBUFFER_TRACE(jh, "file as BJ_Metadata"); + spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); - -out_unlock_list: spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); --- linux-2.6.6-rc1/fs/jffs2/fs.c 2003-10-17 15:58:04.000000000 -0700 +++ 25/fs/jffs2/fs.c 2004-04-18 22:25:24.934039928 -0700 @@ -350,7 +350,7 @@ int jffs2_remount_fs (struct super_block if (!(*flags & MS_RDONLY)) jffs2_start_garbage_collect_thread(c); - sb->s_flags = (sb->s_flags & ~MS_RDONLY)|(*flags & MS_RDONLY); + *flags |= MS_NOATIME; return 0; } --- linux-2.6.6-rc1/fs/jffs2/super.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/jffs2/super.c 2004-04-18 22:25:24.934039928 -0700 @@ -129,7 +129,7 @@ static struct super_block *jffs2_get_sb_ mtd->index, mtd->name)); sb->s_op = &jffs2_super_operations; - sb->s_flags |= MS_NODIRATIME; + sb->s_flags |= MS_NOATIME; ret = jffs2_do_fill_super(sb, data, (flags&MS_VERBOSE)?1:0); --- linux-2.6.6-rc1/fs/jffs/inode-v23.c 2004-04-03 20:39:13.000000000 -0800 +++ 25/fs/jffs/inode-v23.c 2004-04-18 22:25:24.933040080 -0700 @@ -1771,6 +1771,12 @@ jffs_write_super(struct super_block *sb) unlock_kernel(); } +static int jffs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + static struct super_operations jffs_ops = { .read_inode = jffs_read_inode, @@ -1778,6 +1784,7 @@ static struct super_operations jffs_ops .put_super = jffs_put_super, .write_super = jffs_write_super, .statfs = jffs_statfs, + .remount_fs = jffs_remount, }; static struct super_block *jffs_get_sb(struct file_system_type *fs_type, --- linux-2.6.6-rc1/fs/jfs/inode.c 2004-02-03 20:42:38.000000000 -0800 +++ 25/fs/jfs/inode.c 2004-04-18 22:25:57.350111936 -0700 @@ -302,8 +302,8 @@ static 
sector_t jfs_bmap(struct address_ return generic_block_bmap(mapping, block, jfs_get_block); } -static int jfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; --- linux-2.6.6-rc1/fs/Kconfig 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/Kconfig 2004-04-18 22:25:54.925480536 -0700 @@ -406,12 +406,15 @@ config QUOTA help If you say Y here, you will be able to set per user limits for disk usage (also called disk quotas). Currently, it works for the - ext2, ext3, and reiserfs file system. You need additional software - in order to use quota support (you can download sources from + ext2, ext3, and reiserfs file system. ext3 also supports journalled + quotas for which you don't need to run quotacheck(8) after an unclean + shutdown. You need additional software in order to use quota support + (you can download sources from ). For further details, read the Quota mini-HOWTO, available from - . Probably the quota - support is only useful for multi user systems. If unsure, say N. + , or the documentation provided + with the quota tools. Probably the quota support is only useful for + multi user systems. If unsure, say N. config QFMT_V1 tristate "Old quota format support" @@ -465,7 +468,7 @@ config AUTOFS4_FS automounter (amd), which is a pure user space daemon. To use the automounter you need the user-space tools from - ; you also + ; you also want to answer Y to "NFS file system support", below. 
To compile this support as a module, choose M here: the module will be --- linux-2.6.6-rc1/fs/locks.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/locks.c 2004-04-18 22:25:32.694860104 -0700 @@ -1994,15 +1994,13 @@ void steal_locks(fl_owner_t from) } unlock_kernel(); } - EXPORT_SYMBOL(steal_locks); static int __init filelock_init(void) { filelock_cache = kmem_cache_create("file_lock_cache", - sizeof(struct file_lock), 0, 0, init_once, NULL); - if (!filelock_cache) - panic("cannot create file lock slab cache"); + sizeof(struct file_lock), 0, SLAB_PANIC, + init_once, NULL); return 0; } --- linux-2.6.6-rc1/fs/mpage.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/mpage.c 2004-04-18 22:25:24.935039776 -0700 @@ -485,8 +485,7 @@ mpage_writepage(struct bio *bio, struct break; block_in_file++; } - if (page_block == 0) - buffer_error(); + BUG_ON(page_block == 0); first_unmapped = page_block; --- linux-2.6.6-rc1/fs/namespace.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/namespace.c 2004-04-18 22:25:55.336418064 -0700 @@ -36,6 +36,9 @@ static inline int sysfs_init(void) /* spinlock for vfsmount related operations, inplace of dcache_lock */ spinlock_t vfsmount_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + +EXPORT_SYMBOL(vfsmount_lock); + static struct list_head *mount_hashtable; static int hash_mask, hash_bits; static kmem_cache_t *mnt_cache; @@ -777,7 +780,7 @@ long do_mount(char * dev_name, char * di mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; - flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV); + flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); /* ... 
and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); @@ -1142,9 +1145,7 @@ void __init mnt_init(unsigned long mempa int i; mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!mnt_cache) - panic("Cannot create vfsmount cache"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); order = 0; mount_hashtable = (struct list_head *) --- linux-2.6.6-rc1/fs/ncpfs/inode.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/ncpfs/inode.c 2004-04-18 22:25:24.937039472 -0700 @@ -85,6 +85,12 @@ static void destroy_inodecache(void) printk(KERN_INFO "ncp_inode_cache: not all structures were freed\n"); } +static int ncp_remount(struct super_block *sb, int *flags, char* data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + static struct super_operations ncp_sops = { .alloc_inode = ncp_alloc_inode, @@ -93,6 +99,7 @@ static struct super_operations ncp_sops .delete_inode = ncp_delete_inode, .put_super = ncp_put_super, .statfs = ncp_statfs, + .remount_fs = ncp_remount, }; extern struct dentry_operations ncp_root_dentry_operations; --- linux-2.6.6-rc1/fs/nfs/direct.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/nfs/direct.c 2004-04-18 22:25:57.350111936 -0700 @@ -32,6 +32,7 @@ * 18 Dec 2001 Initial implementation for 2.4 --cel * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy * 08 Jun 2003 Port to 2.5 APIs --cel + * 31 Mar 2004 Handle direct I/O without VFS support --cel * */ @@ -191,16 +192,17 @@ nfs_direct_read_seg(struct inode *inode, * writes so that this read will see them when we read from the * server. 
*/ -static int +static ssize_t nfs_direct_read(struct inode *inode, struct file *file, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { - int tot_bytes = 0; + ssize_t tot_bytes = 0; unsigned long seg = 0; while ((seg < nr_segs) && (tot_bytes >= 0)) { - int result, page_count; + ssize_t result; + int page_count; struct page **pages; const struct iovec *vec = &iov[seg++]; unsigned long user_addr = (unsigned long) vec->iov_base; @@ -359,16 +361,17 @@ sync_retry: * that non-direct readers might access, so they will pick up these * writes immediately. */ -static int +static ssize_t nfs_direct_write(struct inode *inode, struct file *file, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { - int tot_bytes = 0; + ssize_t tot_bytes = 0; unsigned long seg = 0; while ((seg < nr_segs) && (tot_bytes >= 0)) { - int result, page_count; + ssize_t result; + int page_count; struct page **pages; const struct iovec *vec = &iov[seg++]; unsigned long user_addr = (unsigned long) vec->iov_base; @@ -407,18 +410,12 @@ nfs_direct_write(struct inode *inode, st * file_offset: offset in file to begin the operation * nr_segs: size of iovec array * - * Usually a file system implements direct I/O by calling out to - * blockdev_direct_IO. The NFS client doesn't have a backing block - * device, so we do everything by hand instead. - * - * The inode's i_sem is no longer held by the VFS layer before it calls - * this function to do a write. 
*/ -int +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { - int result = -EINVAL; + ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; @@ -429,10 +426,6 @@ nfs_direct_IO(int rw, struct kiocb *iocb if (!is_sync_kiocb(iocb)) goto out; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result < 0) - goto out; - switch (rw) { case READ: dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", @@ -453,6 +446,163 @@ nfs_direct_IO(int rw, struct kiocb *iocb } out: - dprintk("NFS: direct_IO result=%d\n", result); + dprintk("NFS: direct_IO result=%zd\n", result); return result; } + +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @buf: user's buffer into which to read data + * count: number of bytes to read + * pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file. For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change. So our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. 
+ */ +ssize_t +nfs_file_direct_read(struct kiocb *iocb, char *buf, size_t count, loff_t pos) +{ + ssize_t retval = -EINVAL; + loff_t *ppos = &iocb->ki_pos; + struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct iovec iov = { .iov_base = buf, .iov_len = count }; + + dprintk("nfs: direct read(%s/%s, %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long) pos); + + if (!is_sync_kiocb(iocb)) + goto out; + if (count < 0) + goto out; + retval = -EFAULT; + if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len)) + goto out; + retval = 0; + if (!count) + goto out; + + /* XXX: why do this for reads? */ + if (mapping->nrpages) { + retval = filemap_fdatawrite(mapping); + if (retval == 0) + retval = filemap_fdatawait(mapping); + if (retval) + goto out; + } + + retval = nfs_direct_read(inode, file, &iov, pos, 1); + if (retval > 0) + *ppos = pos + retval; + +out: + return retval; +} + +/** + * nfs_file_direct_write - file direct write operation for NFS files + * @iocb: target I/O control block + * @buf: user's buffer from which to write data + * count: number of bytes to write + * pos: byte offset in file where writing starts + * + * We use this function for direct writes instead of calling + * generic_file_aio_write() in order to avoid taking the inode + * semaphor and updating the i_size. The NFS server will set + * the new i_size and this client must read the updated size + * back into its cache. We let the server do generic write + * parameter checking and report problems. + * + * We also avoid an unnecessary invocation of generic_osync_inode(), + * as it is fairly meaningless to sync the metadata of an NFS file. + * + * And we eliminate local atime updates, see direct read above. 
+ * + * Note that O_APPEND is not supported for NFS direct writes, as there + * is no atomic O_APPEND write facility in the NFS protocol. + */ +ssize_t +nfs_file_direct_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos) +{ + ssize_t retval = -EINVAL; + loff_t *ppos = &iocb->ki_pos; + struct file *file = iocb->ki_filp; + struct dentry *dentry = file->f_dentry; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; + unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; + + dfprintk(VFS, "nfs: direct write(%s/%s(%ld), %lu@%lu)\n", + dentry->d_parent->d_name.name, dentry->d_name.name, + inode->i_ino, (unsigned long) count, (unsigned long) pos); + + if (!is_sync_kiocb(iocb)) + goto out; + if (count < 0) + goto out; + if (pos < 0) + goto out; + retval = -EFAULT; + if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) + goto out; + if (file->f_error) { + retval = file->f_error; + file->f_error = 0; + goto out; + } + retval = -EFBIG; + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + goto out; + } + if (count > limit - (unsigned long) pos) + count = limit - (unsigned long) pos; + } + retval = 0; + if (!count) + goto out; + + /* + * The server's file system may not be POSIX-compliant, so we want + * to ensure that the suid bit is gone. 
+ */ + down(&inode->i_sem); + retval = remove_suid(dentry); + up(&inode->i_sem); + if (retval) + goto out; + + if (mapping->nrpages) { + retval = filemap_fdatawrite(mapping); + if (retval == 0) + retval = filemap_fdatawait(mapping); + if (retval) + goto out; + } + + retval = nfs_direct_write(inode, file, &iov, pos, 1); + if (mapping->nrpages) + invalidate_inode_pages2(mapping); + if (retval > 0) + *ppos = pos + retval; + +out: + return retval; +} --- linux-2.6.6-rc1/fs/nfsd/nfs4proc.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/nfsd/nfs4proc.c 2004-04-18 22:25:24.939039168 -0700 @@ -66,10 +66,31 @@ fh_dup2(struct svc_fh *dst, struct svc_f } static int +do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + int accmode, status; + + if (open->op_truncate && + !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfserr_inval; + + accmode = MAY_NOP; + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) + accmode = MAY_READ; + if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE) + accmode |= (MAY_WRITE | MAY_TRUNC); + accmode |= MAY_OWNER_OVERRIDE; + + status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + + return status; +} + +static int do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh resfh; - int accmode, status; + int status; fh_init(&resfh, NFS4_FHSIZE); open->op_truncate = 0; @@ -92,6 +113,8 @@ do_open_lookup(struct svc_rqst *rqstp, s if (!status) { set_change_info(&open->op_cinfo, current_fh); + + /* set reply cache */ fh_dup2(current_fh, &resfh); /* XXXJBF: keep a saved svc_fh struct instead?? 
*/ open->op_stateowner->so_replay.rp_openfh_len = @@ -100,30 +123,66 @@ do_open_lookup(struct svc_rqst *rqstp, s &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); - accmode = MAY_NOP; - if (open->op_share_access & NFS4_SHARE_ACCESS_READ) - accmode = MAY_READ; - if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE) - accmode |= (MAY_WRITE | MAY_TRUNC); - accmode |= MAY_OWNER_OVERRIDE; - status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + status = do_open_permission(rqstp, current_fh, open); } fh_put(&resfh); return status; } +static int +do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + int status; + + dprintk("NFSD: do_open_fhandle\n"); + + /* we don't know the target directory, and therefore can not + * set the change info + */ + + memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); + + /* set replay cache */ + open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; + memcpy(open->op_stateowner->so_replay.rp_openfh, + &current_fh->fh_handle.fh_base, + current_fh->fh_handle.fh_size); + + open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && + !open->op_iattr.ia_size; + + status = do_open_permission(rqstp, current_fh, open); + + return status; +} + + +/* + * nfs4_unlock_state() called in encode + */ static inline int nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { int status; - dprintk("NFSD: nfsd4_open filename %.*s\n", - (int)open->op_fname.len, open->op_fname.data); + dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", + (int)open->op_fname.len, open->op_fname.data, + open->op_stateowner); + + if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + + if (nfs4_in_no_grace() && + open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_no_grace; /* This check required by spec.
*/ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + open->op_stateowner = NULL; + nfs4_lock_state(); + /* check seqid for replay. set nfs4_owner */ status = nfsd4_process_open1(open); if (status == NFSERR_REPLAY_ME) { @@ -141,16 +200,30 @@ nfsd4_open(struct svc_rqst *rqstp, struc } if (status) return status; + if (open->op_claim_type == NFS4_OPEN_CLAIM_NULL) { /* * This block of code will (1) set CURRENT_FH to the file being opened, * creating it if necessary, (2) set open->op_cinfo, * (3) set open->op_truncate if the file is to be truncated * after opening, (4) do permission checking. */ - status = do_open_lookup(rqstp, current_fh, open); - if (status) - return status; - + status = do_open_lookup(rqstp, current_fh, open); + if (status) + return status; + } else if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) { + /* + * The CURRENT_FH is already set to the file being opened. This + * block of code will (1) set open->op_cinfo, (2) set + * open->op_truncate if the file is to be truncated after opening, + * (3) do permission checking. + */ + status = do_open_fhandle(rqstp, current_fh, open); + if (status) + return status; + } else { + printk("NFSD: unsupported OPEN claim type\n"); + return nfserr_inval; + } /* * nfsd4_process_open2() does the actual opening of the file. 
If * successful, it (1) truncates the file if open->op_truncate was @@ -187,9 +260,14 @@ nfsd4_putfh(struct svc_rqst *rqstp, stru static inline int nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh) { + int status; + fh_put(current_fh); - return exp_pseudoroot(rqstp->rq_client, current_fh, + status = exp_pseudoroot(rqstp->rq_client, current_fh, &rqstp->rq_chandle); + if (!status) + status = nfsd_setuser(rqstp, current_fh->fh_export); + return status; } static inline int @@ -402,6 +480,8 @@ nfsd4_read(struct svc_rqst *rqstp, struc int status; /* no need to check permission - this will be done in nfsd_read() */ + if (nfs4_in_grace()) + return nfserr_grace; if (read->rd_offset >= OFFSET_MAX) return nfserr_inval; @@ -448,6 +528,9 @@ out: static inline int nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir) { + u64 cookie = readdir->rd_cookie; + static const nfs4_verifier zeroverf; + /* no need to check permission - this will be done in nfsd_readdir() */ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) @@ -456,7 +539,8 @@ nfsd4_readdir(struct svc_rqst *rqstp, st readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; - if (readdir->rd_cookie > ~(u32)0) + if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || + (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) return nfserr_bad_cookie; readdir->rd_rqstp = rqstp; @@ -521,10 +605,13 @@ static inline int nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) { struct nfs4_stateid *stp; - int status = nfserr_nofilehandle; + int status = nfs_ok; + + if (nfs4_in_grace()) + return nfserr_grace; if (!current_fh->fh_dentry) - goto out; + return nfserr_nofilehandle; status = nfs_ok; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { @@ -563,6 +650,9 @@ nfsd4_write(struct svc_rqst *rqstp, stru u32 *p; int status = nfs_ok; + if (nfs4_in_grace()) + 
return nfserr_grace; + /* no need to check permission - this will be done in nfsd_write() */ if (write->wr_offset >= OFFSET_MAX) @@ -757,7 +847,9 @@ nfsd4_proc_compound(struct svc_rqst *rqs break; case OP_CLOSE: op->status = nfsd4_close(rqstp, &current_fh, &op->u.close); - op->replay = &op->u.close.cl_stateowner->so_replay; + if (op->u.close.cl_stateowner) + op->replay = + &op->u.close.cl_stateowner->so_replay; break; case OP_COMMIT: op->status = nfsd4_commit(rqstp, &current_fh, &op->u.commit); @@ -776,13 +868,18 @@ nfsd4_proc_compound(struct svc_rqst *rqs break; case OP_LOCK: op->status = nfsd4_lock(rqstp, &current_fh, &op->u.lock); - op->replay = &op->u.lock.lk_stateowner->so_replay; + if (op->u.lock.lk_stateowner) + op->replay = + &op->u.lock.lk_stateowner->so_replay; break; case OP_LOCKT: op->status = nfsd4_lockt(rqstp, &current_fh, &op->u.lockt); break; case OP_LOCKU: op->status = nfsd4_locku(rqstp, &current_fh, &op->u.locku); + if (op->u.locku.lu_stateowner) + op->replay = + &op->u.locku.lu_stateowner->so_replay; break; case OP_LOOKUP: op->status = nfsd4_lookup(rqstp, &current_fh, &op->u.lookup); @@ -797,15 +894,21 @@ nfsd4_proc_compound(struct svc_rqst *rqs break; case OP_OPEN: op->status = nfsd4_open(rqstp, &current_fh, &op->u.open); - op->replay = &op->u.open.op_stateowner->so_replay; + if (op->u.open.op_stateowner) + op->replay = + &op->u.open.op_stateowner->so_replay; break; case OP_OPEN_CONFIRM: op->status = nfsd4_open_confirm(rqstp, &current_fh, &op->u.open_confirm); - op->replay = &op->u.open_confirm.oc_stateowner->so_replay; + if (op->u.open_confirm.oc_stateowner) + op->replay = + &op->u.open_confirm.oc_stateowner->so_replay; break; case OP_OPEN_DOWNGRADE: op->status = nfsd4_open_downgrade(rqstp, &current_fh, &op->u.open_downgrade); - op->replay = &op->u.open_downgrade.od_stateowner->so_replay; + if (op->u.open_downgrade.od_stateowner) + op->replay = + &op->u.open_downgrade.od_stateowner->so_replay; break; case OP_PUTFH: op->status = nfsd4_putfh(rqstp, &current_fh, &op->u.putfh); ---
linux-2.6.6-rc1/fs/nfsd/nfs4state.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/nfsd/nfs4state.c 2004-04-18 22:25:24.944038408 -0700 @@ -52,6 +52,7 @@ /* Globals */ time_t boot_time; +static time_t grace_end = 0; static u32 current_clientid = 1; static u32 current_ownerid; static u32 current_fileid; @@ -89,6 +90,9 @@ nfs4_lock_state(void) down(&client_sema); } +/* + * nfs4_unlock_state(); called in encode + */ void nfs4_unlock_state(void) { @@ -136,12 +140,16 @@ static void release_file(struct nfs4_fil * * client_lru holds client queue ordered by nfs4_client.cl_time * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. */ static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; static struct list_head client_lru; +static struct list_head close_lru; static inline void renew_client(struct nfs4_client *clp) @@ -376,7 +384,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp unsigned int strhashval; struct nfs4_client * conf, * unconf, * new, * clp; int status; - struct list_head *pos, *next; status = nfserr_inval; if (!check_name(clname)) @@ -391,8 +398,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp conf = NULL; nfs4_lock_state(); - list_for_each_safe(pos, next, &conf_str_hashtbl[strhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_strhash); + list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) { if (!cmp_name(&clp->cl_name, &clname)) continue; /* @@ -422,8 +428,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp break; } unconf = NULL; - list_for_each_safe(pos, next, &unconf_str_hashtbl[strhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_strhash); + list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) { if (!cmp_name(&clp->cl_name, &clname)) continue; /* cl_name match 
from a previous SETCLIENTID operation */ @@ -549,7 +554,6 @@ nfsd4_setclientid_confirm(struct svc_rqs struct nfs4_client *clp, *conf = NULL, *unconf = NULL; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; - struct list_head *pos, *next; int status; status = nfserr_stale_clientid; @@ -562,8 +566,7 @@ nfsd4_setclientid_confirm(struct svc_rqs idhashval = clientid_hashval(clid->cl_id); nfs4_lock_state(); - list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_idhash); + list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { if (!cmp_clid(&clp->cl_clientid, clid)) continue; @@ -582,8 +585,7 @@ nfsd4_setclientid_confirm(struct svc_rqs conf = clp; break; } - list_for_each_safe(pos, next, &unconf_id_hashtbl[idhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_idhash); + list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { if (!cmp_clid(&clp->cl_clientid, clid)) continue; status = nfserr_inval; @@ -774,6 +776,8 @@ alloc_init_open_stateowner(unsigned int INIT_LIST_HEAD(&sop->so_perclient); INIT_LIST_HEAD(&sop->so_perfilestate); INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ + INIT_LIST_HEAD(&sop->so_close_lru); + sop->so_time = 0; list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); list_add(&sop->so_perclient, &clp->cl_perclient); @@ -814,6 +818,7 @@ release_stateowner(struct nfs4_stateowne list_del(&sop->so_strhash); list_del(&sop->so_perclient); list_del(&sop->so_perlockowner); + list_del(&sop->so_close_lru); del_perclient++; while (!list_empty(&sop->so_perfilestate)) { stp = list_entry(sop->so_perfilestate.next, @@ -882,6 +887,19 @@ release_file(struct nfs4_file *fp) } void +move_to_close_lru(struct nfs4_stateowner *sop) +{ + dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); + /* remove stateowner from all other hash lists except perclient 
*/ + list_del_init(&sop->so_idhash); + list_del_init(&sop->so_strhash); + list_del_init(&sop->so_perlockowner); + + list_add_tail(&sop->so_close_lru, &close_lru); + sop->so_time = get_seconds(); +} + +void release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp, int flag) { @@ -890,16 +908,13 @@ release_state_owner(struct nfs4_stateid dprintk("NFSD: release_state_owner\n"); release_stateid(stp, flag); - /* - * release unused nfs4_stateowners. - * XXX will need to be placed on an open_stateid_lru list to be + + /* place unused nfs4_stateowners on so_close_lru list to be * released by the laundromat service after the lease period * to enable us to handle CLOSE replay */ - if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) { - release_stateowner(sop); - *sopp = NULL; - } + if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) + move_to_close_lru(sop); /* unused nfs4_file's are releseed. XXX slab cache? */ if (list_empty(&fp->fi_perfile)) { release_file(fp); @@ -916,11 +931,9 @@ cmp_owner_str(struct nfs4_stateowner *so /* search ownerstr_hashtbl[] for owner */ static int find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, struct nfs4_stateowner **op) { - struct list_head *pos, *next; struct nfs4_stateowner *local = NULL; - list_for_each_safe(pos, next, &ownerstr_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateowner, so_strhash); + list_for_each_entry(local, &ownerstr_hashtbl[hashval], so_strhash) { if(!cmp_owner_str(local, &open->op_owner, &open->op_clientid)) continue; *op = local; @@ -933,12 +946,10 @@ find_openstateowner_str(unsigned int has static int verify_clientid(struct nfs4_client **client, clientid_t *clid) { - struct list_head *pos, *next; struct nfs4_client *clp; unsigned int idhashval = clientid_hashval(clid->cl_id); - list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_idhash); + list_for_each_entry(clp, &conf_id_hashtbl[idhashval], 
cl_idhash) { if (!cmp_clid(&clp->cl_clientid, clid)) continue; *client = clp; @@ -951,11 +962,9 @@ verify_clientid(struct nfs4_client **cli /* search file_hashtbl[] for file */ static int find_file(unsigned int hashval, struct inode *ino, struct nfs4_file **fp) { - struct list_head *pos, *next; struct nfs4_file *local = NULL; - list_for_each_safe(pos, next, &file_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_file, fi_hash); + list_for_each_entry(local, &file_hashtbl[hashval], fi_hash) { if (local->fi_inode == ino) { *fp = local; return(1); @@ -1011,15 +1020,13 @@ nfs4_share_conflict(struct svc_fh *curre unsigned int fi_hashval; struct nfs4_file *fp; struct nfs4_stateid *stp; - struct list_head *pos, *next; dprintk("NFSD: nfs4_share_conflict\n"); fi_hashval = file_hashval(ino); if (find_file(fi_hashval, ino, &fp)) { /* Search for conflicting share reservations */ - list_for_each_safe(pos, next, &fp->fi_perfile) { - stp = list_entry(pos, struct nfs4_stateid, st_perfile); + list_for_each_entry(stp, &fp->fi_perfile, st_perfile) { if (test_bit(deny_type, &stp->st_deny_bmap) || test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) return nfserr_share_denied; @@ -1066,6 +1073,8 @@ nfs4_file_downgrade(struct file *filp, u * notfound: * verify clientid * create new owner + * + * called with nfs4_lock_state() held. 
*/ int nfsd4_process_open1(struct nfsd4_open *open) @@ -1082,9 +1091,8 @@ nfsd4_process_open1(struct nfsd4_open *o status = nfserr_stale_clientid; if (STALE_CLIENTID(&open->op_clientid)) - goto out; + return status; - nfs4_lock_state(); strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); if (find_openstateowner_str(strhashval, open, &sop)) { open->op_stateowner = sop; @@ -1104,7 +1112,7 @@ nfsd4_process_open1(struct nfsd4_open *o } /* replay: indicate to calling function */ status = NFSERR_REPLAY_ME; - goto out; + return status; } if (sop->so_confirmed) { if (open->op_seqid == sop->so_seqid + 1) { @@ -1142,25 +1150,27 @@ instantiate_new_owner: renew: renew_client(sop->so_client); out: - nfs4_unlock_state(); + if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + status = nfserr_reclaim_bad; return status; } - +/* + * called with nfs4_lock_state() held. + */ int nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct iattr iattr; struct nfs4_stateowner *sop = open->op_stateowner; - struct nfs4_file *fp; + struct nfs4_file *fp = NULL; struct inode *ino; unsigned int fi_hashval; - struct list_head *pos, *next; struct nfs4_stateid *stq, *stp = NULL; int status; status = nfserr_resource; if (!sop) - goto out; + return status; ino = current_fh->fh_dentry->d_inode; @@ -1168,13 +1178,11 @@ nfsd4_process_open2(struct svc_rqst *rqs if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) goto out; - nfs4_lock_state(); fi_hashval = file_hashval(ino); if (find_file(fi_hashval, ino, &fp)) { /* Search for conflicting share reservations */ status = nfserr_share_denied; - list_for_each_safe(pos, next, &fp->fi_perfile) { - stq = list_entry(pos, struct nfs4_stateid, st_perfile); + list_for_each_entry(stq, &fp->fi_perfile, st_perfile) { if(stq->st_stateowner == sop) { stp = stq; continue; @@ -1253,6 +1261,17 @@ out: if (fp && list_empty(&fp->fi_perfile)) release_file(fp); + if 
(open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) { + if (status) + status = nfserr_reclaim_bad; + else { + /* successful reclaim. so_seqid is decremented because + * it will be bumped in encode_open + */ + open->op_stateowner->so_confirmed = 1; + open->op_stateowner->so_seqid--; + } + } /* * To finish the open response, we just need to set the rflags. */ @@ -1260,12 +1279,12 @@ out: if (!open->op_stateowner->so_confirmed) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; - nfs4_unlock_state(); return status; out_free: kfree(stp); goto out; } + static struct work_struct laundromat_work; static void laundromat_main(void *); static DECLARE_WORK(laundromat_work, laundromat_main, NULL); @@ -1274,7 +1293,6 @@ int nfsd4_renew(clientid_t *clid) { struct nfs4_client *clp; - struct list_head *pos, *next; unsigned int idhashval; int status; @@ -1286,15 +1304,13 @@ nfsd4_renew(clientid_t *clid) goto out; status = nfs_ok; idhashval = clientid_hashval(clid->cl_id); - list_for_each_safe(pos, next, &conf_id_hashtbl[idhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_idhash); + list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { if (!cmp_clid(&clp->cl_clientid, clid)) continue; renew_client(clp); goto out; } - list_for_each_safe(pos, next, &unconf_id_hashtbl[idhashval]) { - clp = list_entry(pos, struct nfs4_client, cl_idhash); + list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { if (!cmp_clid(&clp->cl_clientid, clid)) continue; renew_client(clp); @@ -1316,9 +1332,11 @@ time_t nfs4_laundromat(void) { struct nfs4_client *clp; + struct nfs4_stateowner *sop; struct list_head *pos, *next; time_t cutoff = get_seconds() - NFSD_LEASE_TIME; - time_t t, return_val = NFSD_LEASE_TIME; + time_t t, clientid_val = NFSD_LEASE_TIME; + time_t u, close_val = NFSD_LEASE_TIME; nfs4_lock_state(); @@ -1327,18 +1345,30 @@ nfs4_laundromat(void) clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { t = 
clp->cl_time - cutoff; - if (return_val > t) - return_val = t; + if (clientid_val > t) + clientid_val = t; break; } dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); expire_client(clp); } - if (return_val < NFSD_LAUNDROMAT_MINTIMEOUT) - return_val = NFSD_LAUNDROMAT_MINTIMEOUT; + list_for_each_safe(pos, next, &close_lru) { + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); + if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { + u = sop->so_time - cutoff; + if (close_val > u) + close_val = u; + break; + } + dprintk("NFSD: purging unused open stateowner (so_id %d)\n", + sop->so_id); + release_stateowner(sop); + } + if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) + clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; nfs4_unlock_state(); - return return_val; + return clientid_val; } void @@ -1351,17 +1381,19 @@ laundromat_main(void *not_used) schedule_delayed_work(&laundromat_work, t*HZ); } -/* search ownerid_hashtbl[] for stateid owner (stateid->si_stateownerid) */ +/* search ownerid_hashtbl[] and close_lru for stateid owner + * (stateid->si_stateownerid) + */ struct nfs4_stateowner * -find_openstateowner_id(u32 st_id) { - struct list_head *pos, *next; +find_openstateowner_id(u32 st_id, int flags) { struct nfs4_stateowner *local = NULL; - unsigned int hashval = ownerid_hashval(st_id); - list_for_each_safe(pos, next, &ownerid_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateowner, so_idhash); - if(local->so_id == st_id) - return local; + dprintk("NFSD: find_openstateowner_id %d\n", st_id); + if (flags & CLOSE_STATE) { + list_for_each_entry(local, &close_lru, so_close_lru) { + if(local->so_id == st_id) + return local; + } } return NULL; } @@ -1547,11 +1579,12 @@ no_nfs4_stateid: * starting by trying to look up the stateowner. * If stateowner is not found - stateid is bad. 
*/ - if (!(sop = find_openstateowner_id(stateid->si_stateownerid))) { + if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) { printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n"); status = nfserr_bad_stateid; goto out; } + *sopp = sop; check_replay: if (seqid == sop->so_seqid) { @@ -1561,11 +1594,15 @@ check_replay: } else { printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); + *sopp = NULL; status = nfserr_bad_seqid; } goto out; } +/* + * nfs4_unlock_state(); called in encode + */ int nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc) { @@ -1601,7 +1638,6 @@ nfsd4_open_confirm(struct svc_rqst *rqst stp->st_stateid.si_generation); status = nfs_ok; out: - nfs4_unlock_state(); return status; } @@ -1630,6 +1666,9 @@ reset_union_bmap_deny(unsigned long deny } } +/* + * nfs4_unlock_state(); called in encode + */ int nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od) @@ -1642,6 +1681,7 @@ nfsd4_open_downgrade(struct svc_rqst *rq (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); + od->od_stateowner = NULL; status = nfserr_inval; if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) goto out; @@ -1675,10 +1715,12 @@ nfsd4_open_downgrade(struct svc_rqst *rq memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); status = nfs_ok; out: - nfs4_unlock_state(); return status; } +/* + * nfs4_unlock_state() called after encode + */ int nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close) { @@ -1689,10 +1731,12 @@ nfsd4_close(struct svc_rqst *rqstp, stru (int)current_fh->fh_dentry->d_name.len, current_fh->fh_dentry->d_name.name); + close->cl_stateowner = NULL; nfs4_lock_state(); + /* check close_lru for replay */ if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, &close->cl_stateid, - CHECK_FH | 
OPEN_STATE, + CHECK_FH | OPEN_STATE | CLOSE_STATE, &close->cl_stateowner, &stp, NULL))) goto out; /* @@ -1705,7 +1749,6 @@ nfsd4_close(struct svc_rqst *rqstp, stru /* release_state_owner() calls nfsd_close() if needed */ release_state_owner(stp, &close->cl_stateowner, OPEN_STATE); out: - nfs4_unlock_state(); return status; } @@ -1729,7 +1772,6 @@ static struct list_head lockstateid_hash struct nfs4_stateid * find_stateid(stateid_t *stid, int flags) { - struct list_head *pos, *next; struct nfs4_stateid *local = NULL; u32 st_id = stid->si_stateownerid; u32 f_id = stid->si_fileid; @@ -1738,8 +1780,7 @@ find_stateid(stateid_t *stid, int flags) dprintk("NFSD: find_stateid flags 0x%x\n",flags); if ((flags & LOCK_STATE) || (flags & RDWR_STATE)) { hashval = stateid_hashval(st_id, f_id); - list_for_each_safe(pos, next, &lockstateid_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateid, st_hash); + list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { if((local->st_stateid.si_stateownerid == st_id) && (local->st_stateid.si_fileid == f_id)) return local; @@ -1747,8 +1788,7 @@ find_stateid(stateid_t *stid, int flags) } if ((flags & OPEN_STATE) || (flags & RDWR_STATE)) { hashval = stateid_hashval(st_id, f_id); - list_for_each_safe(pos, next, &stateid_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateid, st_hash); + list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { if((local->st_stateid.si_stateownerid == st_id) && (local->st_stateid.si_fileid == f_id)) return local; @@ -1779,14 +1819,12 @@ nfs4_transform_lock_offset(struct file_l int nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) { - struct list_head *pos, *next; struct nfs4_stateowner *local = NULL; int status = 0; if (hashval >= LOCK_HASH_SIZE) goto out; - list_for_each_safe(pos, next, &lock_ownerid_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateowner, so_idhash); + list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], 
so_idhash) { if (local == sop) { status = 1; goto out; @@ -1817,11 +1855,9 @@ nfs4_set_lock_denied(struct file_lock *f static int find_lockstateowner_str(unsigned int hashval, struct xdr_netobj *owner, clientid_t *clid, struct nfs4_stateowner **op) { - struct list_head *pos, *next; struct nfs4_stateowner *local = NULL; - list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[hashval]) { - local = list_entry(pos, struct nfs4_stateowner, so_strhash); + list_for_each_entry(local, &lock_ownerstr_hashtbl[hashval], so_strhash) { if(!cmp_owner_str(local, owner, clid)) continue; *op = local; @@ -1854,6 +1890,8 @@ alloc_init_lock_stateowner(unsigned int INIT_LIST_HEAD(&sop->so_perclient); INIT_LIST_HEAD(&sop->so_perfilestate); INIT_LIST_HEAD(&sop->so_perlockowner); + INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ + sop->so_time = 0; list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); list_add(&sop->so_perclient, &clp->cl_perclient); @@ -1913,6 +1951,8 @@ check_lock_length(u64 offset, u64 length /* * LOCK operation + * + * nfs4_unlock_state(); called in encode */ int nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) @@ -1929,6 +1969,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struc (long long) lock->lk_offset, (long long) lock->lk_length); + if (nfs4_in_grace() && !lock->lk_reclaim) + return nfserr_grace; + if (nfs4_in_no_grace() && lock->lk_reclaim) + return nfserr_no_grace; + if (check_lock_length(lock->lk_offset, lock->lk_length)) return nfserr_inval; @@ -1958,8 +2003,11 @@ nfsd4_lock(struct svc_rqst *rqstp, struc CHECK_FH | OPEN_STATE, &open_sop, &open_stp, &lock->v.new.clientid); - if (status) + if (status) { + if (lock->lk_reclaim) + status = nfserr_reclaim_bad; goto out; + } /* create lockowner and lock stateid */ fp = open_stp->st_file; strhashval = lock_ownerstr_hashval(fp->fi_inode, @@ -2077,7 +2125,6 @@ out_destroy_new_stateid: release_state_owner(lock_stp, 
&lock->lk_stateowner, LOCK_STATE); } out: - nfs4_unlock_state(); return status; } @@ -2095,6 +2142,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru unsigned int strhashval; int status; + if (nfs4_in_grace()) + return nfserr_grace; + if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; @@ -2188,6 +2238,7 @@ nfsd4_locku(struct svc_rqst *rqstp, stru if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; + locku->lu_stateowner = NULL; nfs4_lock_state(); if ((status = nfs4_preprocess_seqid_op(current_fh, @@ -2230,7 +2281,6 @@ nfsd4_locku(struct svc_rqst *rqstp, stru memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); out: - nfs4_unlock_state(); return status; out_nfserr: @@ -2265,7 +2315,6 @@ int nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) { clientid_t *clid = &rlockowner->rl_clientid; - struct list_head *pos, *next; struct nfs4_stateowner *local = NULL; struct xdr_netobj *owner = &rlockowner->rl_owner; int status, i; @@ -2286,9 +2335,7 @@ nfsd4_release_lockowner(struct svc_rqst /* find the lockowner */ status = nfs_ok; for (i=0; i < LOCK_HASH_SIZE; i++) { - list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[i]) { - local = list_entry(pos, struct nfs4_stateowner, - so_strhash); + list_for_each_entry(local, &lock_ownerstr_hashtbl[i], so_strhash) { if(cmp_owner_str(local, owner, clid)) break; } @@ -2299,9 +2346,7 @@ nfsd4_release_lockowner(struct svc_rqst /* check for any locks held by any stateid associated with the * (lock) stateowner */ status = nfserr_locks_held; - list_for_each_safe(pos, next, &local->so_perfilestate) { - stp = list_entry(pos, struct nfs4_stateid, - st_perfilestate); + list_for_each_entry(stp, &local->so_perfilestate, st_perfilestate) { if(stp->st_vfs_set) { if (check_for_locks(&stp->st_vfs_file, local)) goto out; @@ -2324,6 +2369,7 @@ void nfs4_state_init(void) { int i; + time_t start = get_seconds(); if (nfs4_init) return; @@ -2351,15 
+2397,30 @@ nfs4_state_init(void) memset(&zerostateid, 0, sizeof(stateid_t)); memset(&onestateid, ~0, sizeof(stateid_t)); + INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); init_MUTEX(&client_sema); - boot_time = get_seconds(); + boot_time = start; + grace_end = start + NFSD_LEASE_TIME; INIT_WORK(&laundromat_work,laundromat_main, NULL); schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ); nfs4_init = 1; } +int +nfs4_in_grace(void) +{ + return time_before(get_seconds(), (unsigned long)grace_end); +} + +int +nfs4_in_no_grace(void) +{ + return (grace_end < get_seconds()); +} + + static void __nfs4_state_shutdown(void) { --- linux-2.6.6-rc1/fs/nfsd/nfs4xdr.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/nfsd/nfs4xdr.c 2004-04-18 22:25:24.946038104 -0700 @@ -484,11 +484,14 @@ nfsd4_decode_access(struct nfsd4_compoun DECODE_TAIL; } +#define NFS4_STATE_NOT_LOCKED ((void *)-1) + static int nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { DECODE_HEAD; + close->cl_stateowner = NFS4_STATE_NOT_LOCKED; READ_BUF(4 + sizeof(stateid_t)); READ32(close->cl_seqid); READ32(close->cl_stateid.si_generation); @@ -579,6 +582,7 @@ nfsd4_decode_lock(struct nfsd4_compounda { DECODE_HEAD; + lock->lk_stateowner = NFS4_STATE_NOT_LOCKED; /* * type, reclaim(boolean), offset, length, new_lock_owner(boolean) */ @@ -636,6 +640,7 @@ nfsd4_decode_locku(struct nfsd4_compound { DECODE_HEAD; + locku->lu_stateowner = NFS4_STATE_NOT_LOCKED; READ_BUF(24 + sizeof(stateid_t)); READ32(locku->lu_type); if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) @@ -671,6 +676,7 @@ nfsd4_decode_open(struct nfsd4_compounda memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; + open->op_stateowner = NFS4_STATE_NOT_LOCKED; /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(16 + sizeof(clientid_t)); @@ -746,6 +752,7 @@ nfsd4_decode_open_confirm(struct nfsd4_c { DECODE_HEAD; + open_conf->oc_stateowner = 
NFS4_STATE_NOT_LOCKED; READ_BUF(4 + sizeof(stateid_t)); READ32(open_conf->oc_req_stateid.si_generation); COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); @@ -759,6 +766,7 @@ nfsd4_decode_open_downgrade(struct nfsd4 { DECODE_HEAD; + open_down->od_stateowner = NFS4_STATE_NOT_LOCKED; READ_BUF(4 + sizeof(stateid_t)); READ32(open_down->od_stateid.si_generation); COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); @@ -1259,7 +1267,8 @@ nfsd4_decode_compound(struct nfsd4_compo */ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ - if (seqid_mutating_err(nfserr) && stateowner) { \ + if (seqid_mutating_err(nfserr) && stateowner \ + && (stateowner != NFS4_STATE_NOT_LOCKED)) { \ if (stateowner->so_confirmed) \ stateowner->so_seqid++; \ stateowner->so_replay.rp_status = nfserr; \ @@ -1267,7 +1276,10 @@ nfsd4_decode_compound(struct nfsd4_compo (((char *)(resp)->p - (char *)save)); \ memcpy(stateowner->so_replay.rp_buf, save, \ stateowner->so_replay.rp_buflen); \ - } } while(0) + } \ + if (stateowner != NFS4_STATE_NOT_LOCKED) \ + nfs4_unlock_state(); \ + } while (0); static u32 nfs4_ftypes[16] = { @@ -1917,7 +1929,7 @@ nfsd4_encode_open(struct nfsd4_compoundr ENCODE_SEQID_OP_HEAD; if (nfserr) - return; + goto out; RESERVE_SPACE(36 + sizeof(stateid_t)); WRITE32(open->op_stateid.si_generation); @@ -1972,7 +1984,7 @@ nfsd4_encode_open(struct nfsd4_compoundr BUG(); } /* XXX save filehandle here */ - +out: ENCODE_SEQID_OP_TAIL(open->op_stateowner); } @@ -2179,6 +2191,8 @@ nfsd4_encode_readdir(struct nfsd4_compou readdir->common.err == nfserr_toosmall && readdir->buffer == page) nfserr = nfserr_toosmall; + if (nfserr == nfserr_symlink) + nfserr = nfserr_notdir; if (nfserr) goto err_no_verf; @@ -2295,14 +2309,8 @@ nfsd4_encode_operation(struct nfsd4_comp RESERVE_SPACE(8); WRITE32(op->opnum); - if ((op->opnum != OP_SETATTR) && (op->opnum != OP_LOCK) && (op->opnum != OP_LOCKT) && (op->opnum != OP_SETCLIENTID) && (op->status)) { - *p++ = op->status; 
- ADJUST_ARGS(); - return; - } else { - statp = p++; /* to be backfilled at the end */ - ADJUST_ARGS(); - } + statp = p++; /* to be backfilled at the end */ + ADJUST_ARGS(); switch (op->opnum) { case OP_ACCESS: @@ -2406,6 +2414,8 @@ nfsd4_encode_operation(struct nfsd4_comp * * XDR note: do not encode rp->rp_buflen: the buffer contains the * previously sent already encoded operation. + * + * called with nfs4_lock_state() held */ void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) @@ -2423,6 +2433,7 @@ nfsd4_encode_replay(struct nfsd4_compoun RESERVE_SPACE(rp->rp_buflen); WRITEMEM(rp->rp_buf, rp->rp_buflen); ADJUST_ARGS(); + nfs4_unlock_state(); } /* --- linux-2.6.6-rc1/fs/nfs/file.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/nfs/file.c 2004-04-18 22:25:46.849708240 -0700 @@ -154,6 +154,11 @@ nfs_file_read(struct kiocb *iocb, char * struct inode * inode = dentry->d_inode; ssize_t result; +#ifdef CONFIG_NFS_DIRECTIO + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_read(iocb, buf, count, pos); +#endif + dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); @@ -268,6 +273,11 @@ nfs_file_write(struct kiocb *iocb, const struct inode * inode = dentry->d_inode; ssize_t result; +#ifdef CONFIG_NFS_DIRECTIO + if (iocb->ki_filp->f_flags & O_DIRECT) + return nfs_file_direct_write(iocb, buf, count, pos); +#endif + dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, inode->i_ino, (unsigned long) count, (unsigned long) pos); @@ -275,6 +285,7 @@ nfs_file_write(struct kiocb *iocb, const result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; + result = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (result) goto out; --- linux-2.6.6-rc1/fs/nfs/nfsroot.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/nfs/nfsroot.c 2004-04-18 22:26:01.143535248 -0700 @@ -124,7 +124,7 @@ enum { Opt_broken_suid, 
Opt_err, }; -static match_table_t tokens = { +static match_table_t __initdata tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, --- linux-2.6.6-rc1/fs/nfs/read.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/nfs/read.c 2004-04-18 22:25:46.164812360 -0700 @@ -43,7 +43,7 @@ static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -static __inline__ struct nfs_read_data *nfs_readdata_alloc(void) +static struct nfs_read_data *nfs_readdata_alloc(void) { struct nfs_read_data *p; p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); @@ -99,21 +99,19 @@ nfs_readpage_sync(struct file *file, str unsigned int rsize = NFS_SERVER(inode)->rsize; unsigned int count = PAGE_CACHE_SIZE; int result; - struct nfs_read_data rdata = { - .flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0), - .cred = NULL, - .inode = inode, - .args = { - .fh = NFS_FH(inode), - .lockowner = current->files, - .pages = &page, - .pgbase = 0UL, - .count = rsize, - }, - .res = { - .fattr = &rdata.fattr, - } - }; + struct nfs_read_data *rdata; + + rdata = nfs_readdata_alloc(); + if (!rdata) + return -ENOMEM; + + rdata->flags = (IS_SWAPFILE(inode) ? 
NFS_RPC_SWAPFLAGS : 0); + rdata->inode = inode; + rdata->args.fh = NFS_FH(inode); + rdata->args.pages = &page; + rdata->args.count = rsize; + rdata->args.lockowner = current->files; + rdata->res.fattr = &rdata->fattr; dprintk("NFS: nfs_readpage_sync(%p)\n", page); @@ -123,19 +121,19 @@ nfs_readpage_sync(struct file *file, str */ do { if (count < rsize) - rdata.args.count = count; - rdata.res.count = rdata.args.count; - rdata.args.offset = page_offset(page) + rdata.args.pgbase; + rdata->args.count = count; + rdata->res.count = rdata->args.count; + rdata->args.offset = page_offset(page) + rdata->args.pgbase; dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", NFS_SERVER(inode)->hostname, inode->i_sb->s_id, (long long)NFS_FILEID(inode), - (unsigned long long)rdata.args.pgbase, - rdata.args.count); + (unsigned long long)rdata->args.pgbase, + rdata->args.count); lock_kernel(); - result = NFS_PROTO(inode)->read(&rdata, file); + result = NFS_PROTO(inode)->read(rdata, file); unlock_kernel(); /* @@ -148,17 +146,17 @@ nfs_readpage_sync(struct file *file, str goto io_error; } count -= result; - rdata.args.pgbase += result; + rdata->args.pgbase += result; /* Note: result == 0 should only happen if we're caching * a write that extends the file and punches a hole. 
*/ - if (rdata.res.eof != 0 || result == 0) + if (rdata->res.eof != 0 || result == 0) break; } while (count); NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; if (count) - memclear_highpage_flush(page, rdata.args.pgbase, count); + memclear_highpage_flush(page, rdata->args.pgbase, count); SetPageUptodate(page); if (PageError(page)) ClearPageError(page); @@ -166,6 +164,7 @@ nfs_readpage_sync(struct file *file, str io_error: unlock_page(page); + nfs_readdata_free(rdata); return result; } --- linux-2.6.6-rc1/fs/ntfs/aops.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/ntfs/aops.c 2004-04-18 22:25:24.947037952 -0700 @@ -1340,8 +1340,6 @@ err_out: void *kaddr; clear_buffer_new(bh); - if (buffer_uptodate(bh)) - buffer_error(); kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + block_start, 0, bh->b_size); kunmap_atomic(kaddr, KM_USER0); --- linux-2.6.6-rc1/fs/openpromfs/inode.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/openpromfs/inode.c 2004-04-18 22:25:24.948037800 -0700 @@ -1018,16 +1018,23 @@ static void openprom_read_inode(struct i } } +static int openprom_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NOATIME; + return 0; +} + static struct super_operations openprom_sops = { .read_inode = openprom_read_inode, .statfs = simple_statfs, + .remount_fs = openprom_remount, }; static int openprom_fill_super(struct super_block *s, void *data, int silent) { struct inode * root_inode; - s->s_flags |= MS_NODIRATIME; + s->s_flags |= MS_NOATIME; s->s_blocksize = 1024; s->s_blocksize_bits = 10; s->s_magic = OPENPROM_SUPER_MAGIC; --- linux-2.6.6-rc1/fs/proc/generic.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/proc/generic.c 2004-04-18 22:25:56.929175928 -0700 @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -275,24 +277,46 @@ static int xlate_proc_name(const char *n return 0; } -static unsigned long proc_alloc_map[(PROC_NDYNAMIC + BITS_PER_LONG - 1) / BITS_PER_LONG]; +static DEFINE_IDR(proc_inum_idr); +static 
spinlock_t proc_inum_lock = SPIN_LOCK_UNLOCKED; /* protects the above */ -spinlock_t proc_alloc_map_lock = SPIN_LOCK_UNLOCKED; +#define PROC_DYNAMIC_FIRST 0xF0000000UL -static int make_inode_number(void) +/* + * Return an inode number between PROC_DYNAMIC_FIRST and + * 0xffffffff, or zero on failure. + */ +static unsigned int get_inode_number(void) { - int i; - spin_lock(&proc_alloc_map_lock); - i = find_first_zero_bit(proc_alloc_map, PROC_NDYNAMIC); - if (i < 0 || i >= PROC_NDYNAMIC) { - i = -1; - goto out; - } - set_bit(i, proc_alloc_map); - i += PROC_DYNAMIC_FIRST; -out: - spin_unlock(&proc_alloc_map_lock); - return i; + unsigned int i, inum = 0; + +retry: + if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) + return 0; + + spin_lock(&proc_inum_lock); + i = idr_get_new(&proc_inum_idr, NULL); + spin_unlock(&proc_inum_lock); + + if (i == -1) + goto retry; + + inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; + + /* inum will never be more than 0xf0ffffff, so no check + * for overflow. + */ + + return inum; +} + +static void release_inode_number(unsigned int inum) +{ + int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; + + spin_lock(&proc_inum_lock); + idr_remove(&proc_inum_idr, id); + spin_unlock(&proc_inum_lock); } static int @@ -346,7 +370,8 @@ struct dentry *proc_lookup(struct inode if (de->namelen != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { - int ino = de->low_ino; + unsigned int ino = de->low_ino; + error = -EINVAL; inode = proc_get_inode(dir->i_sb, ino, de); break; @@ -452,10 +477,10 @@ static struct inode_operations proc_dir_ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - int i; + unsigned int i; - i = make_inode_number(); - if (i < 0) + i = get_inode_number(); + if (i == 0) return -EAGAIN; dp->low_ino = i; dp->next = dir->subdir; @@ -621,11 +646,13 @@ struct proc_dir_entry *create_proc_entry void free_proc_entry(struct proc_dir_entry *de) { - int ino = de->low_ino; + 
unsigned int ino = de->low_ino; - if (ino < PROC_DYNAMIC_FIRST || - ino >= PROC_DYNAMIC_FIRST+PROC_NDYNAMIC) + if (ino < PROC_DYNAMIC_FIRST) return; + + release_inode_number(ino); + if (S_ISLNK(de->mode) && de->data) kfree(de->data); kfree(de); @@ -653,8 +680,6 @@ void remove_proc_entry(const char *name, de->next = NULL; if (S_ISDIR(de->mode)) parent->nlink--; - clear_bit(de->low_ino - PROC_DYNAMIC_FIRST, - proc_alloc_map); proc_kill_inodes(de); de->nlink = 0; WARN_ON(de->subdir); --- linux-2.6.6-rc1/fs/proc/inode-alloc.txt 2003-06-14 12:18:07.000000000 -0700 +++ 25/fs/proc/inode-alloc.txt 2004-04-18 22:25:56.786197664 -0700 @@ -4,9 +4,10 @@ Current inode allocations in the proc-fs 00000001-00000fff static entries (goners) 001 root-ino - 00001000-00001fff dynamic entries + 00001000-00001fff unused 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff - 80000000-ffffffff unused + 80000000-efffffff unused + f0000000-ffffffff dynamic entries Goal: a) once we'll split the thing into several virtual filesystems we --- linux-2.6.6-rc1/fs/proc/inode.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/proc/inode.c 2004-04-18 22:25:56.787197512 -0700 @@ -127,6 +127,12 @@ int __init proc_init_inodecache(void) return 0; } +static int proc_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + static struct super_operations proc_sops = { .alloc_inode = proc_alloc_inode, .destroy_inode = proc_destroy_inode, @@ -134,6 +140,7 @@ static struct super_operations proc_sops .drop_inode = generic_delete_inode, .delete_inode = proc_delete_inode, .statfs = simple_statfs, + .remount_fs = proc_remount, }; enum { @@ -181,8 +188,8 @@ static int parse_options(char *options,u return 1; } -struct inode * proc_get_inode(struct super_block * sb, int ino, - struct proc_dir_entry * de) +struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, + struct proc_dir_entry *de) { struct inode * inode; @@ -190,11 +197,8 @@ struct inode * 
proc_get_inode(struct sup * Increment the use count so the dir entry can't disappear. */ de_get(de); -#if 1 -/* shouldn't ever happen */ -if (de && de->deleted) -printk("proc_iget: using deleted entry %s, count=%d\n", de->name, atomic_read(&de->count)); -#endif + + WARN_ON(de && de->deleted); inode = iget(sb, ino); if (!inode) --- linux-2.6.6-rc1/fs/proc/kcore.c 2003-09-27 18:57:46.000000000 -0700 +++ 25/fs/proc/kcore.c 2004-04-18 22:25:35.903372336 -0700 @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -84,8 +85,6 @@ kclist_del(void *addr) return 0; } -extern char saved_command_line[]; - static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) { size_t try, size; --- linux-2.6.6-rc1/fs/proc/proc_misc.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/proc/proc_misc.c 2004-04-18 22:26:02.146382792 -0700 @@ -47,7 +47,6 @@ #include #include #include -#include #include #include @@ -286,6 +285,10 @@ static struct file_operations proc_vmsta .release = seq_release, }; +#ifdef CONFIG_SCHEDSTATS +extern struct file_operations proc_schedstat_operations; +#endif + #ifdef CONFIG_PROC_HARDWARE static int hardware_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -522,7 +525,6 @@ static int filesystems_read_proc(char *p static int cmdline_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - extern char saved_command_line[]; int len; len = sprintf(page, "%s\n", saved_command_line); @@ -651,6 +653,36 @@ static void create_seq_entry(char *name, entry->proc_fops = f; } +#ifdef CONFIG_LOCKMETER +extern ssize_t get_lockmeter_info(char *, size_t, loff_t *); +extern ssize_t put_lockmeter_info(const char *, size_t); +extern int get_lockmeter_info_size(void); + +/* + * This function accesses lock metering information. 
+ */ +static ssize_t read_lockmeter(struct file *file, char *buf, + size_t count, loff_t *ppos) +{ + return get_lockmeter_info(buf, count, ppos); +} + +/* + * Writing to /proc/lockmeter resets the counters + */ +static ssize_t write_lockmeter(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return put_lockmeter_info(buf, count); +} + +static struct file_operations proc_lockmeter_operations = { + NULL, /* lseek */ + read: read_lockmeter, + write: write_lockmeter, +}; +#endif /* CONFIG_LOCKMETER */ + void __init proc_misc_init(void) { struct proc_dir_entry *entry; @@ -698,6 +730,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_MODULES create_seq_entry("modules", 0, &proc_modules_operations); #endif +#ifdef CONFIG_SCHEDSTATS + create_seq_entry("schedstat", 0, &proc_schedstat_operations); +#endif #ifdef CONFIG_PROC_KCORE proc_root_kcore = create_proc_entry("kcore", S_IRUSR, NULL); if (proc_root_kcore) { @@ -718,6 +753,13 @@ void __init proc_misc_init(void) if (entry) entry->proc_fops = &proc_sysrq_trigger_operations; #endif +#ifdef CONFIG_LOCKMETER + entry = create_proc_entry("lockmeter", S_IWUSR | S_IRUGO, NULL); + if (entry) { + entry->proc_fops = &proc_lockmeter_operations; + entry->size = get_lockmeter_info_size(); + } +#endif #ifdef CONFIG_PPC32 { extern struct file_operations ppc_htab_operations; --- linux-2.6.6-rc1/fs/qnx4/inode.c 2003-06-22 12:04:44.000000000 -0700 +++ 25/fs/qnx4/inode.c 2004-04-18 22:25:24.949037648 -0700 @@ -149,9 +149,13 @@ static int qnx4_remount(struct super_blo qs = qnx4_sb(sb); qs->Version = QNX4_VERSION; +#ifndef CONFIG_QNX4FS_RW + *flags |= MS_RDONLY; +#endif if (*flags & MS_RDONLY) { return 0; } + mark_buffer_dirty(qs->sb_buf); return 0; --- linux-2.6.6-rc1/fs/quota_v1.c 2003-10-08 15:07:10.000000000 -0700 +++ 25/fs/quota_v1.c 2004-04-18 22:25:24.950037496 -0700 @@ -60,7 +60,7 @@ static int v1_read_dqblk(struct dquot *d v1_disk2mem_dqblk(&dquot->dq_dqb, &dqblk); if (dquot->dq_dqb.dqb_bhardlimit == 0 && 
dquot->dq_dqb.dqb_bsoftlimit == 0 && dquot->dq_dqb.dqb_ihardlimit == 0 && dquot->dq_dqb.dqb_isoftlimit == 0) - dquot->dq_flags |= DQ_FAKE; + set_bit(DQ_FAKE_B, &dquot->dq_flags); dqstats.reads++; return 0; @@ -80,12 +80,7 @@ static int v1_commit_dqblk(struct dquot fs = get_fs(); set_fs(KERNEL_DS); - /* - * Note: clear the DQ_MOD flag unconditionally, - * so we don't loop forever on failure. - */ v1_mem2disk_dqblk(&dqblk, &dquot->dq_dqb); - dquot->dq_flags &= ~DQ_MOD; if (dquot->dq_id == 0) { dqblk.dqb_btime = sb_dqopt(dquot->dq_sb)->info[type].dqi_bgrace; dqblk.dqb_itime = sb_dqopt(dquot->dq_sb)->info[type].dqi_igrace; --- linux-2.6.6-rc1/fs/quota_v2.c 2003-06-14 12:18:30.000000000 -0700 +++ 25/fs/quota_v2.c 2004-04-18 22:25:24.952037192 -0700 @@ -65,7 +65,7 @@ static int v2_read_file_info(struct supe set_fs(fs); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't read info structure on device %s.\n", - f->f_vfsmnt->mnt_sb->s_id); + f->f_dentry->d_sb->s_id); return -1; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); @@ -87,10 +87,12 @@ static int v2_write_file_info(struct sup ssize_t size; loff_t offset = V2_DQINFOOFF; + spin_lock(&dq_data_lock); info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); + spin_unlock(&dq_data_lock); dinfo.dqi_blocks = cpu_to_le32(info->u.v2_i.dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(info->u.v2_i.dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(info->u.v2_i.dqi_free_entry); @@ -100,7 +102,7 @@ static int v2_write_file_info(struct sup set_fs(fs); if (size != sizeof(struct v2_disk_dqinfo)) { printk(KERN_WARNING "Can't write info structure on device %s.\n", - f->f_vfsmnt->mnt_sb->s_id); + f->f_dentry->d_sb->s_id); return -1; } return 0; @@ -173,9 +175,10 @@ static ssize_t write_blk(struct file *fi } /* Remove empty block from list and return it */ -static int 
get_free_dqblk(struct file *filp, struct mem_dqinfo *info) +static int get_free_dqblk(struct file *filp, int type) { dqbuf_t buf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int ret, blk; @@ -193,7 +196,7 @@ static int get_free_dqblk(struct file *f goto out_buf; blk = info->u.v2_i.dqi_blocks++; } - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); ret = blk; out_buf: freedqbuf(buf); @@ -201,8 +204,9 @@ out_buf: } /* Insert empty block to the list */ -static int put_free_dqblk(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int put_free_dqblk(struct file *filp, int type, dqbuf_t buf, uint blk) { + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -210,16 +214,17 @@ static int put_free_dqblk(struct file *f dh->dqdh_prev_free = cpu_to_le32(0); dh->dqdh_entries = cpu_to_le16(0); info->u.v2_i.dqi_free_blk = blk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); if ((err = write_blk(filp, blk, buf)) < 0) /* Some strange block. We had better leave it... 
*/ return err; return 0; } /* Remove given block from the list of blocks with free entries */ -static int remove_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int remove_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; uint nextblk = le32_to_cpu(dh->dqdh_next_free), prevblk = le32_to_cpu(dh->dqdh_prev_free); int err; @@ -242,7 +247,7 @@ static int remove_free_dqentry(struct fi } else { info->u.v2_i.dqi_free_entry = nextblk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); } freedqbuf(tmpbuf); dh->dqdh_next_free = dh->dqdh_prev_free = cpu_to_le32(0); @@ -255,9 +260,10 @@ out_buf: } /* Insert given block to the beginning of list with free entries */ -static int insert_free_dqentry(struct file *filp, struct mem_dqinfo *info, dqbuf_t buf, uint blk) +static int insert_free_dqentry(struct file *filp, int type, dqbuf_t buf, uint blk) { dqbuf_t tmpbuf = getdqbuf(); + struct mem_dqinfo *info = sb_dqinfo(filp->f_dentry->d_sb, type); struct v2_disk_dqdbheader *dh = (struct v2_disk_dqdbheader *)buf; int err; @@ -276,7 +282,7 @@ static int insert_free_dqentry(struct fi } freedqbuf(tmpbuf); info->u.v2_i.dqi_free_entry = blk; - mark_info_dirty(info); + mark_info_dirty(filp->f_dentry->d_sb, type); return 0; out_buf: freedqbuf(tmpbuf); @@ -307,7 +313,7 @@ static uint find_free_dqentry(struct dqu goto out_buf; } else { - blk = get_free_dqblk(filp, info); + blk = get_free_dqblk(filp, dquot->dq_type); if ((int)blk < 0) { *err = blk; freedqbuf(buf); @@ -315,10 +321,10 @@ static uint find_free_dqentry(struct dqu } memset(buf, 0, V2_DQBLKSIZE); info->u.v2_i.dqi_free_entry = blk; /* This is enough as block is already zeroed and entry list is empty... 
*/ - mark_info_dirty(info); + mark_info_dirty(dquot->dq_sb, dquot->dq_type); } if (le16_to_cpu(dh->dqdh_entries)+1 >= V2_DQSTRINBLK) /* Block will be full? */ - if ((*err = remove_free_dqentry(filp, info, buf, blk)) < 0) { + if ((*err = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk); goto out_buf; } @@ -349,7 +355,6 @@ out_buf: static int do_insert_tree(struct dquot *dquot, uint *treeblk, int depth) { struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; dqbuf_t buf; int ret = 0, newson = 0, newact = 0; u32 *ref; @@ -358,7 +363,7 @@ static int do_insert_tree(struct dquot * if (!(buf = getdqbuf())) return -ENOMEM; if (!*treeblk) { - ret = get_free_dqblk(filp, info); + ret = get_free_dqblk(filp, dquot->dq_type); if (ret < 0) goto out_buf; *treeblk = ret; @@ -392,7 +397,7 @@ static int do_insert_tree(struct dquot * ret = write_blk(filp, *treeblk, buf); } else if (newact && ret < 0) - put_free_dqblk(filp, info, buf, *treeblk); + put_free_dqblk(filp, dquot->dq_type, buf, *treeblk); out_buf: freedqbuf(buf); return ret; @@ -417,6 +422,7 @@ static int v2_write_dquot(struct dquot * ssize_t ret; struct v2_disk_dqblk ddquot; + /* dq_off is guarded by dqio_sem */ if (!dquot->dq_off) if ((ret = dq_insert_tree(dquot)) < 0) { printk(KERN_ERR "VFS: Error %Zd occurred while creating quota.\n", ret); @@ -424,7 +430,9 @@ static int v2_write_dquot(struct dquot * } filp = sb_dqopt(dquot->dq_sb)->files[type]; offset = dquot->dq_off; + spin_lock(&dq_data_lock); mem2diskdqb(&ddquot, &dquot->dq_dqb, dquot->dq_id); + spin_unlock(&dq_data_lock); fs = get_fs(); set_fs(KERNEL_DS); ret = filp->f_op->write(filp, (char *)&ddquot, sizeof(struct v2_disk_dqblk), &offset); @@ -445,7 +453,6 @@ static int v2_write_dquot(struct dquot * static int free_dqentry(struct dquot *dquot, uint blk) { struct file 
*filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; struct v2_disk_dqdbheader *dh; dqbuf_t buf = getdqbuf(); int ret = 0; @@ -463,8 +470,8 @@ static int free_dqentry(struct dquot *dq dh = (struct v2_disk_dqdbheader *)buf; dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1); if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */ - if ((ret = remove_free_dqentry(filp, info, buf, blk)) < 0 || - (ret = put_free_dqblk(filp, info, buf, blk)) < 0) { + if ((ret = remove_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0 || + (ret = put_free_dqblk(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't move quota data block (%u) to free list.\n", blk); goto out_buf; } @@ -473,7 +480,7 @@ static int free_dqentry(struct dquot *dq memset(buf+(dquot->dq_off & ((1 << V2_DQBLKSIZE_BITS)-1)), 0, sizeof(struct v2_disk_dqblk)); if (le16_to_cpu(dh->dqdh_entries) == V2_DQSTRINBLK-1) { /* Insert will write block itself */ - if ((ret = insert_free_dqentry(filp, info, buf, blk)) < 0) { + if ((ret = insert_free_dqentry(filp, dquot->dq_type, buf, blk)) < 0) { printk(KERN_ERR "VFS: Can't insert quota data block (%u) to free entry list.\n", blk); goto out_buf; } @@ -494,7 +501,6 @@ out_buf: static int remove_tree(struct dquot *dquot, uint *blk, int depth) { struct file *filp = sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; - struct mem_dqinfo *info = sb_dqopt(dquot->dq_sb)->info + dquot->dq_type; dqbuf_t buf = getdqbuf(); int ret = 0; uint newblk; @@ -518,7 +524,7 @@ static int remove_tree(struct dquot *dqu ref[GETIDINDEX(dquot->dq_id, depth)] = cpu_to_le32(0); for (i = 0; i < V2_DQBLKSIZE && !buf[i]; i++); /* Block got empty? 
*/ if (i == V2_DQBLKSIZE) { - put_free_dqblk(filp, info, buf, *blk); + put_free_dqblk(filp, dquot->dq_type, buf, *blk); *blk = 0; } else @@ -632,7 +638,7 @@ static int v2_read_dquot(struct dquot *d if (offset < 0) printk(KERN_ERR "VFS: Can't read quota structure for id %u.\n", dquot->dq_id); dquot->dq_off = 0; - dquot->dq_flags |= DQ_FAKE; + set_bit(DQ_FAKE_B, &dquot->dq_flags); memset(&dquot->dq_dqb, 0, sizeof(struct mem_dqblk)); ret = offset; } @@ -650,21 +656,24 @@ static int v2_read_dquot(struct dquot *d ret = 0; set_fs(fs); disk2memdqb(&dquot->dq_dqb, &ddquot); + if (!dquot->dq_dqb.dqb_bhardlimit && + !dquot->dq_dqb.dqb_bsoftlimit && + !dquot->dq_dqb.dqb_ihardlimit && + !dquot->dq_dqb.dqb_isoftlimit) + set_bit(DQ_FAKE_B, &dquot->dq_flags); } dqstats.reads++; return ret; } -/* Commit changes of dquot to disk - it might also mean deleting it when quota became fake one and user has no blocks... */ -static int v2_commit_dquot(struct dquot *dquot) +/* Check whether dquot should not be deleted. We know we are + * the only one operating on dquot (thanks to dq_lock) */ +static int v2_release_dquot(struct dquot *dquot) { - /* We clear the flag everytime so we don't loop when there was an IO error... 
*/ - dquot->dq_flags &= ~DQ_MOD; - if (dquot->dq_flags & DQ_FAKE && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) + if (test_bit(DQ_FAKE_B, &dquot->dq_flags) && !(dquot->dq_dqb.dqb_curinodes | dquot->dq_dqb.dqb_curspace)) return v2_delete_dquot(dquot); - else - return v2_write_dquot(dquot); + return 0; } static struct quota_format_ops v2_format_ops = { @@ -673,7 +682,8 @@ static struct quota_format_ops v2_format .write_file_info = v2_write_file_info, .free_file_info = NULL, .read_dqblk = v2_read_dquot, - .commit_dqblk = v2_commit_dquot, + .commit_dqblk = v2_write_dquot, + .release_dqblk = v2_release_dquot, }; static struct quota_format_type v2_quota_format = { --- linux-2.6.6-rc1/fs/read_write.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/read_write.c 2004-04-18 22:25:24.952037192 -0700 @@ -635,7 +635,7 @@ asmlinkage ssize_t sys_sendfile(int out_ return ret; } - return do_sendfile(out_fd, in_fd, NULL, count, MAX_NON_LFS); + return do_sendfile(out_fd, in_fd, NULL, count, 0); } asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) --- linux-2.6.6-rc1/fs/reiserfs/inode.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/reiserfs/inode.c 2004-04-18 22:25:57.352111632 -0700 @@ -1925,7 +1925,6 @@ static int map_block_for_writepage(struc th.t_trans_id = 0; if (!buffer_uptodate(bh_result)) { - buffer_error(); return -EIO; } @@ -2057,8 +2056,6 @@ static int reiserfs_write_full_page(stru * in the BH_Uptodate is just a sanity check. 
*/ if (!page_has_buffers(page)) { - if (!PageUptodate(page)) - buffer_error(); create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty) | (1 << BH_Uptodate)); } @@ -2120,8 +2117,6 @@ static int reiserfs_write_full_page(stru } } if (test_clear_buffer_dirty(bh)) { - if (!buffer_uptodate(bh)) - buffer_error(); mark_buffer_async_write(bh); } else { unlock_buffer(bh); @@ -2503,14 +2498,14 @@ static int reiserfs_releasepage(struct p /* We thank Mingming Cao for helping us understand in great detail what to do in this section of the code. */ -static int reiserfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); + offset, nr_segs, reiserfs_get_blocks_direct_io, NULL); } --- linux-2.6.6-rc1/fs/reiserfs/journal.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/reiserfs/journal.c 2004-04-18 22:25:57.944021648 -0700 @@ -86,6 +86,7 @@ static struct workqueue_struct *commit_w /* journal list state bits */ #define LIST_TOUCHED 1 #define LIST_DIRTY 2 +#define LIST_COMMIT_PENDING 4 /* someone will commit this list */ /* flags for do_journal_end */ #define FLUSH_ALL 1 /* flush commit and real blocks */ @@ -2303,13 +2304,8 @@ int journal_init(struct super_block *p_s SB_JOURNAL_TRANS_MAX(p_s_sb) = le32_to_cpu (jh->jh_journal.jp_journal_trans_max); SB_JOURNAL_MAX_BATCH(p_s_sb) = le32_to_cpu (jh->jh_journal.jp_journal_max_batch); - if (commit_max_age != 0) { - SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) = commit_max_age; - SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) = commit_max_age; - } else { - SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age); - 
SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) = JOURNAL_MAX_TRANS_AGE; - } + SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) = le32_to_cpu (jh->jh_journal.jp_journal_max_commit_age); + SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) = JOURNAL_MAX_TRANS_AGE; if (SB_JOURNAL_TRANS_MAX(p_s_sb)) { /* make sure these parameters are available, assign it if they are not */ @@ -2348,6 +2344,14 @@ int journal_init(struct super_block *p_s SB_JOURNAL_MAX_BATCH(p_s_sb) = (SB_JOURNAL_TRANS_MAX(p_s_sb)) * 9 / 10 ; } } + + SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(p_s_sb) = SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb); + + if (commit_max_age != 0) { + SB_JOURNAL_MAX_COMMIT_AGE(p_s_sb) = commit_max_age; + SB_JOURNAL_MAX_TRANS_AGE(p_s_sb) = commit_max_age; + } + printk ("Reiserfs journal params: device %s, size %u, " "journal first block %u, max trans len %u, max batch %u, " "max commit age %u, max trans age %u\n", @@ -2462,8 +2466,20 @@ void reiserfs_wait_on_write_block(struct } static void queue_log_writer(struct super_block *s) { + wait_queue_t wait; set_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state); - sleep_on(&SB_JOURNAL(s)->j_join_wait); + + /* + * we don't want to use wait_event here because + * we only want to wait once. 
+ */ + init_waitqueue_entry(&wait, current); + add_wait_queue(&SB_JOURNAL(s)->j_join_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + if (test_bit(WRITERS_QUEUED, &SB_JOURNAL(s)->j_state)) + schedule(); + current->state = TASK_RUNNING; + remove_wait_queue(&SB_JOURNAL(s)->j_join_wait, &wait); } static void wake_queued_writers(struct super_block *s) { @@ -2476,7 +2492,9 @@ static void let_transaction_grow(struct { unsigned long bcount = SB_JOURNAL(sb)->j_bcount; while(1) { - yield(); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + SB_JOURNAL(sb)->j_current_jl->j_state |= LIST_COMMIT_PENDING; while ((atomic_read(&SB_JOURNAL(sb)->j_wcount) > 0 || atomic_read(&SB_JOURNAL(sb)->j_jlock)) && SB_JOURNAL(sb)->j_trans_id == trans_id) { @@ -2909,9 +2927,15 @@ static void flush_async_commits(void *p) flush_commit_list(p_s_sb, jl, 1); } unlock_kernel(); - atomic_inc(&SB_JOURNAL(p_s_sb)->j_async_throttle); - filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); - atomic_dec(&SB_JOURNAL(p_s_sb)->j_async_throttle); + /* + * this is a little racey, but there's no harm in missing + * the filemap_fdata_write + */ + if (!atomic_read(&SB_JOURNAL(p_s_sb)->j_async_throttle)) { + atomic_inc(&SB_JOURNAL(p_s_sb)->j_async_throttle); + filemap_fdatawrite(p_s_sb->s_bdev->bd_inode->i_mapping); + atomic_dec(&SB_JOURNAL(p_s_sb)->j_async_throttle); + } } /* @@ -3000,7 +3024,8 @@ static int check_journal_end(struct reis jl = SB_JOURNAL(p_s_sb)->j_current_jl; trans_id = jl->j_trans_id; - + if (wait_on_commit) + jl->j_state |= LIST_COMMIT_PENDING; atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 1) ; if (flush) { SB_JOURNAL(p_s_sb)->j_next_full_flush = 1 ; @@ -3522,8 +3547,8 @@ static int do_journal_end(struct reiserf if (flush) { flush_commit_list(p_s_sb, jl, 1) ; flush_journal_list(p_s_sb, jl, 1) ; - } else - queue_work(commit_wq, &SB_JOURNAL(p_s_sb)->j_work); + } else if (!(jl->j_state & LIST_COMMIT_PENDING)) + queue_delayed_work(commit_wq, &SB_JOURNAL(p_s_sb)->j_work, 
HZ/10); /* if the next transaction has any chance of wrapping, flush --- linux-2.6.6-rc1/fs/reiserfs/super.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/reiserfs/super.c 2004-04-18 22:25:24.959036128 -0700 @@ -709,13 +709,11 @@ static int reiserfs_parse_options (struc char *p = 0; int val = simple_strtoul (arg, &p, 0); /* commit=NNN (time in seconds) */ - if ( *p != '\0' || val == 0) { + if ( *p != '\0' || val < 0) { printk ("reiserfs_parse_options: bad value %s\n", arg); return 0; } - if ( val > 0 ) { - *commit_max_age = val; - } + *commit_max_age = val; } if ( c == 'w' ) { @@ -821,8 +819,14 @@ static int reiserfs_remount (struct supe REISERFS_SB(s)->s_mount_opt = (REISERFS_SB(s)->s_mount_opt & ~safe_mask) | (mount_options & safe_mask); if(commit_max_age != 0) { - SB_JOURNAL_MAX_COMMIT_AGE(s) = commit_max_age; - SB_JOURNAL_MAX_TRANS_AGE(s) = commit_max_age; + SB_JOURNAL_MAX_COMMIT_AGE(s) = commit_max_age; + SB_JOURNAL_MAX_TRANS_AGE(s) = commit_max_age; + } + else + { + /* 0 means restore defaults. 
*/ + SB_JOURNAL_MAX_COMMIT_AGE(s) = SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(s); + SB_JOURNAL_MAX_TRANS_AGE(s) = JOURNAL_MAX_TRANS_AGE; } if(blocks) { --- linux-2.6.6-rc1/fs/romfs/inode.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/romfs/inode.c 2004-04-18 22:25:24.960035976 -0700 @@ -592,11 +592,18 @@ static void destroy_inodecache(void) printk(KERN_INFO "romfs_inode_cache: not all structures were freed\n"); } +static int romfs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + static struct super_operations romfs_ops = { .alloc_inode = romfs_alloc_inode, .destroy_inode = romfs_destroy_inode, .read_inode = romfs_read_inode, .statfs = romfs_statfs, + .remount_fs = romfs_remount, }; static struct super_block *romfs_get_sb(struct file_system_type *fs_type, --- linux-2.6.6-rc1/fs/select.c 2003-10-08 15:07:10.000000000 -0700 +++ 25/fs/select.c 2004-04-18 22:25:43.440226560 -0700 @@ -291,8 +291,6 @@ static void select_bits_free(void *bits, * Update: ERESTARTSYS breaks at least the xview clock binary, so * I'm trying ERESTARTNOHAND which restart only when you want to. 
*/ -#define MAX_SELECT_SECONDS \ - ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) @@ -315,9 +313,11 @@ sys_select(int n, fd_set __user *inp, fd if (sec < 0 || usec < 0) goto out_nofds; - if ((unsigned long) sec < MAX_SELECT_SECONDS) { + if ((unsigned long) sec < (MAX_SCHEDULE_TIMEOUT-1) / HZ - 1) { timeout = ROUND_UP(usec, 1000000/HZ); timeout += sec * (unsigned long) HZ; + } else { + timeout = MAX_SCHEDULE_TIMEOUT-1; } } @@ -469,11 +469,17 @@ asmlinkage long sys_poll(struct pollfd _ return -EINVAL; if (timeout) { - /* Careful about overflow in the intermediate values */ - if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ) - timeout = (unsigned long)(timeout*HZ+999)/1000+1; - else /* Negative or overflow */ + if (timeout < 0) { timeout = MAX_SCHEDULE_TIMEOUT; + } else { + /* Careful about overflow in the intermediate values */ + long seconds = timeout/1000; + timeout = ((timeout - 1000*seconds)*HZ + 999)/1000 + 1; + if (seconds <= (MAX_SCHEDULE_TIMEOUT-2) / HZ - 1) + timeout += seconds*HZ; + else + timeout = MAX_SCHEDULE_TIMEOUT-1; + } } poll_initwait(&table); --- linux-2.6.6-rc1/fs/smbfs/inode.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/smbfs/inode.c 2004-04-18 22:25:24.961035824 -0700 @@ -93,6 +93,12 @@ static void destroy_inodecache(void) printk(KERN_INFO "smb_inode_cache: not all structures were freed\n"); } +static int smb_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_NODIRATIME; + return 0; +} + static struct super_operations smb_sops = { .alloc_inode = smb_alloc_inode, @@ -102,6 +108,7 @@ static struct super_operations smb_sops .put_super = smb_put_super, .statfs = smb_statfs, .show_options = smb_show_options, + .remount_fs = smb_remount, }; --- linux-2.6.6-rc1/fs/stat.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/stat.c 2004-04-18 22:25:24.961035824 -0700 @@ -398,6 +398,8 @@ 
EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { + /* Caller is here responsible for sufficient locking + * (ie. inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } --- linux-2.6.6-rc1/fs/super.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/super.c 2004-04-18 22:26:00.419645296 -0700 @@ -68,6 +68,7 @@ static struct super_block *alloc_super(v INIT_LIST_HEAD(&s->s_files); INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); + INIT_LIST_HEAD(&s->s_inodes); init_rwsem(&s->s_umount); sema_init(&s->s_lock, 1); down_write(&s->s_umount); @@ -77,6 +78,7 @@ static struct super_block *alloc_super(v sema_init(&s->s_dquot.dqio_sem, 1); sema_init(&s->s_dquot.dqonoff_sem, 1); init_rwsem(&s->s_dquot.dqptr_sem); + init_waitqueue_head(&s->s_wait_unfrozen); s->s_maxbytes = MAX_NON_LFS; s->dq_op = sb_dquot_ops; s->s_qcop = sb_quotactl_ops; @@ -562,7 +564,9 @@ int set_anon_super(struct super_block *s spin_unlock(&unnamed_dev_lock); if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { + spin_lock(&unnamed_dev_lock); idr_remove(&unnamed_dev_idr, dev); + spin_unlock(&unnamed_dev_lock); return -EMFILE; } s->s_dev = MKDEV(0, dev & MINORMASK); @@ -621,7 +625,14 @@ struct super_block *get_sb_bdev(struct f if (IS_ERR(bdev)) return (struct super_block *)bdev; + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + down(&bdev->bd_mount_sem); s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + up(&bdev->bd_mount_sem); if (IS_ERR(s)) goto out; --- linux-2.6.6-rc1/fs/sysfs/bin.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/fs/sysfs/bin.c 2004-04-18 22:25:54.534539968 -0700 @@ -94,7 +94,7 @@ static ssize_t write(struct file * file, static int open(struct inode * inode, struct file * file) { - struct kobject * kobj = kobject_get(file->f_dentry->d_parent->d_fsdata); + struct kobject *kobj = 
sysfs_get_kobject(file->f_dentry->d_parent); struct bin_attribute * attr = file->f_dentry->d_fsdata; int error = -EINVAL; --- linux-2.6.6-rc1/fs/sysfs/file.c 2003-11-09 16:45:05.000000000 -0800 +++ 25/fs/sysfs/file.c 2004-04-18 22:25:54.535539816 -0700 @@ -238,7 +238,7 @@ sysfs_write_file(struct file *file, cons static int check_perm(struct inode * inode, struct file * file) { - struct kobject * kobj = kobject_get(file->f_dentry->d_parent->d_fsdata); + struct kobject *kobj = sysfs_get_kobject(file->f_dentry->d_parent); struct attribute * attr = file->f_dentry->d_fsdata; struct sysfs_buffer * buffer; struct sysfs_ops * ops = NULL; --- linux-2.6.6-rc1/fs/sysfs/sysfs.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/fs/sysfs/sysfs.h 2004-04-18 22:25:54.535539816 -0700 @@ -11,3 +11,16 @@ extern void sysfs_hash_and_remove(struct extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); extern void sysfs_remove_subdir(struct dentry *); + + +static inline struct kobject *sysfs_get_kobject(struct dentry *dentry) +{ + struct kobject * kobj = NULL; + + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) + kobj = kobject_get(dentry->d_fsdata); + spin_unlock(&dentry->d_lock); + + return kobj; +} --- linux-2.6.6-rc1/fs/sysv/inode.c 2003-09-08 13:58:59.000000000 -0700 +++ 25/fs/sysv/inode.c 2004-04-18 22:25:24.962035672 -0700 @@ -57,6 +57,16 @@ clean: unlock_kernel(); } +static int sysv_remount(struct super_block *sb, int *flags, char *data) +{ + struct sysv_sb_info *sbi = SYSV_SB(sb); + if (sbi->s_forced_ro) + *flags |= MS_RDONLY; + if (!(*flags & MS_RDONLY)) + sb->s_dirt = 1; + return 0; +} + static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); @@ -321,6 +331,7 @@ struct super_operations sysv_sops = { .delete_inode = sysv_delete_inode, .put_super = sysv_put_super, .write_super = sysv_write_super, + .remount_fs = sysv_remount, .statfs = sysv_statfs, }; --- linux-2.6.6-rc1/fs/sysv/super.c 2003-06-14 12:17:58.000000000 
-0700 +++ 25/fs/sysv/super.c 2004-04-18 22:25:24.963035520 -0700 @@ -206,11 +206,11 @@ static int detect_sysv(struct sysv_sb_in if (fs16_to_cpu(sbi, sbd->s_nfree) == 0xffff) { sbi->s_type = FSTYPE_AFS; + sbi->s_forced_ro = 1; if (!(sb->s_flags & MS_RDONLY)) { printk("SysV FS: SCO EAFS on %s detected, " "forcing read-only mode.\n", sb->s_id); - sb->s_flags |= MS_RDONLY; } return sbd->s_type; } @@ -234,7 +234,7 @@ static int detect_sysv(struct sysv_sb_in if (sbd->s_type >= 0x10) { printk("SysV FS: can't handle long file names on %s, " "forcing read-only mode.\n", sb->s_id); - sb->s_flags |= MS_RDONLY; + sbi->s_forced_ro = 1; } sbi->s_type = FSTYPE_SYSV4; @@ -335,9 +335,10 @@ static int complete_read_super(struct su printk("SysV FS: get root dentry failed\n"); return 0; } + if (sbi->s_forced_ro) + sb->s_flags |= MS_RDONLY; if (sbi->s_truncate) sb->s_root->d_op = &sysv_dentry_operations; - sb->s_flags |= MS_RDONLY; sb->s_dirt = 1; return 1; } @@ -481,6 +482,7 @@ static int v7_fill_super(struct super_bl (fs32_to_cpu(sbi, v7i->i_size) & 017) != 0) goto failed; brelse(bh2); + bh2 = NULL; sbi->s_bh1 = bh; sbi->s_bh2 = bh; --- linux-2.6.6-rc1/fs/sysv/sysv.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/fs/sysv/sysv.h 2004-04-18 22:25:24.963035520 -0700 @@ -54,6 +54,7 @@ struct sysv_sb_info { u32 s_ndatazones; /* total number of data zones */ u32 s_nzones; /* same as s_sbd->s_fsize */ u16 s_namelen; /* max length of dir entry */ + int s_forced_ro; }; /* --- linux-2.6.6-rc1/fs/udf/super.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/udf/super.c 2004-04-18 22:25:24.965035216 -0700 @@ -460,6 +460,12 @@ udf_remount_fs(struct super_block *sb, i UDF_SB(sb)->s_gid = uopt.gid; UDF_SB(sb)->s_umask = uopt.umask; + if (UDF_SB_LVIDBH(sb)) { + int write_rev = le16_to_cpu(UDF_SB_LVIDIU(sb)->minUDFWriteRev); + if (write_rev > UDF_MAX_WRITE_VERSION) + *flags |= MS_RDONLY; + } + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; if (*flags & MS_RDONLY) --- 
linux-2.6.6-rc1/fs/xfs/linux/xfs_aops.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/xfs/linux/xfs_aops.c 2004-04-18 22:25:57.353111480 -0700 @@ -1013,7 +1013,7 @@ linvfs_get_blocks_direct( create, 1, BMAPI_WRITE|BMAPI_DIRECT); } -STATIC int +STATIC ssize_t linvfs_direct_IO( int rw, struct kiocb *iocb, --- linux-2.6.6-rc1/fs/xfs/linux/xfs_buf.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/fs/xfs/linux/xfs_buf.c 2004-04-18 22:25:47.301639536 -0700 @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include "xfs_linux.h" @@ -1574,7 +1576,7 @@ pagebuf_delwri_queue( } list_add_tail(&pb->pb_list, &pbd_delwrite_queue); - pb->pb_flushtime = jiffies + xfs_age_buffer; + pb->pb_creation_time = jiffies; spin_unlock(&pbd_delwrite_lock); if (unlock) @@ -1647,7 +1649,7 @@ pagebuf_daemon( if ((pb->pb_flags & PBF_DELWRI) && !pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { if (!force_flush && - time_before(jiffies, pb->pb_flushtime)) { + time_before(jiffies, pb->pb_creation_time + xfs_age_buffer)) { pagebuf_unlock(pb); break; } --- linux-2.6.6-rc1/fs/xfs/linux/xfs_buf.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_buf.h 2004-04-18 22:25:47.302639384 -0700 @@ -139,7 +139,7 @@ typedef int (*page_buf_bdstrat_t)(struct typedef struct page_buf_s { struct semaphore pb_sema; /* semaphore for lockables */ - unsigned long pb_flushtime; /* time to flush pagebuf */ + unsigned long pb_creation_time; /* time pagebuf was created */ atomic_t pb_pin_count; /* pin count */ wait_queue_head_t pb_waiters; /* unpin waiters */ struct list_head pb_list; --- linux-2.6.6-rc1/fs/xfs/linux/xfs_globals.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_globals.c 2004-04-18 22:25:47.302639384 -0700 @@ -57,12 +57,15 @@ xfs_param_t xfs_params = { .panic_mask = { 0, 0, 127 }, .error_level = { 0, 3, 11 }, .sync_interval = { HZ, 30*HZ, 60*HZ }, + .lm_sync_interval + = { HZ, 600*HZ, INT_MAX }, .stats_clear = { 0, 0, 1 }, .inherit_sync = { 0, 1, 1 }, .inherit_nodump = { 0, 1, 1 
}, .inherit_noatim = { 0, 1, 1 }, .flush_interval = { HZ/2, HZ, 30*HZ }, .age_buffer = { 1*HZ, 15*HZ, 300*HZ }, + .lm_age_buffer = { 1*HZ, 600*HZ, INT_MAX }, }; /* --- linux-2.6.6-rc1/fs/xfs/linux/xfs_ioctl.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/xfs/linux/xfs_ioctl.c 2004-04-18 22:26:00.601617632 -0700 @@ -825,13 +825,14 @@ xfs_ioctl( case XFS_IOC_FREEZE: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - xfs_fs_freeze(mp); + + freeze_bdev(inode->i_sb->s_bdev); return 0; case XFS_IOC_THAW: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - xfs_fs_thaw(mp); + thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); return 0; case XFS_IOC_GOINGDOWN: { --- linux-2.6.6-rc1/fs/xfs/linux/xfs_linux.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_linux.h 2004-04-18 22:25:47.303639232 -0700 @@ -134,13 +134,13 @@ static inline void set_buffer_unwritten_ #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val #define xfs_error_level xfs_params.error_level.val -#define xfs_syncd_interval xfs_params.sync_interval.val +#define xfs_syncd_interval (unlikely(laptop_mode) ? xfs_params.lm_sync_interval.val : xfs_params.sync_interval.val) #define xfs_stats_clear xfs_params.stats_clear.val #define xfs_inherit_sync xfs_params.inherit_sync.val #define xfs_inherit_nodump xfs_params.inherit_nodump.val #define xfs_inherit_noatime xfs_params.inherit_noatim.val #define xfs_flush_interval xfs_params.flush_interval.val -#define xfs_age_buffer xfs_params.age_buffer.val +#define xfs_age_buffer (unlikely(laptop_mode) ? 
xfs_params.lm_age_buffer.val : xfs_params.age_buffer.val) #define current_cpu() smp_processor_id() #define current_pid() (current->pid) --- linux-2.6.6-rc1/fs/xfs/linux/xfs_lrw.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_lrw.c 2004-04-18 22:26:00.602617480 -0700 @@ -682,8 +682,6 @@ xfs_write( io = &xip->i_iocore; mp = io->io_mount; - xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); - if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } --- linux-2.6.6-rc1/fs/xfs/linux/xfs_super.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_super.c 2004-04-18 22:26:00.603617328 -0700 @@ -72,6 +72,7 @@ #include #include #include +#include STATIC struct quotactl_ops linvfs_qops; STATIC struct super_operations linvfs_sops; @@ -470,6 +471,10 @@ syncd(void *arg) if (vfsp->vfs_flag & VFS_RDONLY) continue; VFS_SYNC(vfsp, SYNCD_FLAGS, NULL, error); + + vfsp->vfs_sync_seq++; + wmb(); + wake_up(&vfsp->vfs_wait_single_sync_task); } vfsp->vfs_sync_task = NULL; @@ -553,6 +558,24 @@ linvfs_sync_super( VFS_SYNC(vfsp, flags, NULL, error); sb->s_dirt = 0; + if (unlikely(laptop_mode)) + { + int prev_sync_seq = vfsp->vfs_sync_seq; + /* + * The disk must be active because we're syncing. + * We schedule syncd now (now that the disk is + * active) instead of later (when it might not be). + */ + wake_up_process(vfsp->vfs_sync_task); + /* + * We have to wait for the sync iteration to complete. + * If we don't, the disk activity caused by the sync + * will come after the sync is completed, and that + * triggers another sync from laptop mode. 
+ */ + wait_event(vfsp->vfs_wait_single_sync_task, vfsp->vfs_sync_seq != prev_sync_seq); + } + return -error; } @@ -589,28 +612,7 @@ STATIC void linvfs_freeze_fs( struct super_block *sb) { - vfs_t *vfsp = LINVFS_GET_VFS(sb); - vnode_t *vp; - int error; - - if (sb->s_flags & MS_RDONLY) - return; - VFS_ROOT(vfsp, &vp, error); - VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error); - VN_RELE(vp); -} - -STATIC void -linvfs_unfreeze_fs( - struct super_block *sb) -{ - vfs_t *vfsp = LINVFS_GET_VFS(sb); - vnode_t *vp; - int error; - - VFS_ROOT(vfsp, &vp, error); - VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error); - VN_RELE(vp); + VFS_FREEZE(LINVFS_GET_VFS(sb)); } STATIC struct dentry * @@ -850,7 +852,6 @@ STATIC struct super_operations linvfs_so .write_super = linvfs_write_super, .sync_fs = linvfs_sync_super, .write_super_lockfs = linvfs_freeze_fs, - .unlockfs = linvfs_unfreeze_fs, .statfs = linvfs_statfs, .remount_fs = linvfs_remount, .show_options = linvfs_show_options, --- linux-2.6.6-rc1/fs/xfs/linux/xfs_sysctl.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_sysctl.c 2004-04-18 22:25:47.304639080 -0700 @@ -103,6 +103,11 @@ STATIC ctl_table xfs_table[] = { &sysctl_intvec, NULL, &xfs_params.sync_interval.min, &xfs_params.sync_interval.max}, + {XFS_LM_SYNC_INTERVAL, "lm_sync_interval", &xfs_params.lm_sync_interval.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.lm_sync_interval.min, &xfs_params.lm_sync_interval.max}, + {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val, sizeof(int), 0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL, @@ -128,6 +133,11 @@ STATIC ctl_table xfs_table[] = { &sysctl_intvec, NULL, &xfs_params.age_buffer.min, &xfs_params.age_buffer.max}, + {XFS_LM_AGE_BUFFER, "lm_age_buffer", &xfs_params.lm_age_buffer.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &xfs_params.lm_age_buffer.min, 
&xfs_params.lm_age_buffer.max}, + /* please keep this the last entry */ #ifdef CONFIG_PROC_FS {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val, --- linux-2.6.6-rc1/fs/xfs/linux/xfs_sysctl.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_sysctl.h 2004-04-18 22:25:47.305638928 -0700 @@ -54,6 +54,7 @@ typedef struct xfs_param { xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ xfs_sysctl_val_t sync_interval; /* time between sync calls */ + xfs_sysctl_val_t lm_sync_interval; /* same, in laptop mode */ xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ @@ -62,6 +63,9 @@ typedef struct xfs_param { * delwri flush daemon. */ xfs_sysctl_val_t age_buffer; /* time for buffer to age before * we flush it. */ + xfs_sysctl_val_t lm_age_buffer; /* time for buffer to age before + * we flush it when laptop mode is + * active. */ } xfs_param_t; /* @@ -86,12 +90,14 @@ enum { XFS_PANIC_MASK = 6, XFS_ERRLEVEL = 7, XFS_SYNC_INTERVAL = 8, + XFS_LM_SYNC_INTERVAL = 9, XFS_STATS_CLEAR = 12, XFS_INHERIT_SYNC = 13, XFS_INHERIT_NODUMP = 14, XFS_INHERIT_NOATIME = 15, XFS_FLUSH_INTERVAL = 16, XFS_AGE_BUFFER = 17, + XFS_LM_AGE_BUFFER = 18, }; extern xfs_param_t xfs_params; --- linux-2.6.6-rc1/fs/xfs/linux/xfs_vfs.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/linux/xfs_vfs.c 2004-04-18 22:26:00.604617176 -0700 @@ -230,6 +230,18 @@ vfs_force_shutdown( ((*bhvtovfsops(next)->vfs_force_shutdown)(next, fl, file, line)); } +void +vfs_freeze( + struct bhv_desc *bdp) +{ + struct bhv_desc *next = bdp; + + ASSERT(next); + while (!
(bhvtovfsops(next))->vfs_freeze) + next = BHV_NEXT(next); + ((*bhvtovfsops(next)->vfs_freeze)(next)); +} + vfs_t * vfs_allocate( void ) { @@ -238,6 +250,7 @@ vfs_allocate( void ) vfsp = kmem_zalloc(sizeof(vfs_t), KM_SLEEP); bhv_head_init(VFS_BHVHEAD(vfsp), "vfs"); init_waitqueue_head(&vfsp->vfs_wait_sync_task); + init_waitqueue_head(&vfsp->vfs_wait_single_sync_task); return vfsp; } --- linux-2.6.6-rc1/fs/xfs/linux/xfs_vfs.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/fs/xfs/linux/xfs_vfs.h 2004-04-18 22:26:00.605617024 -0700 @@ -52,6 +52,8 @@ typedef struct vfs { bhv_head_t vfs_bh; /* head of vfs behavior chain */ struct super_block *vfs_super; /* Linux superblock structure */ struct task_struct *vfs_sync_task; + int vfs_sync_seq; /* syncd iteration sequence number */ + wait_queue_head_t vfs_wait_single_sync_task; wait_queue_head_t vfs_wait_sync_task; } vfs_t; @@ -112,6 +114,7 @@ typedef int (*vfs_quotactl_t)(bhv_desc_t typedef void (*vfs_init_vnode_t)(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); typedef void (*vfs_force_shutdown_t)(bhv_desc_t *, int, char *, int); +typedef void (*vfs_freeze_t)(bhv_desc_t *); typedef struct vfsops { bhv_position_t vf_position; /* behavior chain position */ @@ -128,6 +131,7 @@ typedef struct vfsops { vfs_quotactl_t vfs_quotactl; /* disk quota */ vfs_init_vnode_t vfs_init_vnode; /* initialize a new vnode */ vfs_force_shutdown_t vfs_force_shutdown; /* crash and burn */ + vfs_freeze_t vfs_freeze; /* freeze fs for snapshot */ } vfsops_t; /* @@ -147,6 +151,7 @@ typedef struct vfsops { #define VFS_QUOTACTL(v, c,id,p, rv) ((rv) = vfs_quotactl(VHEAD(v), c,id,p)) #define VFS_INIT_VNODE(v, vp,b,ul) ( vfs_init_vnode(VHEAD(v), vp,b,ul) ) #define VFS_FORCE_SHUTDOWN(v, fl,f,l) ( vfs_force_shutdown(VHEAD(v), fl,f,l) ) +#define VFS_FREEZE(v) ( vfs_freeze(VHEAD(v)) ) /* * PVFS's. Operates on behavior descriptor pointers. 
@@ -164,6 +169,7 @@ typedef struct vfsops { #define PVFS_QUOTACTL(b, c,id,p, rv) ((rv) = vfs_quotactl(b, c,id,p)) #define PVFS_INIT_VNODE(b, vp,b2,ul) ( vfs_init_vnode(b, vp,b2,ul) ) #define PVFS_FORCE_SHUTDOWN(b, fl,f,l) ( vfs_force_shutdown(b, fl,f,l) ) +#define PVFS_FREEZE(b) ( vfs_freeze(b) ) extern int vfs_mount(bhv_desc_t *, struct xfs_mount_args *, struct cred *); extern int vfs_parseargs(bhv_desc_t *, char *, struct xfs_mount_args *, int); @@ -178,6 +184,7 @@ extern int vfs_dmapiops(bhv_desc_t *, ca extern int vfs_quotactl(bhv_desc_t *, int, int, caddr_t); extern void vfs_init_vnode(bhv_desc_t *, struct vnode *, bhv_desc_t *, int); extern void vfs_force_shutdown(bhv_desc_t *, int, char *, int); +extern void vfs_freeze(bhv_desc_t *); typedef struct bhv_vfsops { struct vfsops bhv_common; --- linux-2.6.6-rc1/fs/xfs/xfs_fsops.c 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/xfs_fsops.c 2004-04-18 22:26:00.605617024 -0700 @@ -582,63 +582,25 @@ xfs_fs_log_dummy(xfs_mount_t *mp) } int -xfs_fs_freeze( - xfs_mount_t *mp) -{ - vfs_t *vfsp; - /*REFERENCED*/ - int error; - - vfsp = XFS_MTOVFS(mp); - - /* Stop new writers */ - xfs_start_freeze(mp, XFS_FREEZE_WRITE); - - /* Flush the refcache */ - xfs_refcache_purge_mp(mp); - - /* Flush delalloc and delwri data */ - VFS_SYNC(vfsp, SYNC_DELWRI|SYNC_WAIT, NULL, error); - - /* Pause transaction subsystem */ - xfs_start_freeze(mp, XFS_FREEZE_TRANS); - - /* Flush any remaining inodes into buffers */ - VFS_SYNC(vfsp, SYNC_ATTR|SYNC_WAIT, NULL, error); - - /* Push all buffers out to disk */ - xfs_binval(mp->m_ddev_targp); - if (mp->m_rtdev_targp) { - xfs_binval(mp->m_rtdev_targp); - } - - /* Push the superblock and write an unmount record */ - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); - - return 0; -} - -int -xfs_fs_thaw( - xfs_mount_t *mp) -{ - xfs_finish_freeze(mp); - return 0; -} - -int xfs_fs_goingdown( xfs_mount_t *mp, __uint32_t inflags) { - switch (inflags) - { - case XFS_FSOP_GOING_FLAGS_DEFAULT: - 
xfs_fs_freeze(mp); - xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); - xfs_fs_thaw(mp); + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (inflags) { + case XFS_FSOP_GOING_FLAGS_DEFAULT: { + struct vfs *vfsp = XFS_MTOVFS(mp); + struct super_block *sb = freeze_bdev(vfsp->vfs_super->s_bdev); + + if (sb) { + xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); + thaw_bdev(sb->s_bdev, sb); + } + break; + } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: xfs_force_shutdown(mp, XFS_FORCE_UMOUNT); break; --- linux-2.6.6-rc1/fs/xfs/xfs_fsops.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/xfs_fsops.h 2004-04-18 22:26:00.606616872 -0700 @@ -60,14 +60,6 @@ xfs_reserve_blocks( xfs_fsop_resblks_t *outval); int -xfs_fs_freeze( - xfs_mount_t *mp); - -int -xfs_fs_thaw( - xfs_mount_t *mp); - -int xfs_fs_goingdown( xfs_mount_t *mp, __uint32_t inflags); --- linux-2.6.6-rc1/fs/xfs/xfs_log.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/xfs/xfs_log.c 2004-04-18 22:26:00.610616264 -0700 @@ -820,7 +820,7 @@ xfs_log_need_covered(xfs_mount_t *mp) xlog_t *log = mp->m_log; vfs_t *vfsp = XFS_MTOVFS(mp); - if (mp->m_frozen || XFS_FORCED_SHUTDOWN(mp) || + if (vfsp->vfs_super->s_frozen || XFS_FORCED_SHUTDOWN(mp) || (vfsp->vfs_flag & VFS_RDONLY)) return 0; --- linux-2.6.6-rc1/fs/xfs/xfs_mount.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/xfs/xfs_mount.c 2004-04-18 22:26:00.611616112 -0700 @@ -140,9 +140,6 @@ xfs_mount_init(void) */ xfs_trans_ail_init(mp); - /* Init freeze sync structures */ - spinlock_init(&mp->m_freeze_lock, "xfs_freeze"); - init_sv(&mp->m_wait_unfreeze, SV_DEFAULT, "xfs_freeze", 0); atomic_set(&mp->m_active_trans, 0); return mp; @@ -192,8 +189,6 @@ xfs_mount_free( VFS_REMOVEBHV(vfsp, &mp->m_bhv); } - spinlock_destroy(&mp->m_freeze_lock); - sv_destroy(&mp->m_wait_unfreeze); kmem_free(mp, sizeof(xfs_mount_t)); } @@ -1586,59 +1581,3 @@ xfs_mount_log_sbunit( xfs_mod_sb(tp, fields); xfs_trans_commit(tp, 0, NULL); } - -/* Functions to lock access out of the filesystem for forced - * shutdown or 
snapshot. - */ - -void -xfs_start_freeze( - xfs_mount_t *mp, - int level) -{ - unsigned long s = mutex_spinlock(&mp->m_freeze_lock); - - mp->m_frozen = level; - mutex_spinunlock(&mp->m_freeze_lock, s); - - if (level == XFS_FREEZE_TRANS) { - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - } -} - -void -xfs_finish_freeze( - xfs_mount_t *mp) -{ - unsigned long s = mutex_spinlock(&mp->m_freeze_lock); - - if (mp->m_frozen) { - mp->m_frozen = 0; - sv_broadcast(&mp->m_wait_unfreeze); - } - - mutex_spinunlock(&mp->m_freeze_lock, s); -} - -void -xfs_check_frozen( - xfs_mount_t *mp, - bhv_desc_t *bdp, - int level) -{ - unsigned long s; - - if (mp->m_frozen) { - s = mutex_spinlock(&mp->m_freeze_lock); - - if (mp->m_frozen < level) { - mutex_spinunlock(&mp->m_freeze_lock, s); - } else { - sv_wait(&mp->m_wait_unfreeze, 0, &mp->m_freeze_lock, s); - } - } - - if (level == XFS_FREEZE_TRANS) - atomic_inc(&mp->m_active_trans); -} --- linux-2.6.6-rc1/fs/xfs/xfs_mount.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/fs/xfs/xfs_mount.h 2004-04-18 22:26:00.612615960 -0700 @@ -379,10 +379,6 @@ typedef struct xfs_mount { struct xfs_dmops m_dm_ops; /* vector of DMI ops */ struct xfs_qmops m_qm_ops; /* vector of XQM ops */ struct xfs_ioops m_io_ops; /* vector of I/O ops */ - lock_t m_freeze_lock; /* Lock for m_frozen */ - uint m_frozen; /* FS frozen for shutdown or - * snapshot */ - sv_t m_wait_unfreeze;/* waiting to unfreeze */ atomic_t m_active_trans; /* number trans frozen */ } xfs_mount_t; @@ -558,16 +554,6 @@ extern void xfs_initialize_perag(xfs_mou extern void xfs_xlatesb(void *, struct xfs_sb *, int, xfs_arch_t, __int64_t); -/* - * Flags for freeze operations. 
- */ -#define XFS_FREEZE_WRITE 1 -#define XFS_FREEZE_TRANS 2 - -extern void xfs_start_freeze(xfs_mount_t *, int); -extern void xfs_finish_freeze(xfs_mount_t *); -extern void xfs_check_frozen(xfs_mount_t *, bhv_desc_t *, int); - extern struct vfsops xfs_vfsops; extern struct vnodeops xfs_vnodeops; --- linux-2.6.6-rc1/fs/xfs/xfs_trans.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/xfs/xfs_trans.c 2004-04-18 22:26:00.613615808 -0700 @@ -131,7 +131,9 @@ xfs_trans_alloc( xfs_mount_t *mp, uint type) { - xfs_check_frozen(mp, NULL, XFS_FREEZE_TRANS); + vfs_check_frozen(XFS_MTOVFS(mp)->vfs_super, SB_FREEZE_TRANS); + atomic_inc(&mp->m_active_trans); + return (_xfs_trans_alloc(mp, type)); } --- linux-2.6.6-rc1/fs/xfs/xfs_vfsops.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/fs/xfs/xfs_vfsops.c 2004-04-18 22:26:00.615615504 -0700 @@ -1858,6 +1858,20 @@ xfs_showargs( return 0; } +STATIC void +xfs_freeze( + bhv_desc_t *bdp) +{ + xfs_mount_t *mp = XFS_BHVTOM(bdp); + + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* Push the superblock and write an unmount record */ + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + vfsops_t xfs_vfsops = { BHV_IDENTITY_INIT(VFS_BHV_XFS,VFS_POSITION_XFS), @@ -1874,4 +1888,5 @@ vfsops_t xfs_vfsops = { .vfs_quotactl = (vfs_quotactl_t)fs_nosys, .vfs_init_vnode = xfs_initialize_vnode, .vfs_force_shutdown = xfs_do_force_shutdown, + .vfs_freeze = xfs_freeze, }; --- linux-2.6.6-rc1/include/asm-alpha/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-alpha/ide.h 2004-04-18 22:26:01.784437816 -0700 @@ -43,43 +43,11 @@ static inline unsigned long ide_default_ } } -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - 
hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -/* - * This registers the standard ports for this architecture with the IDE - * driver. - */ -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_PCI - hw_regs_t hw; - int index; - - for (index = 0; index < MAX_HWIFS; index++) { - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } +#ifdef CONFIG_PCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) #endif -} #include --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-alpha/lockmeter.h 2004-04-18 22:25:47.918545752 -0700 @@ -0,0 +1,84 @@ +/* + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Peter Rival (frival@zk3.dec.com) + */ + +#ifndef _ALPHA_LOCKMETER_H +#define _ALPHA_LOCKMETER_H + +#include +#define CPU_CYCLE_FREQUENCY hwrpb->cycle_freq + +#define get_cycles64() get_cycles() + +#define THIS_CPU_NUMBER smp_processor_id() + +#include + +#define SPINLOCK_MAGIC_INIT /**/ + +/* + * Macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * We also assume that the hash table has less than 32767 entries. + * the high order bit is used for write locking a rw_lock + * Note: although these defines and macros are the same as what is being used + * in include/asm-i386/lockmeter.h, they are present here to easily + * allow an alternate Alpha implementation. 
+ */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Alpha is little endian */ + unsigned short lock; + unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) (((inst_rwlock_t *)rwlock_ptr)->lock & 1) +#define IABS(x) ((x) > 0 ? 
(x) : -(x)) + +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) ((inst_rwlock_t *)rwlock_ptr)->lock; + /* readers subtract 2, so we have to: */ + /* - andnot off a possible writer (bit 0) */ + /* - get the absolute value */ + /* - divide by 2 (right shift by one) */ + /* to find the number of readers */ + if (tmp == 0) return(0); + else return(IABS(tmp & ~1)>>1); +} + +#endif /* _ALPHA_LOCKMETER_H */ --- linux-2.6.6-rc1/include/asm-alpha/pgtable.h 2003-10-08 15:07:10.000000000 -0700 +++ 25/include/asm-alpha/pgtable.h 2004-04-18 22:25:33.692708408 -0700 @@ -349,6 +349,4 @@ extern void paging_init(void); /* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ #define HAVE_ARCH_UNMAPPED_AREA -typedef pte_t *pte_addr_t; - #endif /* _ALPHA_PGTABLE_H */ --- linux-2.6.6-rc1/include/asm-alpha/rmap.h 2003-06-14 12:18:22.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _ALPHA_RMAP_H -#define _ALPHA_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-alpha/setup.h 2004-04-18 22:25:35.904372184 -0700 @@ -0,0 +1,6 @@ +#ifndef __ALPHA_SETUP_H +#define __ALPHA_SETUP_H + +#define COMMAND_LINE_SIZE 256 + +#endif --- linux-2.6.6-rc1/include/asm-alpha/spinlock.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/asm-alpha/spinlock.h 2004-04-18 22:25:47.918545752 -0700 @@ -6,6 +6,10 @@ #include #include +#ifdef CONFIG_LOCKMETER +#undef DEBUG_SPINLOCK +#undef DEBUG_RWLOCK +#endif /* * Simple spin lock operations. There are two variants, one clears IRQ's @@ -95,9 +99,18 @@ static inline int _raw_spin_trylock(spin typedef struct { volatile int write_lock:1, read_counter:31; +#ifdef CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* need this storage for CPU and lock INDEX ............. 
*/ + unsigned magic; +#endif } /*__attribute__((aligned(32)))*/ rwlock_t; +#ifdef CONFIG_LOCKMETER +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 } +#else #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } +#endif #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) #define rwlock_is_locked(x) (*(volatile int *)(x) != 0) @@ -169,4 +182,41 @@ static inline void _raw_read_unlock(rwlo : "m" (*lock) : "memory"); } +#ifdef CONFIG_LOCKMETER +static inline int _raw_write_trylock(rwlock_t *lock) +{ + long temp,result; + + __asm__ __volatile__( + " ldl_l %1,%0\n" + " mov $31,%2\n" + " bne %1,1f\n" + " or $31,1,%2\n" + " stl_c %2,%0\n" + "1: mb\n" + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) + : "m" (*(volatile int *)lock) + ); + + return (result); +} + +static inline int _raw_read_trylock(rwlock_t *lock) +{ + unsigned long temp,result; + + __asm__ __volatile__( + " ldl_l %1,%0\n" + " mov $31,%2\n" + " blbs %1,1f\n" + " subl %1,2,%2\n" + " stl_c %2,%0\n" + "1: mb\n" + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result) + : "m" (*(volatile int *)lock) + ); + return (result); +} +#endif /* CONFIG_LOCKMETER */ + #endif /* _ALPHA_SPINLOCK_H */ --- linux-2.6.6-rc1/include/asm-alpha/system.h 2003-06-14 12:18:23.000000000 -0700 +++ 25/include/asm-alpha/system.h 2004-04-18 22:25:35.905372032 -0700 @@ -43,7 +43,6 @@ */ #define PARAM ZERO_PGE #define COMMAND_LINE ((char*)(PARAM + 0x0000)) -#define COMMAND_LINE_SIZE 256 #define INITRD_START (*(unsigned long *) (PARAM+0x100)) #define INITRD_SIZE (*(unsigned long *) (PARAM+0x108)) --- linux-2.6.6-rc1/include/asm-arm26/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-arm26/ide.h 2004-04-18 22:26:01.549473536 -0700 @@ -45,6 +45,8 @@ static inline void ide_init_hwif_ports(h *irq = 0; } +#define ide_init_default_irq(base) (0) + /* * This registers the standard ports for this architecture with the IDE * driver. 
--- linux-2.6.6-rc1/include/asm-arm26/pgtable.h 2003-10-08 15:07:10.000000000 -0700 +++ 25/include/asm-arm26/pgtable.h 2004-04-18 22:25:33.693708256 -0700 @@ -290,8 +290,6 @@ static inline pte_t mk_pte_phys(unsigned #define io_remap_page_range(vma,from,phys,size,prot) \ remap_page_range(vma,from,phys,size,prot) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ #endif /* _ASMARM_PGTABLE_H */ --- linux-2.6.6-rc1/include/asm-arm26/rmap.h 2003-06-14 12:18:08.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,66 +0,0 @@ -#ifndef _ARM_RMAP_H -#define _ARM_RMAP_H - -/* - * linux/include/asm-arm26/proc-armv/rmap.h - * - * Architecture dependant parts of the reverse mapping code, - * - * ARM is different since hardware page tables are smaller than - * the page size and Linux uses a "duplicate" one with extra info. - * For rmap this means that the first 2 kB of a page are the hardware - * page tables and the last 2 kB are the software page tables. - */ - -static inline void pgtable_add_rmap(struct page *page, struct mm_struct * mm, unsigned long address) -{ - page->mapping = (void *)mm; - page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); - inc_page_state(nr_page_table_pages); -} - -static inline void pgtable_remove_rmap(struct page *page) -{ - page->mapping = NULL; - page->index = 0; - dec_page_state(nr_page_table_pages); -} - -static inline struct mm_struct * ptep_to_mm(pte_t * ptep) -{ - struct page * page = virt_to_page(ptep); - return (struct mm_struct *)page->mapping; -} - -/* The page table takes half of the page */ -#define PTE_MASK ((PAGE_SIZE / 2) - 1) - -static inline unsigned long ptep_to_address(pte_t * ptep) -{ - struct page * page = virt_to_page(ptep); - unsigned long low_bits; - - low_bits = ((unsigned long)ptep & PTE_MASK) * PTRS_PER_PTE; - return page->index + low_bits; -} - -//FIXME!!! IS these correct? 
-static inline pte_addr_t ptep_to_paddr(pte_t *ptep) -{ - return (pte_addr_t)ptep; -} - -static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) -{ - return (pte_t *)pte_paddr; -} - -static inline void rmap_ptep_unmap(pte_t *pte) -{ - return; -} - - -//#include - -#endif /* _ARM_RMAP_H */ --- linux-2.6.6-rc1/include/asm-arm26/tlb.h 2003-06-14 12:18:51.000000000 -0700 +++ 25/include/asm-arm26/tlb.h 2004-04-18 22:26:02.147382640 -0700 @@ -1,6 +1,7 @@ #ifndef __ASMARM_TLB_H #define __ASMARM_TLB_H +#include #include /* --- linux-2.6.6-rc1/include/asm-arm/cacheflush.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-arm/cacheflush.h 2004-04-18 22:25:24.965035216 -0700 @@ -295,7 +295,9 @@ extern void __flush_dcache_page(struct p static inline void flush_dcache_page(struct page *page) { - if (page_mapping(page) && !mapping_mapped(page->mapping)) + struct address_space *mapping = page_mapping(page); + + if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else __flush_dcache_page(page); --- linux-2.6.6-rc1/include/asm-arm/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-arm/ide.h 2004-04-18 22:26:01.549473536 -0700 @@ -26,6 +26,8 @@ #define ide_default_io_base(i) (0) #define ide_default_irq(b) (0) +#define ide_init_default_irq(base) (0) + #define __ide_mm_insw(port,addr,len) readsw(port,addr,len) #define __ide_mm_insl(port,addr,len) readsl(port,addr,len) #define __ide_mm_outsw(port,addr,len) writesw(port,addr,len) --- linux-2.6.6-rc1/include/asm-arm/kmap_types.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/asm-arm/kmap_types.h 2004-04-18 22:25:33.693708256 -0700 @@ -14,7 +14,6 @@ enum km_type { KM_BIO_DST_IRQ, KM_PTE0, KM_PTE1, - KM_PTE2, KM_IRQ0, KM_IRQ1, KM_SOFTIRQ0, --- linux-2.6.6-rc1/include/asm-arm/pgtable.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/asm-arm/pgtable.h 2004-04-18 22:25:33.694708104 -0700 @@ -15,13 +15,62 @@ #include /* - * We pull a couple of tricks here: - * 1. We wrap the PMD into the PGD. 
- * 2. We lie about the size of the PTE and PGD. - * Even though we have 256 PTE entries and 4096 PGD entries, we tell - * Linux that we actually have 512 PTE entries and 2048 PGD entries. - * Each "Linux" PGD entry is made up of two hardware PGD entries, and - * each PTE table is actually two hardware PTE tables. + * Hardware-wise, we have a two level page table structure, where the first + * level has 4096 entries, and the second level has 256 entries. Each entry + * is one 32-bit word. Most of the bits in the second level entry are used + * by hardware, and there aren't any "accessed" and "dirty" bits. + * + * Linux on the other hand has a three level page table structure, which can + * be wrapped to fit a two level page table structure easily - using the PGD + * and PTE only. However, Linux also expects one "PTE" table per page, and + * at least a "dirty" bit. + * + * Therefore, we tweak the implementation slightly - we tell Linux that we + * have 2048 entries in the first level, each of which is 8 bytes (iow, two + * hardware pointers to the second level.) The second level contains two + * hardware PTE tables arranged contiguously, followed by Linux versions + * which contain the state information Linux needs. We, therefore, end up + * with 512 entries in the "PTE" level. + * + * This leads to the page tables having the following layout: + * + * pgd pte + * | | + * +--------+ +0 + * | |-----> +------------+ +0 + * +- - - - + +4 | h/w pt 0 | + * | |-----> +------------+ +1024 + * +--------+ +8 | h/w pt 1 | + * | | +------------+ +2048 + * +- - - - + | Linux pt 0 | + * | | +------------+ +3072 + * +--------+ | Linux pt 1 | + * | | +------------+ +4096 + * + * See L_PTE_xxx below for definitions of bits in the "Linux pt", and + * PTE_xxx for definitions of bits appearing in the "h/w pt". + * + * PMD_xxx definitions refer to bits in the first level page table. 
+ * + * The "dirty" bit is emulated by only granting hardware write permission + * iff the page is marked "writable" and "dirty" in the Linux PTE. This + * means that a write to a clean page will cause a permission fault, and + * the Linux MM layer will mark the page dirty via handle_pte_fault(). + * For the hardware to notice the permission change, the TLB entry must + * be flushed, and ptep_establish() does that for us. + * + * The "accessed" or "young" bit is emulated by a similar method; we only + * allow accesses to the page if the "young" bit is set. Accesses to the + * page will cause a fault, and handle_pte_fault() will set the young bit + * for us as long as the page is marked present in the corresponding Linux + * PTE entry. Again, ptep_establish() will ensure that the TLB is up to + * date. + * + * However, when the "young" bit is cleared, we deny access to the page + * by clearing the hardware PTE. Currently Linux does not flush the TLB + * for us in this case, which means the TLB will retain the translation + * until either the TLB entry is evicted under pressure, or a context + * switch which changes the user space mapping occurs.
*/ #define PTRS_PER_PTE 512 #define PTRS_PER_PMD 1 @@ -353,8 +402,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD #define io_remap_page_range(vma,from,phys,size,prot) \ remap_page_range(vma,from,phys,size,prot) -typedef pte_t *pte_addr_t; - #define pgtable_cache_init() do { } while (0) #endif /* !__ASSEMBLY__ */ --- linux-2.6.6-rc1/include/asm-arm/rmap.h 2003-06-14 12:17:57.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,6 +0,0 @@ -#ifndef _ARM_RMAP_H -#define _ARM_RMAP_H - -#include - -#endif /* _ARM_RMAP_H */ --- linux-2.6.6-rc1/include/asm-arm/uaccess.h 2003-09-08 13:58:59.000000000 -0700 +++ 25/include/asm-arm/uaccess.h 2004-04-18 22:25:24.967034912 -0700 @@ -75,7 +75,7 @@ static inline void set_fs (mm_segment_t #define access_ok(type,addr,size) (__range_ok(addr,size) == 0) -static inline int verify_area(int type, const void * addr, unsigned long size) +static inline int verify_area(int type, const void __user *addr, unsigned long size) { return access_ok(type, addr, size) ? 
0 : -EFAULT; } @@ -354,13 +354,13 @@ do { \ : "r" (x), "i" (-EFAULT) \ : "cc") -extern unsigned long __arch_copy_from_user(void *to, const void *from, unsigned long n); -extern unsigned long __arch_copy_to_user(void *to, const void *from, unsigned long n); -extern unsigned long __arch_clear_user(void *addr, unsigned long n); -extern unsigned long __arch_strncpy_from_user(char *to, const char *from, unsigned long count); -extern unsigned long __arch_strnlen_user(const char *s, long n); +extern unsigned long __arch_copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __arch_copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __arch_clear_user(void __user *addr, unsigned long n); +extern unsigned long __arch_strncpy_from_user(char *to, const char __user *from, unsigned long count); +extern unsigned long __arch_strnlen_user(const char __user *s, long n); -static __inline__ unsigned long copy_from_user(void *to, const void *from, unsigned long n) +static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __arch_copy_from_user(to, from, n); @@ -369,36 +369,36 @@ static __inline__ unsigned long copy_fro return n; } -static __inline__ unsigned long __copy_from_user(void *to, const void *from, unsigned long n) +static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { return __arch_copy_from_user(to, from, n); } -static __inline__ unsigned long copy_to_user(void *to, const void *from, unsigned long n) +static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) n = __arch_copy_to_user(to, from, n); return n; } -static __inline__ unsigned long __copy_to_user(void *to, const void *from, unsigned long n) +static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { return 
__arch_copy_to_user(to, from, n); } -static __inline__ unsigned long clear_user (void *to, unsigned long n) +static inline unsigned long clear_user (void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) n = __arch_clear_user(to, n); return n; } -static __inline__ unsigned long __clear_user (void *to, unsigned long n) +static inline unsigned long __clear_user (void __user *to, unsigned long n) { return __arch_clear_user(to, n); } -static __inline__ long strncpy_from_user (char *dst, const char *src, long count) +static inline long strncpy_from_user (char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -406,14 +406,14 @@ static __inline__ long strncpy_from_user return res; } -static __inline__ long __strncpy_from_user (char *dst, const char *src, long count) +static inline long __strncpy_from_user (char *dst, const char __user *src, long count) { return __arch_strncpy_from_user(dst, src, count); } #define strlen_user(s) strnlen_user(s, ~0UL >> 1) -static inline long strnlen_user(const char *s, long n) +static inline long strnlen_user(const char __user *s, long n) { unsigned long res = 0; --- linux-2.6.6-rc1/include/asm-cris/pgtable.h 2003-07-10 18:50:32.000000000 -0700 +++ 25/include/asm-cris/pgtable.h 2004-04-18 22:25:33.695707952 -0700 @@ -337,6 +337,4 @@ extern inline void update_mmu_cache(stru #define pte_to_pgoff(x) (pte_val(x) >> 6) #define pgoff_to_pte(x) __pte(((x) << 6) | _PAGE_FILE) -typedef pte_t *pte_addr_t; - #endif /* _CRIS_PGTABLE_H */ --- linux-2.6.6-rc1/include/asm-cris/rmap.h 2003-06-14 12:18:23.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _CRIS_RMAP_H -#define _CRIS_RMAP_H - -/* nothing to see, move along :) */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-cris/setup.h 2003-07-10 18:50:32.000000000 -0700 +++ 25/include/asm-cris/setup.h 2004-04-18 22:25:35.905372032 -0700 @@ -1,3 +1,6 @@ #ifndef _CRIS_SETUP_H #define 
_CRIS_SETUP_H + +#define COMMAND_LINE_SIZE 256 + #endif --- linux-2.6.6-rc1/include/asm-generic/rmap.h 2003-06-14 12:18:23.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,90 +0,0 @@ -#ifndef _GENERIC_RMAP_H -#define _GENERIC_RMAP_H -/* - * linux/include/asm-generic/rmap.h - * - * Architecture dependent parts of the reverse mapping code, - * this version should work for most architectures with a - * 'normal' page table layout. - * - * We use the struct page of the page table page to find out - * the process and full address of a page table entry: - * - page->mapping points to the process' mm_struct - * - page->index has the high bits of the address - * - the lower bits of the address are calculated from the - * offset of the page table entry within the page table page - * - * For CONFIG_HIGHPTE, we need to represent the address of a pte in a - * scalar pte_addr_t. The pfn of the pte's page is shifted left by PAGE_SIZE - * bits and is then ORed with the byte offset of the pte within its page. - * - * For CONFIG_HIGHMEM4G, the pte_addr_t is 32 bits. 20 for the pfn, 12 for - * the offset. - * - * For CONFIG_HIGHMEM64G, the pte_addr_t is 64 bits. 52 for the pfn, 12 for - * the offset. - */ -#include - -static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address) -{ -#ifdef BROKEN_PPC_PTE_ALLOC_ONE - /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... 
;( */ - extern int mem_init_done; - - if (!mem_init_done) - return; -#endif - page->mapping = (void *)mm; - page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); - inc_page_state(nr_page_table_pages); -} - -static inline void pgtable_remove_rmap(struct page * page) -{ - page->mapping = NULL; - page->index = 0; - dec_page_state(nr_page_table_pages); -} - -static inline struct mm_struct * ptep_to_mm(pte_t * ptep) -{ - struct page * page = kmap_atomic_to_page(ptep); - return (struct mm_struct *) page->mapping; -} - -static inline unsigned long ptep_to_address(pte_t * ptep) -{ - struct page * page = kmap_atomic_to_page(ptep); - unsigned long low_bits; - low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE; - return page->index + low_bits; -} - -#ifdef CONFIG_HIGHPTE -static inline pte_addr_t ptep_to_paddr(pte_t *ptep) -{ - pte_addr_t paddr; - paddr = ((pte_addr_t)page_to_pfn(kmap_atomic_to_page(ptep))) << PAGE_SHIFT; - return paddr + (pte_addr_t)((unsigned long)ptep & ~PAGE_MASK); -} -#else -static inline pte_addr_t ptep_to_paddr(pte_t *ptep) -{ - return (pte_addr_t)ptep; -} -#endif - -#ifndef CONFIG_HIGHPTE -static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) -{ - return (pte_t *)pte_paddr; -} - -static inline void rmap_ptep_unmap(pte_t *pte) -{ - return; -} -#endif - -#endif /* _GENERIC_RMAP_H */ --- linux-2.6.6-rc1/include/asm-generic/tlb.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/include/asm-generic/tlb.h 2004-04-18 22:26:02.147382640 -0700 @@ -15,6 +15,7 @@ #include #include +#include #include /* @@ -146,4 +147,6 @@ static inline void tlb_remove_page(struc __pmd_free_tlb(tlb, pmdp); \ } while (0) +#define tlb_migrate_prepare(mm) do { } while(0) + #endif /* _ASM_GENERIC__TLB_H */ --- linux-2.6.6-rc1/include/asm-generic/vmlinux.lds.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-generic/vmlinux.lds.h 2004-04-18 22:25:54.260581616 -0700 @@ -53,6 +53,6 @@ } #define SCHED_TEXT \ - __scheduling_functions_start_here = .; \ + 
__sched_text_start = .; \ *(.sched.text) \ - __scheduling_functions_end_here = .; + __sched_text_end = .; --- linux-2.6.6-rc1/include/asm-h8300/ide.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/include/asm-h8300/ide.h 2004-04-18 22:26:01.550473384 -0700 @@ -25,10 +25,7 @@ static __inline__ void ide_init_hwif_por { } - -static inline void ide_init_default_hwifs(void) -{ -} +#define ide_init_default_irq(base) (0) #define MAX_HWIFS 1 --- linux-2.6.6-rc1/include/asm-h8300/pgtable.h 2003-08-08 22:55:13.000000000 -0700 +++ 25/include/asm-h8300/pgtable.h 2004-04-18 22:25:33.696707800 -0700 @@ -7,8 +7,6 @@ #include #include -typedef pte_t *pte_addr_t; - #define pgd_present(pgd) (1) /* pages are always present on NO_MM */ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) --- linux-2.6.6-rc1/include/asm-h8300/setup.h 2003-06-14 12:18:34.000000000 -0700 +++ 25/include/asm-h8300/setup.h 2004-04-18 22:25:35.905372032 -0700 @@ -1 +1,6 @@ -/* Nothing do */ +#ifndef __H8300_SETUP_H +#define __H8300_SETUP_H + +#define COMMAND_LINE_SIZE 512 + +#endif --- linux-2.6.6-rc1/include/asm-i386/bugs.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/asm-i386/bugs.h 2004-04-18 22:25:30.339218216 -0700 @@ -1,11 +1,11 @@ /* * include/asm-i386/bugs.h * - * Copyright (C) 1994 Linus Torvalds + * Copyright (C) 1994 Linus Torvalds * * Cyrix stuff, June 1998 by: * - Rafael R. Reilova (moved everything from head.S), - * + * * - Channing Corn (tests & fixes), * - Andrew D. Balsa (code cleanup). * @@ -25,7 +25,20 @@ #include #include #include - +#ifdef CONFIG_KGDB +/* + * Provide the command line "gdb" initial break + */ +int __init kgdb_initial_break(char * str) +{ + if (*str == '\0'){ + breakpoint(); + return 1; + } + return 0; +} +__setup("gdb",kgdb_initial_break); +#endif static int __init no_halt(char *s) { boot_cpu_data.hlt_works_ok = 0; @@ -140,7 +153,7 @@ static void __init check_popad(void) : "ecx", "edi" ); /* If this fails, it means that any user program may lock the CPU hard. Too bad. 
*/ if (res != 12345678) printk( "Buggy.\n" ); - else printk( "OK.\n" ); + else printk( "OK.\n" ); #endif } --- linux-2.6.6-rc1/include/asm-i386/ide.h 2003-06-14 12:17:57.000000000 -0700 +++ 25/include/asm-i386/ide.h 2004-04-18 22:26:01.785437664 -0700 @@ -59,29 +59,23 @@ static __inline__ unsigned long ide_defa } } +#ifdef CONFIG_X86_PC9800 static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, unsigned long ctrl_port, int *irq) { unsigned long reg = data_port; int i; -#ifdef CONFIG_X86_PC9800 + unsigned long increment = data_port == 0x640 ? 2 : 1; -#endif for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { hw->io_ports[i] = reg; -#ifdef CONFIG_X86_PC9800 reg += increment; -#else - reg += 1; -#endif } if (ctrl_port) { hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; -#ifdef CONFIG_X86_PC9800 } else if (data_port == 0x640) { hw->io_ports[IDE_CONTROL_OFFSET] = 0x74c; -#endif } else { hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; } @@ -89,21 +83,13 @@ static __inline__ void ide_init_hwif_por *irq = 0; hw->io_ports[IDE_IRQ_OFFSET] = 0; } +#endif -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_BLK_DEV_IDEPCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_BLK_DEV_IDEPCI */ -} +#ifdef CONFIG_BLK_DEV_IDEPCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) +#endif #include --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-i386/kgdb.h 2004-04-18 22:25:30.863138568 -0700 @@ -0,0 +1,69 @@ +#ifndef __KGDB +#define __KGDB + +/* + * This file should not include ANY others. This makes it usable + * most anywhere without the fear of include order or inclusion. + * Make it so! 
+ * + * This file may be included all the time. It is only active if + * CONFIG_KGDB is defined, otherwise it stubs out all the macros + * and entry points. + */ +#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) + +extern void breakpoint(void); +#define INIT_KGDB_INTS kgdb_enable_ints() + +#ifndef BREAKPOINT +#define BREAKPOINT asm(" int $3") +#endif + +extern void kgdb_schedule_breakpoint(void); +extern void kgdb_process_breakpoint(void); + +extern int kgdb_tty_hook(void); +extern int kgdb_eth_hook(void); +extern int kgdboe; + +/* + * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' + * pointer to its routine and it will be entered as the first thing + * when a trap occurs. + * + * Return values are, at present, undefined. + * + * The debug hook routine does not necessarily return to its caller. + * It has the register image and thus may choose to resume execution + * anywhere it pleases. + */ +struct pt_regs; + +extern int kgdb_handle_exception(int trapno, + int signo, int err_code, struct pt_regs *regs); +extern int in_kgdb(struct pt_regs *regs); + +#ifdef CONFIG_KGDB_TS +void kgdb_tstamp(int line, char *source, int data0, int data1); +/* + * This is the time stamp function. The macro adds the source info and + * does a cast on the data to allow most any 32-bit value. + */ + +#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) +#else +#define kgdb_ts(data0,data1) +#endif +#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ +#ifndef BREAKPOINT +#define BREAKPOINT +#endif +#define kgdb_ts(data0,data1) +#define in_kgdb +#define kgdb_handle_exception +#define breakpoint +#define INIT_KGDB_INTS +#define kgdb_process_breakpoint() do {} while(0) + +#endif +#endif /* __KGDB */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-i386/kgdb_local.h 2004-04-18 22:25:30.341217912 -0700 @@ -0,0 +1,102 @@ +#ifndef __KGDB_LOCAL +#define ___KGDB_LOCAL +#include +#include +#include +#include +#include +#include +#include +#include + +#define PORT 0x3f8 +#ifdef CONFIG_KGDB_PORT +#undef PORT +#define PORT CONFIG_KGDB_PORT +#endif +#define IRQ 4 +#ifdef CONFIG_KGDB_IRQ +#undef IRQ +#define IRQ CONFIG_KGDB_IRQ +#endif +#define SB_CLOCK 1843200 +#define SB_BASE (SB_CLOCK/16) +#define SB_BAUD9600 SB_BASE/9600 +#define SB_BAUD192 SB_BASE/19200 +#define SB_BAUD384 SB_BASE/38400 +#define SB_BAUD576 SB_BASE/57600 +#define SB_BAUD1152 SB_BASE/115200 +#ifdef CONFIG_KGDB_9600BAUD +#define SB_BAUD SB_BAUD9600 +#endif +#ifdef CONFIG_KGDB_19200BAUD +#define SB_BAUD SB_BAUD192 +#endif +#ifdef CONFIG_KGDB_38400BAUD +#define SB_BAUD SB_BAUD384 +#endif +#ifdef CONFIG_KGDB_57600BAUD +#define SB_BAUD SB_BAUD576 +#endif +#ifdef CONFIG_KGDB_115200BAUD +#define SB_BAUD SB_BAUD1152 +#endif +#ifndef SB_BAUD +#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ +#endif + +#ifndef CONFIG_X86_TSC +#undef rdtsc +#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} +#undef rdtscll +#define rdtscll(s) s++ +#endif + +#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ +#undef spin_lock +#undef spin_trylock +#undef spin_unlock +#define spin_lock _raw_spin_lock +#define spin_trylock _raw_spin_trylock +#define spin_unlock _raw_spin_unlock +#else +#endif +#undef spin_unlock_wait +#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ + while(spin_is_locked(x)) + +#define SB_IER 1 +#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS + +#define FLAGS 0 +#define SB_STATE { \ + 
magic: SSTATE_MAGIC, \ + baud_base: SB_BASE, \ + port: PORT, \ + irq: IRQ, \ + flags: FLAGS, \ + custom_divisor:SB_BAUD} +#define SB_INFO { \ + magic: SERIAL_MAGIC, \ + port: PORT,0,FLAGS, \ + state: &state, \ + tty: (struct tty_struct *)&state, \ + IER: SB_IER, \ + MCR: SB_MCR} +extern void putDebugChar(int); +/* RTAI support needs us to really stop/start interrupts */ + +#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") +#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") +#define kgdb_local_save_flags(x) __asm__ __volatile__(\ + "pushfl ; popl %0":"=g" (x): /* no input */) +#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ + "pushl %0 ; popfl": \ + /* no output */ :"g" (x):"memory", "cc") +#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() + +#ifdef CONFIG_SERIAL +extern void shutdown_for_kgdb(struct async_struct *info); +#endif +#define INIT_KDEBUG putDebugChar("+"); +#endif /* __KGDB_LOCAL */ --- linux-2.6.6-rc1/include/asm-i386/kmap_types.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/asm-i386/kmap_types.h 2004-04-18 22:25:33.696707800 -0700 @@ -19,12 +19,11 @@ D(5) KM_BIO_SRC_IRQ, D(6) KM_BIO_DST_IRQ, D(7) KM_PTE0, D(8) KM_PTE1, -D(9) KM_PTE2, -D(10) KM_IRQ0, -D(11) KM_IRQ1, -D(12) KM_SOFTIRQ0, -D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_TYPE_NR }; #undef D --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-i386/lockmeter.h 2004-04-18 22:25:47.919545600 -0700 @@ -0,0 +1,115 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks. + * Added support for hold time statistics for read and write + * locks. 
+ * Moved machine dependent code here from include/lockmeter.h. + * + */ + +#ifndef _I386_LOCKMETER_H +#define _I386_LOCKMETER_H + +#include +#include + +#include + +#ifdef __KERNEL__ +extern unsigned long cpu_khz; +#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000) +#else +#define CPU_CYCLE_FREQUENCY 450000000 +#endif + +#define THIS_CPU_NUMBER smp_processor_id() + +/* + * macros to cache and retrieve an index value inside of a spin lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. Not normally a problem!! + * we also assume that the hash table has less than 65535 entries. + */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Intel is little endian */ + unsigned short lock; + unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int 
rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) rwlock_ptr->lock; + /* read and write lock attempts may cause the lock value to temporarily */ + /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */ + /* is -1 because it was write locked and somebody tried to read lock it */ + /* or if it is -1 because it was read locked and somebody tried to write*/ + /* lock it. ........................................................... */ + do { + tmp = (int) rwlock_ptr->lock; + } while (tmp < 0); + if (tmp == 0) return(0); + else return(RW_LOCK_BIAS-tmp); +} + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0) +#define IABS(x) ((x) > 0 ? (x) : -(x)) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0) + +/* this is a lot of typing just to get gcc to emit "rdtsc" */ +static inline long long get_cycles64 (void) +{ + union longlong_u { + long long intlong; + struct intint_s { + uint32_t eax; + uint32_t edx; + } intint; + } longlong; + + rdtsc(longlong.intint.eax,longlong.intint.edx); + return longlong.intlong; +} + +#endif /* _I386_LOCKMETER_H */ --- linux-2.6.6-rc1/include/asm-i386/param.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/include/asm-i386/param.h 2004-04-18 22:25:38.442986256 -0700 @@ -4,7 +4,9 @@ #ifdef __KERNEL__ # define HZ 1000 /* Internal kernel timer frequency */ # define USER_HZ 100 /* .. 
some user interfaces are in "ticks" */ -# define CLOCKS_PER_SEC (USER_HZ) /* like times() */ +# define CLOCKS_PER_SEC (USER_HZ) /* like times() */ +# define JIFFIES_TO_MSEC(x) (x) +# define MSEC_TO_JIFFIES(x) (x) #endif #ifndef HZ @@ -18,5 +20,6 @@ #endif #define MAXHOSTNAMELEN 64 /* max length of hostname */ +#define COMMAND_LINE_SIZE 256 #endif --- linux-2.6.6-rc1/include/asm-i386/pgtable.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-i386/pgtable.h 2004-04-18 22:25:33.696707800 -0700 @@ -314,18 +314,6 @@ static inline pte_t pte_modify(pte_t pte #define pte_unmap_nested(pte) do { } while (0) #endif -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM4G) -typedef u32 pte_addr_t; -#endif - -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM64G) -typedef u64 pte_addr_t; -#endif - -#if !defined(CONFIG_HIGHPTE) -typedef pte_t *pte_addr_t; -#endif - /* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. --- linux-2.6.6-rc1/include/asm-i386/processor.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-i386/processor.h 2004-04-18 22:25:38.442986256 -0700 @@ -648,4 +648,9 @@ extern inline void prefetchw(const void extern void select_idle_routine(const struct cpuinfo_x86 *c); +#ifdef CONFIG_SCHED_SMT +#define ARCH_HAS_SCHED_DOMAIN +#define ARCH_HAS_SCHED_WAKE_IDLE +#endif + #endif /* __ASM_I386_PROCESSOR_H */ --- linux-2.6.6-rc1/include/asm-i386/rmap.h 2003-06-14 12:18:24.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,21 +0,0 @@ -#ifndef _I386_RMAP_H -#define _I386_RMAP_H - -/* nothing to see, move along */ -#include - -#ifdef CONFIG_HIGHPTE -static inline pte_t *rmap_ptep_map(pte_addr_t pte_paddr) -{ - unsigned long pfn = (unsigned long)(pte_paddr >> PAGE_SHIFT); - unsigned long off = ((unsigned long)pte_paddr) & ~PAGE_MASK; - return (pte_t *)((char *)kmap_atomic(pfn_to_page(pfn), KM_PTE2) + off); -} - -static inline void rmap_ptep_unmap(pte_t *pte) -{ - 
kunmap_atomic(pte, KM_PTE2); -} -#endif - -#endif --- linux-2.6.6-rc1/include/asm-i386/rwlock.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/asm-i386/rwlock.h 2004-04-18 22:25:34.744548504 -0700 @@ -20,28 +20,52 @@ #define RW_LOCK_BIAS 0x01000000 #define RW_LOCK_BIAS_STR "0x01000000" -#define __build_read_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $1,(%0)\n\t" \ - "js 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - ::"a" (rw) : "memory") - -#define __build_read_lock_const(rw, helper) \ - asm volatile(LOCK "subl $1,%0\n\t" \ - "js 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - :"=m" (*(volatile int *)rw) : : "memory") +#ifdef CONFIG_SPINLINE + + #define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "jns 1f\n\t" \ + "call " helper "\n\t" \ + "1:\t" \ + ::"a" (rw) : "memory") + + #define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "jns 1f\n\t" \ + "pushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "1:\t" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#else /* !CONFIG_SPINLINE */ + + #define __build_read_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $1,(%0)\n\t" \ + "js 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + ::"a" (rw) : "memory") + + #define __build_read_lock_const(rw, helper) \ + asm volatile(LOCK "subl $1,%0\n\t" \ + "js 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + :"=m" (*(volatile int *)rw) : : "memory") + +#endif /* CONFIG_SPINLINE */ + #define __build_read_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ @@ -50,28 +74,51 @@ 
__build_read_lock_ptr(rw, helper); \ } while (0) -#define __build_write_lock_ptr(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tcall " helper "\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - ::"a" (rw) : "memory") - -#define __build_write_lock_const(rw, helper) \ - asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ - "jnz 2f\n" \ - "1:\n" \ - LOCK_SECTION_START("") \ - "2:\tpushl %%eax\n\t" \ - "leal %0,%%eax\n\t" \ - "call " helper "\n\t" \ - "popl %%eax\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END \ - :"=m" (*(volatile int *)rw) : : "memory") +#ifdef CONFIG_SPINLINE + + #define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jz 1f\n\t" \ + "call " helper "\n\t" \ + "1:\n" \ + ::"a" (rw) : "memory") + + #define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ + "jz 1f\n\t" \ + "pushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "1:\n" \ + :"=m" (*(volatile int *)rw) : : "memory") + +#else /* !CONFIG_SPINLINE */ + + #define __build_write_lock_ptr(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tcall " helper "\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + ::"a" (rw) : "memory") + + #define __build_write_lock_const(rw, helper) \ + asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",%0\n\t" \ + "jnz 2f\n" \ + "1:\n" \ + LOCK_SECTION_START("") \ + "2:\tpushl %%eax\n\t" \ + "leal %0,%%eax\n\t" \ + "call " helper "\n\t" \ + "popl %%eax\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END \ + :"=m" (*(volatile int *)rw) : : "memory") + +#endif /* CONFIG_SPINLINE */ #define __build_write_lock(rw, helper) do { \ if (__builtin_constant_p(rw)) \ --- linux-2.6.6-rc1/include/asm-i386/smp.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/include/asm-i386/smp.h 2004-04-18 22:25:36.871225200 -0700 
@@ -34,7 +34,7 @@ extern void smp_alloc_memory(void); extern int pic_mode; extern int smp_num_siblings; -extern int cpu_sibling_map[]; +extern cpumask_t cpu_sibling_map[]; extern void smp_flush_tlb(void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); --- linux-2.6.6-rc1/include/asm-i386/spinlock.h 2004-03-10 20:41:30.000000000 -0800 +++ 25/include/asm-i386/spinlock.h 2004-04-18 22:25:47.920545448 -0700 @@ -43,18 +43,35 @@ typedef struct { #define spin_is_locked(x) (*(volatile signed char *)(&(x)->lock) <= 0) #define spin_unlock_wait(x) do { barrier(); } while(spin_is_locked(x)) -#define spin_lock_string \ - "\n1:\t" \ - "lock ; decb %0\n\t" \ - "js 2f\n" \ - LOCK_SECTION_START("") \ - "2:\t" \ - "rep;nop\n\t" \ - "cmpb $0,%0\n\t" \ - "jle 2b\n\t" \ - "jmp 1b\n" \ - LOCK_SECTION_END +#ifdef CONFIG_SPINLINE + #define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + "jmp 3f\n" \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + "3:\t" + +#else /* !CONFIG_SPINLINE */ + + #define spin_lock_string \ + "\n1:\t" \ + "lock ; decb %0\n\t" \ + "js 2f\n" \ + LOCK_SECTION_START("") \ + "2:\t" \ + "rep;nop\n\t" \ + "cmpb $0,%0\n\t" \ + "jle 2b\n\t" \ + "jmp 1b\n" \ + LOCK_SECTION_END + +#endif /* CONFIG_SPINLINE */ /* * This works. Despite all the confusion. 
* (except on PPro SMP or if we are using OOSTORE) @@ -138,6 +155,11 @@ here: */ typedef struct { volatile unsigned int lock; +#ifdef CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* and we need this storage for CPU and lock INDEX */ + unsigned lockmeter_magic; +#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned magic; #endif @@ -145,11 +167,19 @@ typedef struct { #define RWLOCK_MAGIC 0xdeaf1eed +#ifdef CONFIG_LOCKMETER +#ifdef CONFIG_DEBUG_SPINLOCK +#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC +#else +#define RWLOCK_MAGIC_INIT , 0 +#endif +#else /* !CONFIG_LOCKMETER */ #ifdef CONFIG_DEBUG_SPINLOCK #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC #else #define RWLOCK_MAGIC_INIT /* */ #endif +#endif /* !CONFIG_LOCKMETER */ #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT } @@ -196,4 +226,60 @@ static inline int _raw_write_trylock(rwl return 0; } +#ifdef CONFIG_LOCKMETER +static inline int _raw_read_trylock(rwlock_t *lock) +{ +/* FIXME -- replace with assembler */ + atomic_t *count = (atomic_t *)lock; + atomic_dec(count); + if (count->counter > 0) + return 1; + atomic_inc(count); + return 0; +} +#endif + +#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK) +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock(spinlock_t *lock); + +/* + * Matches what is in arch/i386/lib/dec_and_lock.c, except this one is + * "static inline" so that the spin_lock(), if actually invoked, is charged + * against the real caller, not against the catch-all atomic_dec_and_lock + */ +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + int counter; + int newcount; + +repeat: + counter = atomic_read(atomic); + newcount = counter-1; + + if (!newcount) + goto slow_path; + + asm volatile("lock; cmpxchgl %1,%2" + :"=a" (newcount) + :"r" (newcount), "m" (atomic->counter), "0" (counter)); + + /* If the above failed, "eax" will have changed */ + if (newcount != counter) + goto repeat; + 
return 0; + +slow_path: + preempt_disable(); + _metered_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + _metered_spin_unlock(lock); + preempt_enable(); + return 0; +} + +#define ATOMIC_DEC_AND_LOCK +#endif + #endif /* __ASM_SPINLOCK_H */ --- linux-2.6.6-rc1/include/asm-ia64/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ia64/ide.h 2004-04-18 22:26:01.785437664 -0700 @@ -53,41 +53,11 @@ static inline unsigned long ide_default_ } } -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void -ide_init_default_hwifs (void) -{ -#ifndef CONFIG_PCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } +#ifdef CONFIG_PCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) #endif -} #include --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-ia64/lockmeter.h 2004-04-18 22:25:47.920545448 -0700 @@ -0,0 +1,72 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. 
+ * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + */ + +#ifndef _IA64_LOCKMETER_H +#define _IA64_LOCKMETER_H + +#ifdef local_cpu_data +#define CPU_CYCLE_FREQUENCY local_cpu_data->itc_freq +#else +#define CPU_CYCLE_FREQUENCY my_cpu_data.itc_freq +#endif +#define get_cycles64() get_cycles() + +#define THIS_CPU_NUMBER smp_processor_id() + +/* + * macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * we also assume that the hash table has less than 32767 entries. + */ +/* + * instrumented spinlock structure -- never used to allocate storage + * only used in macros below to overlay a spinlock_t + */ +typedef struct inst_spinlock_s { + /* remember, Intel is little endian */ + volatile unsigned short lock; + volatile unsigned short index; +} inst_spinlock_t; +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int read_counter:31; + volatile int write_lock:1; + volatile unsigned short index; + volatile unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ 
+#define RWLOCK_READERS(rwlock_ptr) ((rwlock_ptr)->read_counter) + +/* + * return true if rwlock is write locked + * (note that other lock attempts can cause the lock value to be negative) + */ +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->write_lock) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->read_counter) + +#endif /* _IA64_LOCKMETER_H */ + --- linux-2.6.6-rc1/include/asm-ia64/pgtable.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ia64/pgtable.h 2004-04-18 22:25:33.830687432 -0700 @@ -102,7 +102,7 @@ * can map. */ #define PMD_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3)) -#define PMD_SIZE (__IA64_UL(1) << PMD_SHIFT) +#define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PTRS_PER_PMD (__IA64_UL(1) << (PAGE_SHIFT-3)) @@ -469,8 +469,6 @@ extern void hugetlb_free_pgtables(struct struct vm_area_struct * prev, unsigned long start, unsigned long end); #endif -typedef pte_t *pte_addr_t; - /* * IA-64 doesn't have any external MMU info: the page tables contain all the necessary * information. 
However, we use this routine to take care of any (delayed) i-cache --- linux-2.6.6-rc1/include/asm-ia64/rmap.h 2003-06-14 12:18:09.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _ASM_IA64_RMAP_H -#define _ASM_IA64_RMAP_H - -/* nothing to see, move along */ -#include - -#endif /* _ASM_IA64_RMAP_H */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-ia64/setup.h 2004-04-18 22:25:35.906371880 -0700 @@ -0,0 +1,6 @@ +#ifndef __IA64_SETUP_H +#define __IA64_SETUP_H + +#define COMMAND_LINE_SIZE 512 + +#endif --- linux-2.6.6-rc1/include/asm-ia64/spinlock.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-ia64/spinlock.h 2004-04-18 22:25:47.921545296 -0700 @@ -110,8 +110,18 @@ do { \ typedef struct { volatile int read_counter : 31; volatile int write_lock : 1; +#ifdef CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* and we need this storage for CPU and lock INDEX */ + unsigned lockmeter_magic; +#endif } rwlock_t; + +#ifdef CONFIG_LOCKMETER +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 } +#else #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } +#endif #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) #define rwlock_is_locked(x) (*(volatile int *) (x) != 0) @@ -127,6 +137,48 @@ do { \ } \ } while (0) +#ifdef CONFIG_LOCKMETER +/* + * HACK: This works, but there is still a timing window that affects performance: + * we see that no one owns the Write lock, then someone else grabs for Write + * lock before we do a read_lock(). + * This means that on rare occasions our read_lock() will stall and spin-wait + * until we acquire for Read, instead of simply returning a trylock failure. + */ +static inline int _raw_read_trylock(rwlock_t *rw) +{ + if (rw->write_lock) { + return 0; + } else { + _raw_read_lock(rw); + return 1; + } +} + +static inline int _raw_write_trylock(rwlock_t *rw) +{ + if (!(rw->write_lock)) { + /* isn't currently write-locked... that looks promising...
*/ + if (test_and_set_bit(31, rw) == 0) { + /* now it is write-locked by me... */ + if (rw->read_counter) { + /* really read-locked, so release write-lock and fail */ + clear_bit(31, rw); + } else { + /* we've got the write-lock, no read-lockers... success! */ + barrier(); + return 1; + } + + } + } + + /* falls through ... fails to write-lock */ + barrier(); + return 0; +} +#endif + #define _raw_read_unlock(rw) \ do { \ rwlock_t *__read_lock_ptr = (rw); \ @@ -190,4 +242,25 @@ do { \ clear_bit(31, (x)); \ }) +#ifdef CONFIG_LOCKMETER +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock(spinlock_t *lock); + +/* + * Use a less efficient, and inline, atomic_dec_and_lock() if lockmetering + * so we can see the callerPC of who is actually doing the spin_lock(). + * Otherwise, all we see is the generic rollup of all locks done by + * atomic_dec_and_lock(). + */ +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + _metered_spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + _metered_spin_unlock(lock); + return 0; +} +#define ATOMIC_DEC_AND_LOCK +#endif + #endif /* _ASM_IA64_SPINLOCK_H */ --- linux-2.6.6-rc1/include/asm-ia64/tlb.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-ia64/tlb.h 2004-04-18 22:25:38.285010272 -0700 @@ -211,6 +211,8 @@ __tlb_remove_tlb_entry (struct mmu_gathe tlb->end_addr = address + PAGE_SIZE; } +#define tlb_migrate_prepare(mm) flush_tlb_mm(mm) + #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) --- linux-2.6.6-rc1/include/asm-ia64/unistd.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-ia64/unistd.h 2004-04-18 22:25:48.698427192 -0700 @@ -248,9 +248,9 @@ #define __NR_clock_nanosleep 1256 #define __NR_fstatfs64 1257 #define __NR_statfs64 1258 -#define __NR_reserved1 1259 /* reserved for NUMA interface */ -#define __NR_reserved2 1260 /* reserved for NUMA interface */ -#define __NR_reserved3 1261 /* reserved for NUMA
interface */ +#define __NR_mbind 1259 +#define __NR_get_mempolicy 1260 +#define __NR_set_mempolicy 1261 #ifdef __KERNEL__ --- linux-2.6.6-rc1/include/asm-m68k/ide.h 2003-06-14 12:17:56.000000000 -0700 +++ 25/include/asm-m68k/ide.h 2004-04-18 22:26:01.551473232 -0700 @@ -74,13 +74,7 @@ static __inline__ void ide_init_hwif_por printk("ide_init_hwif_ports: must not be called\n"); } -/* - * This registers the standard ports for this architecture with the IDE - * driver. - */ -static __inline__ void ide_init_default_hwifs(void) -{ -} +#define ide_init_default_irq(base) (0) /* * Get rid of defs from io.h - ide has its private and conflicting versions --- linux-2.6.6-rc1/include/asm-m68knommu/ide.h 2003-06-14 12:18:29.000000000 -0700 +++ 25/include/asm-m68knommu/ide.h 2004-04-18 22:26:01.551473232 -0700 @@ -139,28 +139,7 @@ static IDE_INLINE void ide_init_hwif_por } } - -/* - * This registers the standard ports for this architecture with the IDE - * driver. - */ -static IDE_INLINE void ide_init_default_hwifs(void) -{ - hw_regs_t hw; - ide_ioreg_t base; - int index; - - for (index = 0; index < MAX_HWIFS; index++) { - base = ide_default_io_base(index); - if (!base) - continue; - memset(&hw, 0, sizeof(hw)); - ide_init_hwif_ports(&hw, base, 0, NULL); - hw.irq = ide_default_irq(base); - ide_register_hw(&hw, NULL); - } -} - +#define ide_init_default_irq(base) ide_default_irq(base) static IDE_INLINE int ide_request_irq( --- linux-2.6.6-rc1/include/asm-m68knommu/pgtable.h 2003-06-14 12:18:23.000000000 -0700 +++ 25/include/asm-m68knommu/pgtable.h 2004-04-18 22:25:33.831687280 -0700 @@ -11,8 +11,6 @@ #include #include -typedef pte_t *pte_addr_t; - /* * Trivial page table functions. 
*/ --- linux-2.6.6-rc1/include/asm-m68knommu/rmap.h 2003-06-14 12:18:07.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,2 +0,0 @@ -/* Do not need anything here */ - --- linux-2.6.6-rc1/include/asm-m68knommu/setup.h 2003-06-14 12:18:22.000000000 -0700 +++ 25/include/asm-m68knommu/setup.h 2004-04-18 22:25:35.906371880 -0700 @@ -1 +1,5 @@ #include + +/* We have a bigger command line buffer. */ +#undef COMMAND_LINE_SIZE +#define COMMAND_LINE_SIZE 512 --- linux-2.6.6-rc1/include/asm-m68k/pgtable.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/asm-m68k/pgtable.h 2004-04-18 22:25:33.832687128 -0700 @@ -168,8 +168,6 @@ static inline void update_mmu_cache(stru ? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S)) \ : (prot))) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ /* --- linux-2.6.6-rc1/include/asm-m68k/rmap.h 2003-06-14 12:18:34.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _M68K_RMAP_H -#define _M68K_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-m68k/setup.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/include/asm-m68k/setup.h 2004-04-18 22:25:35.907371728 -0700 @@ -357,6 +357,7 @@ extern int m68k_is040or060; #define NUM_MEMINFO 4 #define CL_SIZE 256 +#define COMMAND_LINE_SIZE CL_SIZE #ifndef __ASSEMBLY__ extern int m68k_num_memory; /* # of memory blocks found (and used) */ --- linux-2.6.6-rc1/include/asm-mips/bootinfo.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-mips/bootinfo.h 2004-04-18 22:25:35.907371728 -0700 @@ -12,6 +12,7 @@ #define _ASM_BOOTINFO_H #include +#include /* * The MACH_GROUP_ IDs are the equivalent to PCI vendor IDs; the remaining @@ -208,7 +209,7 @@ #define MACH_GROUP_TITAN 22 /* PMC-Sierra Titan */ #define MACH_TITAN_YOSEMITE 1 /* PMC-Sierra Yosemite */ -#define CL_SIZE (256) +#define CL_SIZE COMMAND_LINE_SIZE const char *get_system_type(void); --- 
linux-2.6.6-rc1/include/asm-mips/kmap_types.h 2003-07-02 14:53:17.000000000 -0700 +++ 25/include/asm-mips/kmap_types.h 2004-04-18 22:25:33.832687128 -0700 @@ -19,12 +19,11 @@ D(5) KM_BIO_SRC_IRQ, D(6) KM_BIO_DST_IRQ, D(7) KM_PTE0, D(8) KM_PTE1, -D(9) KM_PTE2, -D(10) KM_IRQ0, -D(11) KM_IRQ1, -D(12) KM_SOFTIRQ0, -D(13) KM_SOFTIRQ1, -D(14) KM_TYPE_NR +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_TYPE_NR }; #undef D --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-mips/lockmeter.h 2004-04-18 22:25:47.922545144 -0700 @@ -0,0 +1,126 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * Ported to mips32 for Asita Technologies + * by D.J. Barrow ( dj.barrow@asitatechnologies.com ) + */ +#ifndef _ASM_LOCKMETER_H +#define _ASM_LOCKMETER_H + +/* do_gettimeoffset is a function pointer on mips */ +/* & it is not included by */ +#include +#include +#include + +#define SPINLOCK_MAGIC_INIT /* */ + +#define CPU_CYCLE_FREQUENCY get_cpu_cycle_frequency() + +#define THIS_CPU_NUMBER smp_processor_id() + +static uint32_t cpu_cycle_frequency = 0; + +static uint32_t get_cpu_cycle_frequency(void) +{ + /* a total hack, slow and invasive, but ... 
it works */ + int sec; + uint32_t start_cycles; + struct timeval tv; + + if (cpu_cycle_frequency == 0) { /* uninitialized */ + do_gettimeofday(&tv); + sec = tv.tv_sec; /* set up to catch the tv_sec rollover */ + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + sec = tv.tv_sec; /* rolled over to a new sec value */ + start_cycles = get_cycles(); + while (sec == tv.tv_sec) { do_gettimeofday(&tv); } + cpu_cycle_frequency = get_cycles() - start_cycles; + } + + return cpu_cycle_frequency; +} + +extern struct timeval xtime; + +static uint64_t get_cycles64(void) +{ + static uint64_t last_get_cycles64 = 0; + uint64_t ret; + unsigned long sec; + unsigned long usec, usec_offset; + +again: + sec = xtime.tv_sec; + usec = xtime.tv_usec; + usec_offset = do_gettimeoffset(); + if ((xtime.tv_sec != sec) || + (xtime.tv_usec != usec)|| + (usec_offset >= 20000)) + goto again; + + ret = ((uint64_t)(usec + usec_offset) * cpu_cycle_frequency); + /* We can't do a normal 64 bit division on mips without libgcc.a */ + do_div(ret,1000000); + ret += ((uint64_t)sec * cpu_cycle_frequency); + + /* XXX why does time go backwards? do_gettimeoffset? general time adj? */ + if (ret <= last_get_cycles64) + ret = last_get_cycles64+1; + last_get_cycles64 = ret; + + return ret; +} + +/* + * macros to cache and retrieve an index value inside of a lock + * these macros assume that there are less than 65536 simultaneous + * (read mode) holders of a rwlock. + * we also assume that the hash table has less than 32767 entries. 
+ * the high order bit is used for write locking a rw_lock + */ +#define INDEX_MASK 0x7FFF0000 +#define READERS_MASK 0x0000FFFF +#define INDEX_SHIFT 16 +#define PUT_INDEX(lockp,index) \ + lockp->lock = (((lockp->lock) & ~INDEX_MASK) | (index) << INDEX_SHIFT) +#define GET_INDEX(lockp) \ + (((lockp->lock) & INDEX_MASK) >> INDEX_SHIFT) + +/* + * macros to cache and retrieve an index value in a read/write lock + * as well as the cpu where a reader busy period started + * we use the 2nd word (the debug word) for this, so require the + * debug word to be present + */ +/* + * instrumented rwlock structure -- never used to allocate storage + * only used in macros below to overlay a rwlock_t + */ +typedef struct inst_rwlock_s { + volatile int lock; + unsigned short index; + unsigned short cpu; +} inst_rwlock_t; +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu + +/* + * return the number of readers for a rwlock_t + */ +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + int tmp = (int) rwlock_ptr->lock; + return (tmp >= 0) ? 
tmp : 0; +} + +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock < 0) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock > 0) + +#endif /* _ASM_LOCKMETER_H */ --- linux-2.6.6-rc1/include/asm-mips/mach-generic/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-mips/mach-generic/ide.h 2004-04-18 22:26:01.785437664 -0700 @@ -48,39 +48,10 @@ static inline unsigned long ide_default_ } } -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static inline void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_BLK_DEV_IDEPCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_BLK_DEV_IDEPCI */ -} +#ifdef CONFIG_BLK_DEV_IDEPCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) +#endif #endif /* __ASM_MACH_GENERIC_IDE_H */ --- linux-2.6.6-rc1/include/asm-mips/pgtable-32.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-mips/pgtable-32.h 2004-04-18 22:25:33.833686976 -0700 @@ -216,10 +216,4 @@ static inline pmd_t *pmd_offset(pgd_t *d #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#ifdef CONFIG_64BIT_PHYS_ADDR -typedef u64 pte_addr_t; -#else -typedef pte_t *pte_addr_t; -#endif - #endif /* _ASM_PGTABLE_32_H */ --- linux-2.6.6-rc1/include/asm-mips/pgtable-64.h 
2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-mips/pgtable-64.h 2004-04-18 22:25:33.833686976 -0700 @@ -214,6 +214,4 @@ static inline pte_t mk_swap_pte(unsigned #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -typedef pte_t *pte_addr_t; - #endif /* _ASM_PGTABLE_64_H */ --- linux-2.6.6-rc1/include/asm-mips/rmap.h 2003-07-02 14:53:17.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef __ASM_RMAP_H -#define __ASM_RMAP_H - -/* nothing to see, move along */ -#include - -#endif /* __ASM_RMAP_H */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-mips/setup.h 2004-04-18 22:25:35.907371728 -0700 @@ -0,0 +1,8 @@ +#ifdef __KERNEL__ +#ifndef _MIPS_SETUP_H +#define _MIPS_SETUP_H + +#define COMMAND_LINE_SIZE 256 + +#endif /* _MIPS_SETUP_H */ +#endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/asm-mips/siginfo.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-mips/siginfo.h 2004-04-18 22:25:24.968034760 -0700 @@ -175,7 +175,7 @@ typedef struct siginfo32 { #undef SI_MESGQ #define SI_ASYNCIO -2 /* sent by AIO completion */ #define SI_TIMER __SI_CODE(__SI_TIMER,-3) /* sent by timer expiration */ -#define SI_MESGQ -4 /* sent by real time mesq state change */ +#define SI_MESGQ __SI_CODE(__SI_MESGQ,-4) /* sent by real time mesq state change */ #ifdef __KERNEL__ --- linux-2.6.6-rc1/include/asm-mips/spinlock.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-mips/spinlock.h 2004-04-18 22:25:47.922545144 -0700 @@ -91,9 +91,18 @@ static inline unsigned int _raw_spin_try typedef struct { volatile unsigned int lock; +#ifdef CONFIG_LOCKMETER + /* required for LOCKMETER since all bits in lock are used */ + /* and we need this storage for CPU and lock INDEX */ + unsigned lockmeter_magic; +#endif } rwlock_t; +#ifdef CONFIG_LOCKMETER +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 } +#else #define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif #define
rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0) --- linux-2.6.6-rc1/include/asm-parisc/cacheflush.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-parisc/cacheflush.h 2004-04-18 22:25:24.968034760 -0700 @@ -69,7 +69,9 @@ extern void __flush_dcache_page(struct p static inline void flush_dcache_page(struct page *page) { - if (page_mapping(page) && !mapping_mapped(page->mapping)) { + struct address_space *mapping = page_mapping(page); + + if (mapping && !mapping_mapped(mapping)) { set_bit(PG_dcache_dirty, &page->flags); } else { __flush_dcache_page(page); --- linux-2.6.6-rc1/include/asm-parisc/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-parisc/ide.h 2004-04-18 22:26:01.786437512 -0700 @@ -22,28 +22,7 @@ #define ide_default_irq(base) (0) #define ide_default_io_base(index) (0) -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -/* There are no standard ports. 
*/ -static inline void ide_init_default_hwifs(void) { ; } +#define ide_init_default_irq(base) (0) #define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) #define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) --- linux-2.6.6-rc1/include/asm-parisc/pgtable.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/asm-parisc/pgtable.h 2004-04-18 22:25:33.834686824 -0700 @@ -450,8 +450,6 @@ static inline void ptep_mkdirty(pte_t *p #define pte_same(A,B) (pte_val(A) == pte_val(B)) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ #define io_remap_page_range remap_page_range --- linux-2.6.6-rc1/include/asm-parisc/rmap.h 2003-06-14 12:18:34.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _PARISC_RMAP_H -#define _PARISC_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-parisc/setup.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/include/asm-parisc/setup.h 2004-04-18 22:25:35.908371576 -0700 @@ -1,10 +1,6 @@ -/* - * Just a place holder. 
We don't want to have to test x86 before - * we include stuff - */ +#ifndef _PARISC_SETUP_H +#define _PARISC_SETUP_H -#ifndef _i386_SETUP_H -#define _i386_SETUP_H +#define COMMAND_LINE_SIZE 1024 - -#endif /* _i386_SETUP_H */ +#endif /* _PARISC_SETUP_H */ --- linux-2.6.6-rc1/include/asm-ppc64/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ppc64/ide.h 2004-04-18 22:26:01.786437512 -0700 @@ -25,29 +25,7 @@ static inline int ide_default_irq(unsigned long base) { return 0; } static inline unsigned long ide_default_io_base(int index) { return 0; } -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void ide_init_default_hwifs(void) -{ -} +#define ide_init_default_irq(base) (0) #endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/asm-ppc64/machdep.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ppc64/machdep.h 2004-04-18 22:25:35.908371576 -0700 @@ -11,6 +11,7 @@ #include #include +#include #include struct pt_regs; @@ -112,9 +113,7 @@ struct machdep_calls { }; extern struct machdep_calls ppc_md; -#define COMMAND_LINE_SIZE 512 extern char cmd_line[COMMAND_LINE_SIZE]; -extern char saved_command_line[COMMAND_LINE_SIZE]; /* Functions to produce codes on the leds. 
* The SRC code should be unique for the message category and should --- linux-2.6.6-rc1/include/asm-ppc64/pgalloc.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/asm-ppc64/pgalloc.h 2004-04-18 22:26:02.845276544 -0700 @@ -48,28 +48,43 @@ pmd_free(pmd_t *pmd) pmd_populate_kernel(mm, pmd, page_address(pte_page)) static inline pte_t * -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + pte_t *pte; + pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + if (pte) { + struct page *ptepage = virt_to_page(pte); + ptepage->mapping = (void *) mm; + ptepage->index = address & PMD_MASK; + } + return pte; } static inline struct page * pte_alloc_one(struct mm_struct *mm, unsigned long address) { - pte_t *pte = pte_alloc_one_kernel(mm, address); - - if (pte) - return virt_to_page(pte); - + pte_t *pte; + pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + if (pte) { + struct page *ptepage = virt_to_page(pte); + ptepage->mapping = (void *) mm; + ptepage->index = address & PMD_MASK; + return ptepage; + } return NULL; } static inline void pte_free_kernel(pte_t *pte) { + virt_to_page(pte)->mapping = NULL; kmem_cache_free(zero_cache, pte); } -#define pte_free(pte_page) pte_free_kernel(page_address(pte_page)) +static inline void pte_free(struct page *ptepage) +{ + ptepage->mapping = NULL; + kmem_cache_free(zero_cache, page_address(ptepage)); +} struct pte_freelist_batch { @@ -86,33 +101,7 @@ extern void pte_free_submit(struct pte_f DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) -{ - /* This is safe as we are holding page_table_lock */ - cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); - struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); - - if (atomic_read(&tlb->mm->mm_users) < 2 || - 
cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pte_free(ptepage); - return; - } - - if (*batchp == NULL) { - *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); - if (*batchp == NULL) { - pte_free_now(ptepage); - return; - } - (*batchp)->index = 0; - } - (*batchp)->pages[(*batchp)->index++] = ptepage; - if ((*batchp)->index == PTE_FREELIST_SIZE) { - pte_free_submit(*batchp); - *batchp = NULL; - } -} - +void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); #define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) #define check_pgt_cache() do { } while (0) --- linux-2.6.6-rc1/include/asm-ppc64/pgtable.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-ppc64/pgtable.h 2004-04-18 22:25:33.835686672 -0700 @@ -313,7 +313,9 @@ static inline int ptep_test_and_clear_yo { unsigned long old; - old = pte_update(ptep, _PAGE_ACCESSED | _PAGE_HPTEFLAGS); + if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + return 0; + old = pte_update(ptep, _PAGE_ACCESSED); if (old & _PAGE_HASHPTE) { hpte_update(ptep, old, 0); flush_tlb_pending(); /* XXX generic code doesn't flush */ @@ -326,12 +328,13 @@ static inline int ptep_test_and_clear_yo * moment we always flush but we need to fix hpte_update and test if the * optimisation is worth it. 
*/ -#if 1 static inline int ptep_test_and_clear_dirty(pte_t *ptep) { unsigned long old; - old = pte_update(ptep, _PAGE_DIRTY | _PAGE_HPTEFLAGS); + if ((pte_val(*ptep) & _PAGE_DIRTY) == 0) + return 0; + old = pte_update(ptep, _PAGE_DIRTY); if (old & _PAGE_HASHPTE) hpte_update(ptep, old, 0); return (old & _PAGE_DIRTY) != 0; @@ -341,7 +344,9 @@ static inline void ptep_set_wrprotect(pt { unsigned long old; - old = pte_update(ptep, _PAGE_RW | _PAGE_HPTEFLAGS); + if ((pte_val(*ptep) & _PAGE_RW) == 0) + return; + old = pte_update(ptep, _PAGE_RW); if (old & _PAGE_HASHPTE) hpte_update(ptep, old, 0); } @@ -358,7 +363,6 @@ static inline void ptep_set_wrprotect(pt #define ptep_clear_flush_young(__vma, __address, __ptep) \ ({ \ int __young = ptep_test_and_clear_young(__ptep); \ - flush_tlb_page(__vma, __address); \ __young; \ }) @@ -370,27 +374,6 @@ static inline void ptep_set_wrprotect(pt __dirty; \ }) -#else -static inline int ptep_test_and_clear_dirty(pte_t *ptep) -{ - unsigned long old; - - old = pte_update(ptep, _PAGE_DIRTY); - if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0) - hpte_update(ptep, old, 1); - return (old & _PAGE_DIRTY) != 0; -} - -static inline void ptep_set_wrprotect(pte_t *ptep) -{ - unsigned long old; - - old = pte_update(ptep, _PAGE_RW); - if ((~old & (_PAGE_HASHPTE | _PAGE_RW | _PAGE_DIRTY)) == 0) - hpte_update(ptep, old, 1); -} -#endif - static inline pte_t ptep_get_and_clear(pte_t *ptep) { unsigned long old = pte_update(ptep, ~0UL); @@ -488,8 +471,6 @@ extern struct vm_struct * im_get_area(un int region_type); unsigned long im_free(void *addr); -typedef pte_t *pte_addr_t; - long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, int secondary, unsigned long hpteflags, --- linux-2.6.6-rc1/include/asm-ppc64/processor.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ppc64/processor.h 2004-04-18 22:25:39.503824984 -0700 @@ -623,6 +623,11 @@ static inline void prefetchw(const void #define 
spin_lock_prefetch(x) prefetchw(x) +#ifdef CONFIG_SCHED_SMT +#define ARCH_HAS_SCHED_DOMAIN +#define ARCH_HAS_SCHED_WAKE_BALANCE +#endif + #endif /* ASSEMBLY */ #endif /* __ASM_PPC64_PROCESSOR_H */ --- linux-2.6.6-rc1/include/asm-ppc64/rmap.h 2003-06-14 12:18:51.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,9 +0,0 @@ -#ifndef _PPC64_RMAP_H -#define _PPC64_RMAP_H - -/* PPC64 calls pte_alloc() before mem_map[] is setup ... */ -#define BROKEN_PPC_PTE_ALLOC_ONE - -#include - -#endif --- linux-2.6.6-rc1/include/asm-ppc64/setup.h 2003-06-14 12:17:58.000000000 -0700 +++ 25/include/asm-ppc64/setup.h 2004-04-18 22:25:35.908371576 -0700 @@ -1,6 +1,6 @@ #ifndef _PPC_SETUP_H #define _PPC_SETUP_H -/* This is a place holder include */ +#define COMMAND_LINE_SIZE 512 #endif /* _PPC_SETUP_H */ --- linux-2.6.6-rc1/include/asm-ppc/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-ppc/ide.h 2004-04-18 22:26:01.786437512 -0700 @@ -57,50 +57,11 @@ static __inline__ unsigned long ide_defa return 0; } -/* - * This is only used for PC-style IDE controllers (e.g. as on PReP) - * or for PCI IDE devices, not for other types of IDE interface such - * as the pmac IDE interfaces. 
- */ -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, - unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) - hw->io_ports[i] = reg++; - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = - hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; - if (ppc_ide_md.ide_init_hwif != NULL) - ppc_ide_md.ide_init_hwif(hw, data_port, ctrl_port, irq); -} - -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_PCI - hw_regs_t hw; - int index; - unsigned long base; - - for (index = 0; index < MAX_HWIFS; index++) { - base = ide_default_io_base(index); - if (base == 0) - continue; - ide_init_hwif_ports(&hw, base, 0, NULL); - hw.irq = ide_default_irq(base); - ide_register_hw(&hw, NULL); - } +#ifdef CONFIG_PCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) #endif -} #if (defined CONFIG_APUS || defined CONFIG_BLK_DEV_MPC8xx_IDE ) #define IDE_ARCH_ACK_INTR 1 --- linux-2.6.6-rc1/include/asm-ppc/machdep.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-ppc/machdep.h 2004-04-18 22:25:35.908371576 -0700 @@ -106,7 +106,6 @@ struct machdep_calls { }; extern struct machdep_calls ppc_md; -#define COMMAND_LINE_SIZE 512 extern char cmd_line[COMMAND_LINE_SIZE]; extern void setup_pci_ptrs(void); --- linux-2.6.6-rc1/include/asm-ppc/pgtable.h 2004-02-17 20:48:46.000000000 -0800 +++ 25/include/asm-ppc/pgtable.h 2004-04-18 22:25:33.836686520 -0700 @@ -670,8 +670,6 @@ extern void kernel_set_cachemode (unsign */ #define pgtable_cache_init() do { } while (0) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG --- linux-2.6.6-rc1/include/asm-ppc/rmap.h 2003-06-14 12:18:33.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 
-0700 @@ -1,9 +0,0 @@ -#ifndef _PPC_RMAP_H -#define _PPC_RMAP_H - -/* PPC calls pte_alloc() before mem_map[] is setup ... */ -#define BROKEN_PPC_PTE_ALLOC_ONE - -#include - -#endif --- linux-2.6.6-rc1/include/asm-ppc/setup.h 2003-06-14 12:18:33.000000000 -0700 +++ 25/include/asm-ppc/setup.h 2004-04-18 22:25:35.909371424 -0700 @@ -6,6 +6,9 @@ #define m68k_memory memory #include +/* We have a bigger command line buffer. */ +#undef COMMAND_LINE_SIZE +#define COMMAND_LINE_SIZE 512 #endif /* _PPC_SETUP_H */ #endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/asm-s390/pgtable.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-s390/pgtable.h 2004-04-18 22:25:33.837686368 -0700 @@ -760,8 +760,6 @@ extern inline pte_t mk_swap_pte(unsigned #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -typedef pte_t *pte_addr_t; - #ifndef __s390x__ # define PTE_FILE_MAX_BITS 26 #else /* __s390x__ */ --- linux-2.6.6-rc1/include/asm-s390/rmap.h 2003-06-14 12:18:23.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _S390_RMAP_H -#define _S390_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-s390/thread_info.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/include/asm-s390/thread_info.h 2004-04-18 22:25:24.970034456 -0700 @@ -84,6 +84,7 @@ static inline struct thread_info *curren #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_RESTART_SVC 4 /* restart svc with new svc number */ +#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -94,6 +95,7 @@ static inline struct thread_info *curren #define _TIF_SIGPENDING (1<io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; 
- } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_PCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_PCI */ -} +#ifdef CONFIG_PCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) +#endif #include --- linux-2.6.6-rc1/include/asm-sh/pgalloc.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-sh/pgalloc.h 2004-04-18 22:25:24.971034304 -0700 @@ -97,12 +97,11 @@ static inline pte_t ptep_get_and_clear(p pte_clear(ptep); if (!pte_not_present(pte)) { - struct page *page; unsigned long pfn = pte_pfn(pte); if (pfn_valid(pfn)) { - page = pfn_to_page(pfn); - if (!page_mapping(page) || - !mapping_writably_mapped(page->mapping)) + struct page *page = pfn_to_page(pfn); + struct address_space *mapping = page_mapping(page); + if (!mapping || !mapping_writably_mapped(mapping)) __clear_bit(PG_mapped, &page->flags); } } --- linux-2.6.6-rc1/include/asm-sh/pgtable.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-sh/pgtable.h 2004-04-18 22:25:33.837686368 -0700 @@ -274,8 +274,6 @@ extern void update_mmu_cache(struct vm_a #define pte_same(A,B) (pte_val(A) == pte_val(B)) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ #define kern_addr_valid(addr) (1) --- linux-2.6.6-rc1/include/asm-sh/rmap.h 2003-06-14 12:18:34.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _SH_RMAP_H -#define _SH_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-sh/setup.h 2004-04-18 22:25:35.909371424 -0700 @@ 
-0,0 +1,8 @@ +#ifdef __KERNEL__ +#ifndef _SH_SETUP_H +#define _SH_SETUP_H + +#define COMMAND_LINE_SIZE 256 + +#endif /* _SH_SETUP_H */ +#endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/asm-sparc64/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-sparc64/ide.h 2004-04-18 22:26:01.787437360 -0700 @@ -34,27 +34,7 @@ static __inline__ unsigned long ide_defa return 0; } -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -/* There are no standard ports. */ -static inline void ide_init_default_hwifs(void) { ; } +#define ide_init_default_irq(base) (0) #define __ide_insl(data_reg, buffer, wcount) \ __ide_insw(data_reg, buffer, (wcount)<<1) --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-sparc64/lockmeter.h 2004-04-18 22:25:47.923544992 -0700 @@ -0,0 +1,45 @@ +/* + * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com) + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + */ + +#ifndef _SPARC64_LOCKMETER_H +#define _SPARC64_LOCKMETER_H + +#include +#include +#include +#include + +/* Actually, this is not the CPU frequency by the system tick + * frequency which is good enough for lock metering. 
+ */ +#define CPU_CYCLE_FREQUENCY (timer_tick_offset * HZ) +#define THIS_CPU_NUMBER smp_processor_id() + +#define PUT_INDEX(lock_ptr,indexv) (lock_ptr)->index = (indexv) +#define GET_INDEX(lock_ptr) (lock_ptr)->index + +#define PUT_RWINDEX(rwlock_ptr,indexv) (rwlock_ptr)->index = (indexv) +#define GET_RWINDEX(rwlock_ptr) (rwlock_ptr)->index +#define PUT_RW_CPU(rwlock_ptr,cpuv) (rwlock_ptr)->cpu = (cpuv) +#define GET_RW_CPU(rwlock_ptr) (rwlock_ptr)->cpu + +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr) + +extern inline int rwlock_readers(rwlock_t *rwlock_ptr) +{ + signed int tmp = rwlock_ptr->lock; + + if (tmp > 0) + return tmp; + else + return 0; +} + +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) < 0) +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) > 0) + +#define get_cycles64() get_cycles() + +#endif /* _SPARC64_LOCKMETER_H */ --- linux-2.6.6-rc1/include/asm-sparc64/pgtable.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/asm-sparc64/pgtable.h 2004-04-18 22:25:33.838686216 -0700 @@ -384,8 +384,6 @@ extern unsigned long get_fb_unmapped_are extern void check_pgt_cache(void); -typedef pte_t *pte_addr_t; - #endif /* !(__ASSEMBLY__) */ #endif /* !(_SPARC64_PGTABLE_H) */ --- linux-2.6.6-rc1/include/asm-sparc64/rmap.h 2003-06-14 12:18:07.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _SPARC64_RMAP_H -#define _SPARC64_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-sparc64/setup.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/asm-sparc64/setup.h 2004-04-18 22:25:35.909371424 -0700 @@ -5,5 +5,6 @@ #ifndef _SPARC64_SETUP_H #define _SPARC64_SETUP_H +#define COMMAND_LINE_SIZE 256 #endif /* _SPARC64_SETUP_H */ --- linux-2.6.6-rc1/include/asm-sparc64/spinlock.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-sparc64/spinlock.h 2004-04-18 22:25:47.923544992 -0700 @@ -31,15 +31,23 @@ #ifndef 
CONFIG_DEBUG_SPINLOCK -typedef unsigned char spinlock_t; -#define SPIN_LOCK_UNLOCKED 0 +typedef struct { + unsigned char lock; + unsigned int index; +} spinlock_t; -#define spin_lock_init(lock) (*((unsigned char *)(lock)) = 0) -#define spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0) +#ifdef CONFIG_LOCKMETER +#define SPIN_LOCK_UNLOCKED (spinlock_t) {0, 0} +#else +#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 } +#endif -#define spin_unlock_wait(lock) \ +#define spin_lock_init(__lock) do { *(__lock) = SPIN_LOCK_UNLOCKED; } while(0) +#define spin_is_locked(__lock) (*((volatile unsigned char *)(&((__lock)->lock))) != 0) + +#define spin_unlock_wait(__lock) \ do { membar("#LoadLoad"); \ -} while(*((volatile unsigned char *)lock)) +} while(*((volatile unsigned char *)(&(((spinlock_t *)__lock)->lock)))) static __inline__ void _raw_spin_lock(spinlock_t *lock) { @@ -110,17 +118,31 @@ extern int _spin_trylock (spinlock_t *lo #ifndef CONFIG_DEBUG_SPINLOCK -typedef unsigned int rwlock_t; -#define RW_LOCK_UNLOCKED 0 -#define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) -#define rwlock_is_locked(x) (*(x) != RW_LOCK_UNLOCKED) +#ifdef CONFIG_LOCKMETER +typedef struct { + unsigned int lock; + unsigned int index; + unsigned int cpu; +} rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff } +#else +typedef struct { + unsigned int lock; +} rwlock_t; +#define RW_LOCK_UNLOCKED (rwlock_t) { 0 } +#endif + +#define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) +#define rwlock_is_locked(x) ((x)->lock != 0) +extern int __read_trylock(rwlock_t *); extern void __read_lock(rwlock_t *); extern void __read_unlock(rwlock_t *); extern void __write_lock(rwlock_t *); extern void __write_unlock(rwlock_t *); extern int __write_trylock(rwlock_t *); +#define _raw_read_trylock(p) __read_trylock(p) #define _raw_read_lock(p) __read_lock(p) #define _raw_read_unlock(p) __read_unlock(p) #define _raw_write_lock(p) __write_lock(p) --- 
linux-2.6.6-rc1/include/asm-sparc/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-sparc/ide.h 2004-04-18 22:26:01.787437360 -0700 @@ -29,31 +29,7 @@ static __inline__ unsigned long ide_defa return 0; } -/* - * Doing any sort of ioremap() here does not work - * because this function may be called with null aguments. - */ -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -/* There are no standard ports. */ -static inline void ide_init_default_hwifs(void) { ; } +#define ide_init_default_irq(base) (0) #define __ide_insl(data_reg, buffer, wcount) \ __ide_insw(data_reg, buffer, (wcount)<<1) --- linux-2.6.6-rc1/include/asm-sparc/kmap_types.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/asm-sparc/kmap_types.h 2004-04-18 22:25:33.839686064 -0700 @@ -11,7 +11,6 @@ enum km_type { KM_BIO_DST_IRQ, KM_PTE0, KM_PTE1, - KM_PTE2, KM_IRQ0, KM_IRQ1, KM_SOFTIRQ0, --- linux-2.6.6-rc1/include/asm-sparc/pgtable.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/asm-sparc/pgtable.h 2004-04-18 22:25:33.839686064 -0700 @@ -491,8 +491,6 @@ extern int io_remap_page_range(struct vm #include -typedef pte_t *pte_addr_t; - #endif /* !(__ASSEMBLY__) */ /* We provide our own get_unmapped_area to cope with VA holes for userland */ --- linux-2.6.6-rc1/include/asm-sparc/rmap.h 2003-06-14 12:18:34.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _SPARC_RMAP_H -#define _SPARC_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-sparc/setup.h 2004-01-09 
00:04:32.000000000 -0800 +++ 25/include/asm-sparc/setup.h 2004-04-18 22:25:35.909371424 -0700 @@ -5,5 +5,6 @@ #ifndef _SPARC_SETUP_H #define _SPARC_SETUP_H +#define COMMAND_LINE_SIZE 256 #endif /* _SPARC_SETUP_H */ --- linux-2.6.6-rc1/include/asm-um/pgtable.h 2003-10-08 15:07:10.000000000 -0700 +++ 25/include/asm-um/pgtable.h 2004-04-18 22:25:33.840685912 -0700 @@ -384,18 +384,6 @@ static inline pmd_t * pmd_offset(pgd_t * #define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM4G) -typedef u32 pte_addr_t; -#endif - -#if defined(CONFIG_HIGHPTE) && defined(CONFIG_HIGHMEM64G) -typedef u64 pte_addr_t; -#endif - -#if !defined(CONFIG_HIGHPTE) -typedef pte_t *pte_addr_t; -#endif - #define update_mmu_cache(vma,address,pte) do ; while (0) /* Encode and de-code a swap entry */ --- linux-2.6.6-rc1/include/asm-um/rmap.h 2003-06-14 12:18:24.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,6 +0,0 @@ -#ifndef __UM_RMAP_H -#define __UM_RMAP_H - -#include "asm/arch/rmap.h" - -#endif --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-um/setup.h 2004-04-18 22:25:35.910371272 -0700 @@ -0,0 +1,6 @@ +#ifndef SETUP_H_INCLUDED +#define SETUP_H_INCLUDED + +#define COMMAND_LINE_SIZE 512 + +#endif /* SETUP_H_INCLUDED */ --- linux-2.6.6-rc1/include/asm-v850/pgtable.h 2003-06-14 12:18:25.000000000 -0700 +++ 25/include/asm-v850/pgtable.h 2004-04-18 22:25:33.840685912 -0700 @@ -5,8 +5,6 @@ #include -typedef pte_t *pte_addr_t; - #define pgd_present(pgd) (1) /* pages are always present on NO_MM */ #define pgd_none(pgd) (0) #define pgd_bad(pgd) (0) --- linux-2.6.6-rc1/include/asm-v850/rmap.h 2003-06-14 12:18:31.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1 +0,0 @@ -/* Do not need anything here */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-v850/setup.h 2004-04-18 22:25:35.910371272 -0700 @@ -0,0 
+1,6 @@ +#ifndef _V850_SETUP_H +#define _V850_SETUP_H + +#define COMMAND_LINE_SIZE 512 + +#endif /* __SETUP_H */ --- linux-2.6.6-rc1/include/asm-x86_64/bootsetup.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/asm-x86_64/bootsetup.h 2004-04-18 22:25:35.910371272 -0700 @@ -30,7 +30,6 @@ extern char x86_boot_params[2048]; #define EDD_NR (*(unsigned char *) (PARAM+EDDNR)) #define EDD_BUF ((struct edd_info *) (PARAM+EDDBUF)) #define COMMAND_LINE saved_command_line -#define COMMAND_LINE_SIZE 256 #define RAMDISK_IMAGE_START_MASK 0x07FF #define RAMDISK_PROMPT_FLAG 0x8000 --- linux-2.6.6-rc1/include/asm-x86_64/ia32_unistd.h 2003-09-08 13:58:59.000000000 -0700 +++ 25/include/asm-x86_64/ia32_unistd.h 2004-04-18 22:25:24.971034304 -0700 @@ -278,7 +278,17 @@ #define __NR_ia32_tgkill 270 #define __NR_ia32_utimes 271 #define __NR_ia32_fadvise64_64 272 +#define __NR_ia32_vserver 273 +#define __NR_ia32_mbind 274 +#define __NR_ia32_get_mempolicy 275 +#define __NR_ia32_set_mempolicy 276 +#define __NR_ia32_mq_open 277 +#define __NR_ia32_mq_unlink (__NR_ia32_mq_open+1) +#define __NR_ia32_mq_timedsend (__NR_ia32_mq_open+2) +#define __NR_ia32_mq_timedreceive (__NR_ia32_mq_open+3) +#define __NR_ia32_mq_notify (__NR_ia32_mq_open+4) +#define __NR_ia32_mq_getsetattr (__NR_ia32_mq_open+5) -#define IA32_NR_syscalls 275 /* must be > than biggest syscall! */ +#define IA32_NR_syscalls 285 /* must be > than biggest syscall! 
*/ #endif /* _ASM_X86_64_IA32_UNISTD_H_ */ --- linux-2.6.6-rc1/include/asm-x86_64/ide.h 2003-06-14 12:18:21.000000000 -0700 +++ 25/include/asm-x86_64/ide.h 2004-04-18 22:26:01.788437208 -0700 @@ -51,40 +51,11 @@ static __inline__ unsigned long ide_defa } } -static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - - for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) { - hw->io_ports[i] = reg; - reg += 1; - } - if (ctrl_port) { - hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port; - } else { - hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206; - } - if (irq != NULL) - *irq = 0; - hw->io_ports[IDE_IRQ_OFFSET] = 0; -} - -static __inline__ void ide_init_default_hwifs(void) -{ -#ifndef CONFIG_BLK_DEV_IDEPCI - hw_regs_t hw; - int index; - - for(index = 0; index < MAX_HWIFS; index++) { - memset(&hw, 0, sizeof hw); - ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL); - hw.irq = ide_default_irq(ide_default_io_base(index)); - ide_register_hw(&hw, NULL); - } -#endif /* CONFIG_BLK_DEV_IDEPCI */ -} +#ifdef CONFIG_BLK_DEV_IDEPCI +#define ide_init_default_irq(base) (0) +#else +#define ide_init_default_irq(base) ide_default_irq(base) +#endif #include --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-x86_64/kgdb.h 2004-04-18 22:25:31.604025936 -0700 @@ -0,0 +1,71 @@ +#ifndef __KGDB +#define __KGDB + +/* + * This file should not include ANY others. This makes it usable + * most anywhere without the fear of include order or inclusion. + * Make it so! + * + * This file may be included all the time. It is only active if + * CONFIG_KGDB is defined, otherwise it stubs out all the macros + * and entry points. 
+ */ +#if defined(CONFIG_KGDB) && !defined(__ASSEMBLY__) + +extern void breakpoint(void); +#define INIT_KGDB_INTS kgdb_enable_ints() + +#ifndef BREAKPOINT +#define BREAKPOINT asm(" int $3") +#endif + +extern void kgdb_schedule_breakpoint(void); +extern void kgdb_process_breakpoint(void); + +extern int kgdb_tty_hook(void); +extern int kgdb_eth_hook(void); +extern int kgdboe; + +/* + * GDB debug stub (or any debug stub) can point the 'linux_debug_hook' + * pointer to its routine and it will be entered as the first thing + * when a trap occurs. + * + * Return values are, at present, undefined. + * + * The debug hook routine does not necessarily return to its caller. + * It has the register image and thus may choose to resume execution + * anywhere it pleases. + */ +struct pt_regs; + +extern int kgdb_handle_exception(int trapno, + int signo, int err_code, struct pt_regs *regs); +extern int in_kgdb(struct pt_regs *regs); + +extern void set_debug_traps(void); + +#ifdef CONFIG_KGDB_TS +void kgdb_tstamp(int line, char *source, int data0, int data1); +/* + * This is the time stamp function. The macro adds the source info and + * does a cast on the data to allow most any 32-bit value. + */ + +#define kgdb_ts(data0,data1) kgdb_tstamp(__LINE__,__FILE__,(int)data0,(int)data1) +#else +#define kgdb_ts(data0,data1) +#endif +#else /* CONFIG_KGDB && ! __ASSEMBLY__ ,stubs follow... 
*/ +#ifndef BREAKPOINT +#define BREAKPOINT +#endif +#define kgdb_ts(data0,data1) +#define in_kgdb (0) +#define kgdb_handle_exception +#define breakpoint +#define INIT_KGDB_INTS +#define kgdb_process_breakpoint() do {} while(0) + +#endif +#endif /* __KGDB */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/asm-x86_64/kgdb_local.h 2004-04-18 22:25:31.605025784 -0700 @@ -0,0 +1,102 @@ +#ifndef __KGDB_LOCAL +#define ___KGDB_LOCAL +#include +#include +#include +#include +#include +#include +#include +#include + +#define PORT 0x3f8 +#ifdef CONFIG_KGDB_PORT +#undef PORT +#define PORT CONFIG_KGDB_PORT +#endif +#define IRQ 4 +#ifdef CONFIG_KGDB_IRQ +#undef IRQ +#define IRQ CONFIG_KGDB_IRQ +#endif +#define SB_CLOCK 1843200 +#define SB_BASE (SB_CLOCK/16) +#define SB_BAUD9600 SB_BASE/9600 +#define SB_BAUD192 SB_BASE/19200 +#define SB_BAUD384 SB_BASE/38400 +#define SB_BAUD576 SB_BASE/57600 +#define SB_BAUD1152 SB_BASE/115200 +#ifdef CONFIG_KGDB_9600BAUD +#define SB_BAUD SB_BAUD9600 +#endif +#ifdef CONFIG_KGDB_19200BAUD +#define SB_BAUD SB_BAUD192 +#endif +#ifdef CONFIG_KGDB_38400BAUD +#define SB_BAUD SB_BAUD384 +#endif +#ifdef CONFIG_KGDB_57600BAUD +#define SB_BAUD SB_BAUD576 +#endif +#ifdef CONFIG_KGDB_115200BAUD +#define SB_BAUD SB_BAUD1152 +#endif +#ifndef SB_BAUD +#define SB_BAUD SB_BAUD1152 /* Start with this if not given */ +#endif + +#ifndef CONFIG_X86_TSC +#undef rdtsc +#define rdtsc(a,b) if (a++ > 10000){a = 0; b++;} +#undef rdtscll +#define rdtscll(s) s++ +#endif + +#ifdef _raw_read_unlock /* must use a name that is "define"ed, not an inline */ +#undef spin_lock +#undef spin_trylock +#undef spin_unlock +#define spin_lock _raw_spin_lock +#define spin_trylock _raw_spin_trylock +#define spin_unlock _raw_spin_unlock +#else +#endif +#undef spin_unlock_wait +#define spin_unlock_wait(x) do { cpu_relax(); barrier();} \ + while(spin_is_locked(x)) + +#define SB_IER 1 +#define SB_MCR UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS + +#define FLAGS 0 +#define SB_STATE 
{ \ + magic: SSTATE_MAGIC, \ + baud_base: SB_BASE, \ + port: PORT, \ + irq: IRQ, \ + flags: FLAGS, \ + custom_divisor:SB_BAUD} +#define SB_INFO { \ + magic: SERIAL_MAGIC, \ + port: PORT,0,FLAGS, \ + state: &state, \ + tty: (struct tty_struct *)&state, \ + IER: SB_IER, \ + MCR: SB_MCR} +extern void putDebugChar(int); +/* RTAI support needs us to really stop/start interrupts */ + +#define kgdb_sti() __asm__ __volatile__("sti": : :"memory") +#define kgdb_cli() __asm__ __volatile__("cli": : :"memory") +#define kgdb_local_save_flags(x) __asm__ __volatile__(\ + "pushfl ; popl %0":"=g" (x): /* no input */) +#define kgdb_local_irq_restore(x) __asm__ __volatile__(\ + "pushl %0 ; popfl": \ + /* no output */ :"g" (x):"memory", "cc") +#define kgdb_local_irq_save(x) kgdb_local_save_flags(x); kgdb_cli() + +#ifdef CONFIG_SERIAL +extern void shutdown_for_kgdb(struct async_struct *info); +#endif +#define INIT_KDEBUG putDebugChar("+"); +#endif /* __KGDB_LOCAL */ --- linux-2.6.6-rc1/include/asm-x86_64/pgtable.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-x86_64/pgtable.h 2004-04-18 22:25:33.841685760 -0700 @@ -390,8 +390,6 @@ extern inline pte_t pte_modify(pte_t pte #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -typedef pte_t *pte_addr_t; - #endif /* !__ASSEMBLY__ */ extern int kern_addr_valid(unsigned long addr); --- linux-2.6.6-rc1/include/asm-x86_64/rmap.h 2003-06-14 12:18:30.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,7 +0,0 @@ -#ifndef _X8664_RMAP_H -#define _X8664_RMAP_H - -/* nothing to see, move along */ -#include - -#endif --- linux-2.6.6-rc1/include/asm-x86_64/setup.h 2003-06-14 12:18:51.000000000 -0700 +++ 25/include/asm-x86_64/setup.h 2004-04-18 22:25:35.911371120 -0700 @@ -1,10 +1,6 @@ -/* - * Just a place holder. 
We don't want to have to test x86 before - * we include stuff - */ - #ifndef _x8664_SETUP_H #define _x8664_SETUP_H +#define COMMAND_LINE_SIZE 256 #endif --- linux-2.6.6-rc1/include/asm-x86_64/unistd.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/asm-x86_64/unistd.h 2004-04-18 22:25:48.302487384 -0700 @@ -532,10 +532,28 @@ __SYSCALL(__NR_tgkill, sys_tgkill) __SYSCALL(__NR_utimes, sys_utimes) #define __NR_vserver 236 __SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_vserver 236 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_mbind 237 +__SYSCALL(__NR_mbind, sys_ni_syscall) +#define __NR_set_mempolicy 238 +__SYSCALL(__NR_set_mempolicy, sys_ni_syscall) +#define __NR_get_mempolicy 239 +__SYSCALL(__NR_get_mempolicy, sys_ni_syscall) +#define __NR_mq_open 240 +__SYSCALL(__NR_mq_open, sys_mq_open) +#define __NR_mq_unlink 241 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 242 +__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend) +#define __NR_mq_timedreceive 243 +__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive) +#define __NR_mq_notify 244 +__SYSCALL(__NR_mq_notify, sys_mq_notify) +#define __NR_mq_getsetattr 245 +__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr) -/* 237,238,239 reserved for NUMA API */ - -#define __NR_syscall_max __NR_vserver +#define __NR_syscall_max __NR_mq_getsetattr #ifndef __NO_STUBS /* user-visible error numbers are in the range -1 - -4095 */ --- linux-2.6.6-rc1/include/linux/atmdev.h 2004-02-17 20:48:46.000000000 -0800 +++ 25/include/linux/atmdev.h 2004-04-18 22:25:24.972034152 -0700 @@ -399,9 +399,9 @@ void vcc_remove_socket(struct sock *sk); * */ -static inline int atm_guess_pdu2truesize(int pdu_size) +static inline int atm_guess_pdu2truesize(int size) { - return ((pdu_size+15) & ~15) + sizeof(struct sk_buff); + return (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info)); } --- linux-2.6.6-rc1/include/linux/auto_fs4.h 2003-06-14 12:17:57.000000000 -0700 +++ 25/include/linux/auto_fs4.h 2004-04-18 
22:25:56.218284000 -0700 @@ -23,6 +23,12 @@ #define AUTOFS_MIN_PROTO_VERSION 3 #define AUTOFS_MAX_PROTO_VERSION 4 +#define AUTOFS_PROTO_SUBVERSION 5 + +/* Mask for expire behaviour */ +#define AUTOFS_EXP_IMMEDIATE 1 +#define AUTOFS_EXP_LEAVES 2 + /* New message type */ #define autofs_ptype_expire_multi 2 /* Expire entry (umount request) */ @@ -41,7 +47,11 @@ union autofs_packet_union { struct autofs_packet_expire_multi expire_multi; }; -#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_EXPIRE_MULTI _IOW(0x93,0x66,int) +#define AUTOFS_IOC_PROTOSUBVER _IOR(0x93,0x67,int) +#define AUTOFS_IOC_ASKREGHOST _IOR(0x93,0x68,int) +#define AUTOFS_IOC_TOGGLEREGHOST _IOR(0x93,0x69,int) +#define AUTOFS_IOC_ASKUMOUNT _IOR(0x93,0x70,int) #endif /* _LINUX_AUTO_FS4_H */ --- linux-2.6.6-rc1/include/linux/binfmts.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/binfmts.h 2004-04-18 22:25:42.727334936 -0700 @@ -35,9 +35,13 @@ struct linux_binprm{ char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ + unsigned long interp_flags; unsigned long loader, exec; }; +#define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 +#define BINPRM_FLAGS_ENFORCE_NONDUMP (1 << BINPRM_FLAGS_ENFORCE_NONDUMP_BIT) + /* * This structure defines the functions that are used to load the binary formats that * linux accepts. 
--- linux-2.6.6-rc1/include/linux/bitmap.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/bitmap.h 2004-04-18 22:25:48.429468080 -0700 @@ -29,7 +29,8 @@ static inline void bitmap_fill(unsigned static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, int bits) { - memcpy(dst, src, BITS_TO_LONGS(bits)*sizeof(unsigned long)); + int len = BITS_TO_LONGS(bits)*sizeof(unsigned long); + memcpy(dst, src, len); } void bitmap_shift_right(unsigned long *dst, --- linux-2.6.6-rc1/include/linux/buffer_head.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/linux/buffer_head.h 2004-04-18 22:26:00.420645144 -0700 @@ -62,13 +62,6 @@ struct buffer_head { }; /* - * Debug - */ - -void __buffer_error(char *file, int line); -#define buffer_error() __buffer_error(__FILE__, __LINE__) - -/* * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * and buffer_foo() functions. */ @@ -164,6 +157,8 @@ void __wait_on_buffer(struct buffer_head wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); void wake_up_buffer(struct buffer_head *bh); int fsync_bdev(struct block_device *); +struct super_block *freeze_bdev(struct block_device *); +void thaw_bdev(struct block_device *, struct super_block *); int fsync_super(struct super_block *); int fsync_no_super(struct block_device *); struct buffer_head *__find_get_block(struct block_device *, sector_t, int); @@ -177,7 +172,7 @@ void free_buffer_head(struct buffer_head void FASTCALL(unlock_buffer(struct buffer_head *bh)); void ll_rw_block(int, int, struct buffer_head * bh[]); void sync_dirty_buffer(struct buffer_head *bh); -int submit_bh(int, struct buffer_head *); +void submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); --- linux-2.6.6-rc1/include/linux/compat.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/compat.h 2004-04-18 22:25:58.863881808 -0700 @@ -117,5 +117,18 @@ long compat_sys_shmat(int first, int sec long 
compat_sys_shmctl(int first, int second, void __user *uptr); long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, unsigned nsems, const struct compat_timespec __user *timeout); + +asmlinkage ssize_t compat_sys_readv(unsigned long fd, + const struct compat_iovec __user *vec, unsigned long vlen); +asmlinkage ssize_t compat_sys_writev(unsigned long fd, + const struct compat_iovec __user *vec, unsigned long vlen); + +int compat_do_execve(char * filename, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs * regs); + +asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct compat_timeval __user *tvp); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ --- linux-2.6.6-rc1/include/linux/compat_ioctl.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/compat_ioctl.h 2004-04-18 22:25:24.974033848 -0700 @@ -123,6 +123,19 @@ COMPATIBLE_IOCTL(STOP_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) /* DM */ +COMPATIBLE_IOCTL(DM_VERSION_32) +COMPATIBLE_IOCTL(DM_LIST_DEVICES_32) +COMPATIBLE_IOCTL(DM_DEV_CREATE_32) +COMPATIBLE_IOCTL(DM_DEV_REMOVE_32) +COMPATIBLE_IOCTL(DM_DEV_RENAME_32) +COMPATIBLE_IOCTL(DM_DEV_SUSPEND_32) +COMPATIBLE_IOCTL(DM_DEV_STATUS_32) +COMPATIBLE_IOCTL(DM_DEV_WAIT_32) +COMPATIBLE_IOCTL(DM_TABLE_LOAD_32) +COMPATIBLE_IOCTL(DM_TABLE_CLEAR_32) +COMPATIBLE_IOCTL(DM_TABLE_DEPS_32) +COMPATIBLE_IOCTL(DM_TABLE_STATUS_32) +COMPATIBLE_IOCTL(DM_LIST_VERSIONS_32) COMPATIBLE_IOCTL(DM_VERSION) COMPATIBLE_IOCTL(DM_LIST_DEVICES) COMPATIBLE_IOCTL(DM_DEV_CREATE) --- linux-2.6.6-rc1/include/linux/compiler-gcc.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/linux/compiler-gcc.h 2004-04-18 22:25:34.461591520 -0700 @@ -13,5 +13,5 @@ shouldn't recognize the original var, and make assumptions about it */ #define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ - __asm__ ("" : "=g"(__ptr) : "0"(ptr)); \ + __asm__ ("" : 
"=r"(__ptr) : "0"(ptr)); \ (typeof(ptr)) (__ptr + (off)); }) --- linux-2.6.6-rc1/include/linux/compiler.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/compiler.h 2004-04-18 22:25:24.974033848 -0700 @@ -4,9 +4,11 @@ #ifdef __CHECKER__ # define __user __attribute__((noderef, address_space(1))) # define __kernel /* default address space */ +# define __safe __attribute__((safe)) #else # define __user # define __kernel +# define __safe #endif #ifdef __KERNEL__ --- linux-2.6.6-rc1/include/linux/config.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/linux/config.h 2004-04-18 22:25:31.605025784 -0700 @@ -2,5 +2,8 @@ #define _LINUX_CONFIG_H #include +#ifdef CONFIG_X86 +#include +#endif #endif --- linux-2.6.6-rc1/include/linux/delay.h 2003-08-08 22:55:14.000000000 -0700 +++ 25/include/linux/delay.h 2004-04-18 22:25:46.028833032 -0700 @@ -10,7 +10,7 @@ extern unsigned long loops_per_jiffy; #include - +#include /* * Using udelay() for intervals greater than a few milliseconds can * risk overflow for high loops_per_jiffy (high bogomips) machines. The @@ -25,14 +25,13 @@ extern unsigned long loops_per_jiffy; #define MAX_UDELAY_MS 5 #endif -#ifdef notdef -#define mdelay(n) (\ - {unsigned long __ms=(n); while (__ms--) udelay(1000);}) -#else -#define mdelay(n) (\ - (__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? 
udelay((n)*1000) : \ - ({unsigned long __ms=(n); while (__ms--) udelay(1000);})) -#endif +#define mdelay(n) ( \ + { \ + static int warned=0; \ + unsigned long __ms=(n); \ + WARN_ON(in_irq() && !(warned++)); \ + while (__ms--) udelay(1000); \ + }) #ifndef ndelay #define ndelay(x) udelay(((x)+999)/1000) --- linux-2.6.6-rc1/include/linux/divert.h 2003-06-14 12:18:30.000000000 -0700 +++ 25/include/linux/divert.h 2004-04-18 22:25:24.974033848 -0700 @@ -46,7 +46,7 @@ typedef union _divert_cf_arg u32 uint32; s64 int64; u64 uint64; - void *ptr; + void __user *ptr; } divert_cf_arg; @@ -111,7 +111,7 @@ struct divert_cf #ifdef CONFIG_NET_DIVERT int alloc_divert_blk(struct net_device *); void free_divert_blk(struct net_device *); -int divert_ioctl(unsigned int cmd, struct divert_cf *arg); +int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg); void divert_frame(struct sk_buff *skb); static inline void handle_diverter(struct sk_buff *skb) { --- linux-2.6.6-rc1/include/linux/dma-mapping.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/dma-mapping.h 2004-04-18 22:25:24.975033696 -0700 @@ -10,6 +10,9 @@ enum dma_data_direction { DMA_NONE = 3, }; +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#define DMA_32BIT_MASK 0x00000000ffffffffULL + #include /* Backwards compat, remove in 2.7.x */ --- linux-2.6.6-rc1/include/linux/dm-ioctl.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/dm-ioctl.h 2004-04-18 22:25:24.975033696 -0700 @@ -129,8 +129,14 @@ struct dm_target_spec { int32_t status; /* used when reading from kernel only */ /* - * Offset in bytes (from the start of this struct) to - * next target_spec. + * Location of the next dm_target_spec. + * - When specifying targets on a DM_TABLE_LOAD command, this value is + * the number of bytes from the start of the "current" dm_target_spec + * to the start of the "next" dm_target_spec. 
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value + * is the number of bytes from the start of the first dm_target_spec + * (that follows the dm_ioctl struct) to the start of the "next" + * dm_target_spec. */ uint32_t next; @@ -200,6 +206,34 @@ enum { DM_LIST_VERSIONS_CMD, }; +/* + * The dm_ioctl struct passed into the ioctl is just the header + * on a larger chunk of memory. On x86-64 and other + * architectures the dm-ioctl struct will be padded to an 8 byte + * boundary so the size will be different, which would change the + * ioctl code - yes I really messed up. This hack forces these + * architectures to have the correct ioctl code. + */ +#ifdef CONFIG_COMPAT +typedef char ioctl_struct[308]; +#define DM_VERSION_32 _IOWR(DM_IOCTL, DM_VERSION_CMD, ioctl_struct) +#define DM_REMOVE_ALL_32 _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, ioctl_struct) +#define DM_LIST_DEVICES_32 _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, ioctl_struct) + +#define DM_DEV_CREATE_32 _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, ioctl_struct) +#define DM_DEV_REMOVE_32 _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, ioctl_struct) +#define DM_DEV_RENAME_32 _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, ioctl_struct) +#define DM_DEV_SUSPEND_32 _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, ioctl_struct) +#define DM_DEV_STATUS_32 _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, ioctl_struct) +#define DM_DEV_WAIT_32 _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, ioctl_struct) + +#define DM_TABLE_LOAD_32 _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, ioctl_struct) +#define DM_TABLE_CLEAR_32 _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, ioctl_struct) +#define DM_TABLE_DEPS_32 _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, ioctl_struct) +#define DM_TABLE_STATUS_32 _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, ioctl_struct) +#define DM_LIST_VERSIONS_32 _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, ioctl_struct) +#endif + #define DM_IOCTL 0xfd #define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/dwarf2.h 2004-04-18 22:25:30.345217304 
-0700 @@ -0,0 +1,738 @@ +/* Declarations and definitions of codes relating to the DWARF2 symbolic + debugging information format. + Copyright (C) 1992, 1993, 1995, 1996, 1997, 1999, 2000, 2001, 2002 + Free Software Foundation, Inc. + + Written by Gary Funck (gary@intrepid.com) The Ada Joint Program + Office (AJPO), Florida State University and Silicon Graphics Inc. + provided support for this effort -- June 21, 1995. + + Derived from the DWARF 1 implementation written by Ron Guilmette + (rfg@netcom.com), November 1990. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to the Free + Software Foundation, 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ + +/* This file is derived from the DWARF specification (a public document) + Revision 2.0.0 (July 27, 1993) developed by the UNIX International + Programming Languages Special Interest Group (UI/PLSIG) and distributed + by UNIX International. Copies of this specification are available from + UNIX International, 20 Waterview Boulevard, Parsippany, NJ, 07054. + + This file also now contains definitions from the DWARF 3 specification. */ + +/* This file is shared between GCC and GDB, and should not contain + prototypes. */ + +#ifndef _ELF_DWARF2_H +#define _ELF_DWARF2_H + +/* Structure found in the .debug_line section.
*/ +#ifndef __ASSEMBLY__ +typedef struct +{ + unsigned char li_length [4]; + unsigned char li_version [2]; + unsigned char li_prologue_length [4]; + unsigned char li_min_insn_length [1]; + unsigned char li_default_is_stmt [1]; + unsigned char li_line_base [1]; + unsigned char li_line_range [1]; + unsigned char li_opcode_base [1]; +} +DWARF2_External_LineInfo; + +typedef struct +{ + unsigned long li_length; + unsigned short li_version; + unsigned int li_prologue_length; + unsigned char li_min_insn_length; + unsigned char li_default_is_stmt; + int li_line_base; + unsigned char li_line_range; + unsigned char li_opcode_base; +} +DWARF2_Internal_LineInfo; + +/* Structure found in .debug_pubnames section. */ +typedef struct +{ + unsigned char pn_length [4]; + unsigned char pn_version [2]; + unsigned char pn_offset [4]; + unsigned char pn_size [4]; +} +DWARF2_External_PubNames; + +typedef struct +{ + unsigned long pn_length; + unsigned short pn_version; + unsigned long pn_offset; + unsigned long pn_size; +} +DWARF2_Internal_PubNames; + +/* Structure found in .debug_info section. 
*/ +typedef struct +{ + unsigned char cu_length [4]; + unsigned char cu_version [2]; + unsigned char cu_abbrev_offset [4]; + unsigned char cu_pointer_size [1]; +} +DWARF2_External_CompUnit; + +typedef struct +{ + unsigned long cu_length; + unsigned short cu_version; + unsigned long cu_abbrev_offset; + unsigned char cu_pointer_size; +} +DWARF2_Internal_CompUnit; + +typedef struct +{ + unsigned char ar_length [4]; + unsigned char ar_version [2]; + unsigned char ar_info_offset [4]; + unsigned char ar_pointer_size [1]; + unsigned char ar_segment_size [1]; +} +DWARF2_External_ARange; + +typedef struct +{ + unsigned long ar_length; + unsigned short ar_version; + unsigned long ar_info_offset; + unsigned char ar_pointer_size; + unsigned char ar_segment_size; +} +DWARF2_Internal_ARange; + +#define ENUM(name) enum name { +#define IF_NOT_ASM(a) a +#define COMMA , +#else +#define ENUM(name) +#define IF_NOT_ASM(a) +#define COMMA + +#endif + +/* Tag names and codes. */ +ENUM(dwarf_tag) + + DW_TAG_padding = 0x00 COMMA + DW_TAG_array_type = 0x01 COMMA + DW_TAG_class_type = 0x02 COMMA + DW_TAG_entry_point = 0x03 COMMA + DW_TAG_enumeration_type = 0x04 COMMA + DW_TAG_formal_parameter = 0x05 COMMA + DW_TAG_imported_declaration = 0x08 COMMA + DW_TAG_label = 0x0a COMMA + DW_TAG_lexical_block = 0x0b COMMA + DW_TAG_member = 0x0d COMMA + DW_TAG_pointer_type = 0x0f COMMA + DW_TAG_reference_type = 0x10 COMMA + DW_TAG_compile_unit = 0x11 COMMA + DW_TAG_string_type = 0x12 COMMA + DW_TAG_structure_type = 0x13 COMMA + DW_TAG_subroutine_type = 0x15 COMMA + DW_TAG_typedef = 0x16 COMMA + DW_TAG_union_type = 0x17 COMMA + DW_TAG_unspecified_parameters = 0x18 COMMA + DW_TAG_variant = 0x19 COMMA + DW_TAG_common_block = 0x1a COMMA + DW_TAG_common_inclusion = 0x1b COMMA + DW_TAG_inheritance = 0x1c COMMA + DW_TAG_inlined_subroutine = 0x1d COMMA + DW_TAG_module = 0x1e COMMA + DW_TAG_ptr_to_member_type = 0x1f COMMA + DW_TAG_set_type = 0x20 COMMA + DW_TAG_subrange_type = 0x21 COMMA + DW_TAG_with_stmt = 0x22 
COMMA + DW_TAG_access_declaration = 0x23 COMMA + DW_TAG_base_type = 0x24 COMMA + DW_TAG_catch_block = 0x25 COMMA + DW_TAG_const_type = 0x26 COMMA + DW_TAG_constant = 0x27 COMMA + DW_TAG_enumerator = 0x28 COMMA + DW_TAG_file_type = 0x29 COMMA + DW_TAG_friend = 0x2a COMMA + DW_TAG_namelist = 0x2b COMMA + DW_TAG_namelist_item = 0x2c COMMA + DW_TAG_packed_type = 0x2d COMMA + DW_TAG_subprogram = 0x2e COMMA + DW_TAG_template_type_param = 0x2f COMMA + DW_TAG_template_value_param = 0x30 COMMA + DW_TAG_thrown_type = 0x31 COMMA + DW_TAG_try_block = 0x32 COMMA + DW_TAG_variant_part = 0x33 COMMA + DW_TAG_variable = 0x34 COMMA + DW_TAG_volatile_type = 0x35 COMMA + /* DWARF 3. */ + DW_TAG_dwarf_procedure = 0x36 COMMA + DW_TAG_restrict_type = 0x37 COMMA + DW_TAG_interface_type = 0x38 COMMA + DW_TAG_namespace = 0x39 COMMA + DW_TAG_imported_module = 0x3a COMMA + DW_TAG_unspecified_type = 0x3b COMMA + DW_TAG_partial_unit = 0x3c COMMA + DW_TAG_imported_unit = 0x3d COMMA + /* SGI/MIPS Extensions. */ + DW_TAG_MIPS_loop = 0x4081 COMMA + /* GNU extensions. */ + DW_TAG_format_label = 0x4101 COMMA /* For FORTRAN 77 and Fortran 90. */ + DW_TAG_function_template = 0x4102 COMMA /* For C++. */ + DW_TAG_class_template = 0x4103 COMMA /* For C++. */ + DW_TAG_GNU_BINCL = 0x4104 COMMA + DW_TAG_GNU_EINCL = 0x4105 COMMA + /* Extensions for UPC. See: http://upc.gwu.edu/~upc. */ + DW_TAG_upc_shared_type = 0x8765 COMMA + DW_TAG_upc_strict_type = 0x8766 COMMA + DW_TAG_upc_relaxed_type = 0x8767 +IF_NOT_ASM(};) + +#define DW_TAG_lo_user 0x4080 +#define DW_TAG_hi_user 0xffff + +/* Flag that tells whether entry has a child or not. */ +#define DW_children_no 0 +#define DW_children_yes 1 + +/* Form names and codes. 
*/ +ENUM(dwarf_form) + + DW_FORM_addr = 0x01 COMMA + DW_FORM_block2 = 0x03 COMMA + DW_FORM_block4 = 0x04 COMMA + DW_FORM_data2 = 0x05 COMMA + DW_FORM_data4 = 0x06 COMMA + DW_FORM_data8 = 0x07 COMMA + DW_FORM_string = 0x08 COMMA + DW_FORM_block = 0x09 COMMA + DW_FORM_block1 = 0x0a COMMA + DW_FORM_data1 = 0x0b COMMA + DW_FORM_flag = 0x0c COMMA + DW_FORM_sdata = 0x0d COMMA + DW_FORM_strp = 0x0e COMMA + DW_FORM_udata = 0x0f COMMA + DW_FORM_ref_addr = 0x10 COMMA + DW_FORM_ref1 = 0x11 COMMA + DW_FORM_ref2 = 0x12 COMMA + DW_FORM_ref4 = 0x13 COMMA + DW_FORM_ref8 = 0x14 COMMA + DW_FORM_ref_udata = 0x15 COMMA + DW_FORM_indirect = 0x16 +IF_NOT_ASM(};) + +/* Attribute names and codes. */ + +ENUM(dwarf_attribute) + + DW_AT_sibling = 0x01 COMMA + DW_AT_location = 0x02 COMMA + DW_AT_name = 0x03 COMMA + DW_AT_ordering = 0x09 COMMA + DW_AT_subscr_data = 0x0a COMMA + DW_AT_byte_size = 0x0b COMMA + DW_AT_bit_offset = 0x0c COMMA + DW_AT_bit_size = 0x0d COMMA + DW_AT_element_list = 0x0f COMMA + DW_AT_stmt_list = 0x10 COMMA + DW_AT_low_pc = 0x11 COMMA + DW_AT_high_pc = 0x12 COMMA + DW_AT_language = 0x13 COMMA + DW_AT_member = 0x14 COMMA + DW_AT_discr = 0x15 COMMA + DW_AT_discr_value = 0x16 COMMA + DW_AT_visibility = 0x17 COMMA + DW_AT_import = 0x18 COMMA + DW_AT_string_length = 0x19 COMMA + DW_AT_common_reference = 0x1a COMMA + DW_AT_comp_dir = 0x1b COMMA + DW_AT_const_value = 0x1c COMMA + DW_AT_containing_type = 0x1d COMMA + DW_AT_default_value = 0x1e COMMA + DW_AT_inline = 0x20 COMMA + DW_AT_is_optional = 0x21 COMMA + DW_AT_lower_bound = 0x22 COMMA + DW_AT_producer = 0x25 COMMA + DW_AT_prototyped = 0x27 COMMA + DW_AT_return_addr = 0x2a COMMA + DW_AT_start_scope = 0x2c COMMA + DW_AT_stride_size = 0x2e COMMA + DW_AT_upper_bound = 0x2f COMMA + DW_AT_abstract_origin = 0x31 COMMA + DW_AT_accessibility = 0x32 COMMA + DW_AT_address_class = 0x33 COMMA + DW_AT_artificial = 0x34 COMMA + DW_AT_base_types = 0x35 COMMA + DW_AT_calling_convention = 0x36 COMMA + DW_AT_count = 0x37 COMMA + 
DW_AT_data_member_location = 0x38 COMMA + DW_AT_decl_column = 0x39 COMMA + DW_AT_decl_file = 0x3a COMMA + DW_AT_decl_line = 0x3b COMMA + DW_AT_declaration = 0x3c COMMA + DW_AT_discr_list = 0x3d COMMA + DW_AT_encoding = 0x3e COMMA + DW_AT_external = 0x3f COMMA + DW_AT_frame_base = 0x40 COMMA + DW_AT_friend = 0x41 COMMA + DW_AT_identifier_case = 0x42 COMMA + DW_AT_macro_info = 0x43 COMMA + DW_AT_namelist_items = 0x44 COMMA + DW_AT_priority = 0x45 COMMA + DW_AT_segment = 0x46 COMMA + DW_AT_specification = 0x47 COMMA + DW_AT_static_link = 0x48 COMMA + DW_AT_type = 0x49 COMMA + DW_AT_use_location = 0x4a COMMA + DW_AT_variable_parameter = 0x4b COMMA + DW_AT_virtuality = 0x4c COMMA + DW_AT_vtable_elem_location = 0x4d COMMA + /* DWARF 3 values. */ + DW_AT_allocated = 0x4e COMMA + DW_AT_associated = 0x4f COMMA + DW_AT_data_location = 0x50 COMMA + DW_AT_stride = 0x51 COMMA + DW_AT_entry_pc = 0x52 COMMA + DW_AT_use_UTF8 = 0x53 COMMA + DW_AT_extension = 0x54 COMMA + DW_AT_ranges = 0x55 COMMA + DW_AT_trampoline = 0x56 COMMA + DW_AT_call_column = 0x57 COMMA + DW_AT_call_file = 0x58 COMMA + DW_AT_call_line = 0x59 COMMA + /* SGI/MIPS extensions. */ + DW_AT_MIPS_fde = 0x2001 COMMA + DW_AT_MIPS_loop_begin = 0x2002 COMMA + DW_AT_MIPS_tail_loop_begin = 0x2003 COMMA + DW_AT_MIPS_epilog_begin = 0x2004 COMMA + DW_AT_MIPS_loop_unroll_factor = 0x2005 COMMA + DW_AT_MIPS_software_pipeline_depth = 0x2006 COMMA + DW_AT_MIPS_linkage_name = 0x2007 COMMA + DW_AT_MIPS_stride = 0x2008 COMMA + DW_AT_MIPS_abstract_name = 0x2009 COMMA + DW_AT_MIPS_clone_origin = 0x200a COMMA + DW_AT_MIPS_has_inlines = 0x200b COMMA + /* GNU extensions. */ + DW_AT_sf_names = 0x2101 COMMA + DW_AT_src_info = 0x2102 COMMA + DW_AT_mac_info = 0x2103 COMMA + DW_AT_src_coords = 0x2104 COMMA + DW_AT_body_begin = 0x2105 COMMA + DW_AT_body_end = 0x2106 COMMA + DW_AT_GNU_vector = 0x2107 COMMA + /* VMS extensions. */ + DW_AT_VMS_rtnbeg_pd_address = 0x2201 COMMA + /* UPC extension. 
*/ + DW_AT_upc_threads_scaled = 0x3210 +IF_NOT_ASM(};) + +#define DW_AT_lo_user 0x2000 /* Implementation-defined range start. */ +#define DW_AT_hi_user 0x3ff0 /* Implementation-defined range end. */ + +/* Location atom names and codes. */ +ENUM(dwarf_location_atom) + + DW_OP_addr = 0x03 COMMA + DW_OP_deref = 0x06 COMMA + DW_OP_const1u = 0x08 COMMA + DW_OP_const1s = 0x09 COMMA + DW_OP_const2u = 0x0a COMMA + DW_OP_const2s = 0x0b COMMA + DW_OP_const4u = 0x0c COMMA + DW_OP_const4s = 0x0d COMMA + DW_OP_const8u = 0x0e COMMA + DW_OP_const8s = 0x0f COMMA + DW_OP_constu = 0x10 COMMA + DW_OP_consts = 0x11 COMMA + DW_OP_dup = 0x12 COMMA + DW_OP_drop = 0x13 COMMA + DW_OP_over = 0x14 COMMA + DW_OP_pick = 0x15 COMMA + DW_OP_swap = 0x16 COMMA + DW_OP_rot = 0x17 COMMA + DW_OP_xderef = 0x18 COMMA + DW_OP_abs = 0x19 COMMA + DW_OP_and = 0x1a COMMA + DW_OP_div = 0x1b COMMA + DW_OP_minus = 0x1c COMMA + DW_OP_mod = 0x1d COMMA + DW_OP_mul = 0x1e COMMA + DW_OP_neg = 0x1f COMMA + DW_OP_not = 0x20 COMMA + DW_OP_or = 0x21 COMMA + DW_OP_plus = 0x22 COMMA + DW_OP_plus_uconst = 0x23 COMMA + DW_OP_shl = 0x24 COMMA + DW_OP_shr = 0x25 COMMA + DW_OP_shra = 0x26 COMMA + DW_OP_xor = 0x27 COMMA + DW_OP_bra = 0x28 COMMA + DW_OP_eq = 0x29 COMMA + DW_OP_ge = 0x2a COMMA + DW_OP_gt = 0x2b COMMA + DW_OP_le = 0x2c COMMA + DW_OP_lt = 0x2d COMMA + DW_OP_ne = 0x2e COMMA + DW_OP_skip = 0x2f COMMA + DW_OP_lit0 = 0x30 COMMA + DW_OP_lit1 = 0x31 COMMA + DW_OP_lit2 = 0x32 COMMA + DW_OP_lit3 = 0x33 COMMA + DW_OP_lit4 = 0x34 COMMA + DW_OP_lit5 = 0x35 COMMA + DW_OP_lit6 = 0x36 COMMA + DW_OP_lit7 = 0x37 COMMA + DW_OP_lit8 = 0x38 COMMA + DW_OP_lit9 = 0x39 COMMA + DW_OP_lit10 = 0x3a COMMA + DW_OP_lit11 = 0x3b COMMA + DW_OP_lit12 = 0x3c COMMA + DW_OP_lit13 = 0x3d COMMA + DW_OP_lit14 = 0x3e COMMA + DW_OP_lit15 = 0x3f COMMA + DW_OP_lit16 = 0x40 COMMA + DW_OP_lit17 = 0x41 COMMA + DW_OP_lit18 = 0x42 COMMA + DW_OP_lit19 = 0x43 COMMA + DW_OP_lit20 = 0x44 COMMA + DW_OP_lit21 = 0x45 COMMA + DW_OP_lit22 = 0x46 COMMA + DW_OP_lit23 = 
0x47 COMMA + DW_OP_lit24 = 0x48 COMMA + DW_OP_lit25 = 0x49 COMMA + DW_OP_lit26 = 0x4a COMMA + DW_OP_lit27 = 0x4b COMMA + DW_OP_lit28 = 0x4c COMMA + DW_OP_lit29 = 0x4d COMMA + DW_OP_lit30 = 0x4e COMMA + DW_OP_lit31 = 0x4f COMMA + DW_OP_reg0 = 0x50 COMMA + DW_OP_reg1 = 0x51 COMMA + DW_OP_reg2 = 0x52 COMMA + DW_OP_reg3 = 0x53 COMMA + DW_OP_reg4 = 0x54 COMMA + DW_OP_reg5 = 0x55 COMMA + DW_OP_reg6 = 0x56 COMMA + DW_OP_reg7 = 0x57 COMMA + DW_OP_reg8 = 0x58 COMMA + DW_OP_reg9 = 0x59 COMMA + DW_OP_reg10 = 0x5a COMMA + DW_OP_reg11 = 0x5b COMMA + DW_OP_reg12 = 0x5c COMMA + DW_OP_reg13 = 0x5d COMMA + DW_OP_reg14 = 0x5e COMMA + DW_OP_reg15 = 0x5f COMMA + DW_OP_reg16 = 0x60 COMMA + DW_OP_reg17 = 0x61 COMMA + DW_OP_reg18 = 0x62 COMMA + DW_OP_reg19 = 0x63 COMMA + DW_OP_reg20 = 0x64 COMMA + DW_OP_reg21 = 0x65 COMMA + DW_OP_reg22 = 0x66 COMMA + DW_OP_reg23 = 0x67 COMMA + DW_OP_reg24 = 0x68 COMMA + DW_OP_reg25 = 0x69 COMMA + DW_OP_reg26 = 0x6a COMMA + DW_OP_reg27 = 0x6b COMMA + DW_OP_reg28 = 0x6c COMMA + DW_OP_reg29 = 0x6d COMMA + DW_OP_reg30 = 0x6e COMMA + DW_OP_reg31 = 0x6f COMMA + DW_OP_breg0 = 0x70 COMMA + DW_OP_breg1 = 0x71 COMMA + DW_OP_breg2 = 0x72 COMMA + DW_OP_breg3 = 0x73 COMMA + DW_OP_breg4 = 0x74 COMMA + DW_OP_breg5 = 0x75 COMMA + DW_OP_breg6 = 0x76 COMMA + DW_OP_breg7 = 0x77 COMMA + DW_OP_breg8 = 0x78 COMMA + DW_OP_breg9 = 0x79 COMMA + DW_OP_breg10 = 0x7a COMMA + DW_OP_breg11 = 0x7b COMMA + DW_OP_breg12 = 0x7c COMMA + DW_OP_breg13 = 0x7d COMMA + DW_OP_breg14 = 0x7e COMMA + DW_OP_breg15 = 0x7f COMMA + DW_OP_breg16 = 0x80 COMMA + DW_OP_breg17 = 0x81 COMMA + DW_OP_breg18 = 0x82 COMMA + DW_OP_breg19 = 0x83 COMMA + DW_OP_breg20 = 0x84 COMMA + DW_OP_breg21 = 0x85 COMMA + DW_OP_breg22 = 0x86 COMMA + DW_OP_breg23 = 0x87 COMMA + DW_OP_breg24 = 0x88 COMMA + DW_OP_breg25 = 0x89 COMMA + DW_OP_breg26 = 0x8a COMMA + DW_OP_breg27 = 0x8b COMMA + DW_OP_breg28 = 0x8c COMMA + DW_OP_breg29 = 0x8d COMMA + DW_OP_breg30 = 0x8e COMMA + DW_OP_breg31 = 0x8f COMMA + DW_OP_regx = 0x90 COMMA + 
DW_OP_fbreg = 0x91 COMMA + DW_OP_bregx = 0x92 COMMA + DW_OP_piece = 0x93 COMMA + DW_OP_deref_size = 0x94 COMMA + DW_OP_xderef_size = 0x95 COMMA + DW_OP_nop = 0x96 COMMA + /* DWARF 3 extensions. */ + DW_OP_push_object_address = 0x97 COMMA + DW_OP_call2 = 0x98 COMMA + DW_OP_call4 = 0x99 COMMA + DW_OP_call_ref = 0x9a COMMA + /* GNU extensions. */ + DW_OP_GNU_push_tls_address = 0xe0 +IF_NOT_ASM(};) + +#define DW_OP_lo_user 0xe0 /* Implementation-defined range start. */ +#define DW_OP_hi_user 0xff /* Implementation-defined range end. */ + +/* Type encodings. */ +ENUM(dwarf_type) + + DW_ATE_void = 0x0 COMMA + DW_ATE_address = 0x1 COMMA + DW_ATE_boolean = 0x2 COMMA + DW_ATE_complex_float = 0x3 COMMA + DW_ATE_float = 0x4 COMMA + DW_ATE_signed = 0x5 COMMA + DW_ATE_signed_char = 0x6 COMMA + DW_ATE_unsigned = 0x7 COMMA + DW_ATE_unsigned_char = 0x8 COMMA + /* DWARF 3. */ + DW_ATE_imaginary_float = 0x9 +IF_NOT_ASM(};) + +#define DW_ATE_lo_user 0x80 +#define DW_ATE_hi_user 0xff + +/* Array ordering names and codes. */ +ENUM(dwarf_array_dim_ordering) + + DW_ORD_row_major = 0 COMMA + DW_ORD_col_major = 1 +IF_NOT_ASM(};) + +/* Access attribute. */ +ENUM(dwarf_access_attribute) + + DW_ACCESS_public = 1 COMMA + DW_ACCESS_protected = 2 COMMA + DW_ACCESS_private = 3 +IF_NOT_ASM(};) + +/* Visibility. */ +ENUM(dwarf_visibility_attribute) + + DW_VIS_local = 1 COMMA + DW_VIS_exported = 2 COMMA + DW_VIS_qualified = 3 +IF_NOT_ASM(};) + +/* Virtuality. */ +ENUM(dwarf_virtuality_attribute) + + DW_VIRTUALITY_none = 0 COMMA + DW_VIRTUALITY_virtual = 1 COMMA + DW_VIRTUALITY_pure_virtual = 2 +IF_NOT_ASM(};) + +/* Case sensitivity. */ +ENUM(dwarf_id_case) + + DW_ID_case_sensitive = 0 COMMA + DW_ID_up_case = 1 COMMA + DW_ID_down_case = 2 COMMA + DW_ID_case_insensitive = 3 +IF_NOT_ASM(};) + +/* Calling convention. 
*/ +ENUM(dwarf_calling_convention) + + DW_CC_normal = 0x1 COMMA + DW_CC_program = 0x2 COMMA + DW_CC_nocall = 0x3 +IF_NOT_ASM(};) + +#define DW_CC_lo_user 0x40 +#define DW_CC_hi_user 0xff + +/* Inline attribute. */ +ENUM(dwarf_inline_attribute) + + DW_INL_not_inlined = 0 COMMA + DW_INL_inlined = 1 COMMA + DW_INL_declared_not_inlined = 2 COMMA + DW_INL_declared_inlined = 3 +IF_NOT_ASM(};) + +/* Discriminant lists. */ +ENUM(dwarf_discrim_list) + + DW_DSC_label = 0 COMMA + DW_DSC_range = 1 +IF_NOT_ASM(};) + +/* Line number opcodes. */ +ENUM(dwarf_line_number_ops) + + DW_LNS_extended_op = 0 COMMA + DW_LNS_copy = 1 COMMA + DW_LNS_advance_pc = 2 COMMA + DW_LNS_advance_line = 3 COMMA + DW_LNS_set_file = 4 COMMA + DW_LNS_set_column = 5 COMMA + DW_LNS_negate_stmt = 6 COMMA + DW_LNS_set_basic_block = 7 COMMA + DW_LNS_const_add_pc = 8 COMMA + DW_LNS_fixed_advance_pc = 9 COMMA + /* DWARF 3. */ + DW_LNS_set_prologue_end = 10 COMMA + DW_LNS_set_epilogue_begin = 11 COMMA + DW_LNS_set_isa = 12 +IF_NOT_ASM(};) + +/* Line number extended opcodes. */ +ENUM(dwarf_line_number_x_ops) + + DW_LNE_end_sequence = 1 COMMA + DW_LNE_set_address = 2 COMMA + DW_LNE_define_file = 3 +IF_NOT_ASM(};) + +/* Call frame information. */ +ENUM(dwarf_call_frame_info) + + DW_CFA_advance_loc = 0x40 COMMA + DW_CFA_offset = 0x80 COMMA + DW_CFA_restore = 0xc0 COMMA + DW_CFA_nop = 0x00 COMMA + DW_CFA_set_loc = 0x01 COMMA + DW_CFA_advance_loc1 = 0x02 COMMA + DW_CFA_advance_loc2 = 0x03 COMMA + DW_CFA_advance_loc4 = 0x04 COMMA + DW_CFA_offset_extended = 0x05 COMMA + DW_CFA_restore_extended = 0x06 COMMA + DW_CFA_undefined = 0x07 COMMA + DW_CFA_same_value = 0x08 COMMA + DW_CFA_register = 0x09 COMMA + DW_CFA_remember_state = 0x0a COMMA + DW_CFA_restore_state = 0x0b COMMA + DW_CFA_def_cfa = 0x0c COMMA + DW_CFA_def_cfa_register = 0x0d COMMA + DW_CFA_def_cfa_offset = 0x0e COMMA + + /* DWARF 3. 
*/ + DW_CFA_def_cfa_expression = 0x0f COMMA + DW_CFA_expression = 0x10 COMMA + DW_CFA_offset_extended_sf = 0x11 COMMA + DW_CFA_def_cfa_sf = 0x12 COMMA + DW_CFA_def_cfa_offset_sf = 0x13 COMMA + + /* SGI/MIPS specific. */ + DW_CFA_MIPS_advance_loc8 = 0x1d COMMA + + /* GNU extensions. */ + DW_CFA_GNU_window_save = 0x2d COMMA + DW_CFA_GNU_args_size = 0x2e COMMA + DW_CFA_GNU_negative_offset_extended = 0x2f +IF_NOT_ASM(};) + +#define DW_CIE_ID 0xffffffff +#define DW_CIE_VERSION 1 + +#define DW_CFA_extended 0 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_hi_user 0x3f + +#define DW_CHILDREN_no 0x00 +#define DW_CHILDREN_yes 0x01 + +#define DW_ADDR_none 0 + +/* Source language names and codes. */ +ENUM(dwarf_source_language) + + DW_LANG_C89 = 0x0001 COMMA + DW_LANG_C = 0x0002 COMMA + DW_LANG_Ada83 = 0x0003 COMMA + DW_LANG_C_plus_plus = 0x0004 COMMA + DW_LANG_Cobol74 = 0x0005 COMMA + DW_LANG_Cobol85 = 0x0006 COMMA + DW_LANG_Fortran77 = 0x0007 COMMA + DW_LANG_Fortran90 = 0x0008 COMMA + DW_LANG_Pascal83 = 0x0009 COMMA + DW_LANG_Modula2 = 0x000a COMMA + DW_LANG_Java = 0x000b COMMA + /* DWARF 3. */ + DW_LANG_C99 = 0x000c COMMA + DW_LANG_Ada95 = 0x000d COMMA + DW_LANG_Fortran95 = 0x000e COMMA + /* MIPS. */ + DW_LANG_Mips_Assembler = 0x8001 COMMA + /* UPC. */ + DW_LANG_Upc = 0x8765 +IF_NOT_ASM(};) + +#define DW_LANG_lo_user 0x8000 /* Implementation-defined range start. */ +#define DW_LANG_hi_user 0xffff /* Implementation-defined range end. */ + +/* Names and codes for macro information. */ +ENUM(dwarf_macinfo_record_type) + + DW_MACINFO_define = 1 COMMA + DW_MACINFO_undef = 2 COMMA + DW_MACINFO_start_file = 3 COMMA + DW_MACINFO_end_file = 4 COMMA + DW_MACINFO_vendor_ext = 255 +IF_NOT_ASM(};) + +/* @@@ For use with GNU frame unwind information.
*/ + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_omit 0xff + +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C +#define DW_EH_PE_signed 0x08 + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 + +#define DW_EH_PE_indirect 0x80 + +#endif /* _ELF_DWARF2_H */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/dwarf2-lang.h 2004-04-18 22:25:30.346217152 -0700 @@ -0,0 +1,132 @@ +#ifndef DWARF2_LANG +#define DWARF2_LANG +#include + +/* + * This is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2, or (at your option) any later + * version. + */ +/* + * This file defines macros that allow generation of DWARF debug records + * for asm files. This file is platform independent. Register numbers + * (which are about the only thing that is platform dependent) are to be + * supplied by a platform defined file. + */ +#define DWARF_preamble() .section .debug_frame,"",@progbits +/* + * This macro starts a debug frame section. The debug_frame describes + * where to find the registers that the enclosing function saved on + * entry. + * + * ORD is used by the label generator and should be the same as what is + * passed to CFI_postamble. + * + * pc, pc register gdb ordinal. + * + * code_align this is the factor used to define locations or regions + * where the given definitions apply. If you use labels to define these + * this should be 1. + * + * data_align this is the factor used to define register offsets. If + * you use struct offset, this should be the size of the register in + * bytes or the negative of that.
This is how it is used: you will + * define a register as the reference register, say the stack pointer, + * then you will say where a register is located relative to this + * reference register's value, say 40 for register 3 (the gdb register + * number). The <40> will be multiplied by <data_align> to define the + * byte offset of the given register (3, in this example). So if your + * <40> is the byte offset and the reference register points at the + * beginning, you would want 1 for the data_align. If <40> was the 40th + * 4-byte element in that structure you would want 4. And if your + * reference register points at the end of the structure you would want + * a negative data_align value (and you would have to do other math as + * well). + */ + +#define CFI_preamble(ORD, pc, code_align, data_align) \ +.section .debug_frame,"",@progbits ; \ +frame/**/_/**/ORD: \ + .long end/**/_/**/ORD-start/**/_/**/ORD; \ +start/**/_/**/ORD: \ + .long DW_CIE_ID; \ + .byte DW_CIE_VERSION; \ + .byte 0 ; \ + .uleb128 code_align; \ + .sleb128 data_align; \ + .byte pc; + +/* + * After the above macro and prior to the CFI_postamble, you need to + * define the initial state. This starts with defining the reference + * register and, usually, the pc. Here are some helper macros: + */ + +#define CFA_define_reference(reg, offset) \ + .byte DW_CFA_def_cfa; \ + .uleb128 reg; \ + .uleb128 (offset); + +#define CFA_define_offset(reg, offset) \ + .byte (DW_CFA_offset + reg); \ + .uleb128 (offset); + +#define CFI_postamble(ORD) \ + .align 4; \ +end/**/_/**/ORD: +/* + * So now your code pushes stuff on the stack, you need a new location + * and the rules for what to do. This starts a running description of + * the call frame. You need to describe what changes with respect to + * the call registers as the location of the pc moves through the code. + * The following builds an FDE (frame description entry). Like the + * above, it has a preamble and a postamble. It also is tied to the CFI + * above.
+ * The first entry after the preamble must be the location in the code + * that the call frame is being described for. + */ +#define FDE_preamble(ORD, fde_no, initial_address, length) \ + .long FDE_end/**/_/**/fde_no-FDE_start/**/_/**/fde_no; \ +FDE_start/**/_/**/fde_no: \ + .long frame/**/_/**/ORD; \ + .long initial_address; \ + .long length; + +#define FDE_postamble(fde_no) \ + .align 4; \ +FDE_end/**/_/**/fde_no: +/* + * That done, you can now add registers, subtract registers, move the + * reference and even change the reference. You can also define a new + * area of code the info applies to. For discontinuous bits you should + * start a new FDE. You may have as many as you like. + */ + +/* + * To advance the address by <bytes> + */ + +#define FDE_advance(bytes) \ + .byte DW_CFA_advance_loc4 \ + .long bytes + + + +/* + * With the above you can define all the register locations. But + * suppose the reference register moves... Takes the new offset NOT an + * increment. This is how esp is tracked if it is not saved. + */ + +#define CFA_define_cfa_offset(offset) \ + .byte $DW_CFA_def_cfa_offset; \ + .uleb128 (offset); +/* + * Or suppose you want to use a different reference register...
+ */ +#define CFA_define_cfa_register(reg) \ + .byte DW_CFA_def_cfa_register; \ + .uleb128 reg; + +#endif --- linux-2.6.6-rc1/include/linux/etherdevice.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/linux/etherdevice.h 2004-04-18 22:25:24.976033544 -0700 @@ -25,6 +25,7 @@ #define _LINUX_ETHERDEVICE_H #include +#include #ifdef __KERNEL__ extern int eth_header(struct sk_buff *skb, struct net_device *dev, @@ -40,7 +41,9 @@ extern int eth_header_parse(struct sk_b unsigned char *haddr); extern struct net_device *alloc_etherdev(int sizeof_priv); -static inline void eth_copy_and_sum (struct sk_buff *dest, unsigned char *src, int len, int base) +static inline void eth_copy_and_sum (struct sk_buff *dest, + const unsigned char *src, + int len, int base) { memcpy (dest->data, src, len); } @@ -55,13 +58,26 @@ static inline void eth_copy_and_sum (str * * Return true if the address is valid. */ -static inline int is_valid_ether_addr( u8 *addr ) +static inline int is_valid_ether_addr( const u8 *addr ) { const char zaddr[6] = {0,}; return !(addr[0]&1) && memcmp( addr, zaddr, 6); } +/** + * random_ether_addr - Generate software assigned random Ethernet address + * @addr: Pointer to a six-byte array containing the Ethernet address + * + * Generate a random Ethernet address (MAC) that is not multicast + * and has the local assigned bit set. + */ +static inline void random_ether_addr(u8 *addr) +{ + get_random_bytes (addr, ETH_ALEN); + addr [0] &= 0xfe; /* clear multicast bit */ + addr [0] |= 0x02; /* set local assignment bit (IEEE802) */ +} #endif #endif /* _LINUX_ETHERDEVICE_H */ --- linux-2.6.6-rc1/include/linux/ext3_fs.h 2003-08-08 22:55:14.000000000 -0700 +++ 25/include/linux/ext3_fs.h 2004-04-18 22:25:53.421709144 -0700 @@ -33,11 +33,11 @@ struct statfs; #undef EXT3FS_DEBUG /* - * Define EXT3_PREALLOCATE to preallocate data blocks for expanding files + * Define EXT3_RESERVATION to reserve data blocks for expanding files */ -#undef EXT3_PREALLOCATE /* @@@ Fix this! 
*/ -#define EXT3_DEFAULT_PREALLOC_BLOCKS 8 - +#define EXT3_RESERVATION +#define EXT3_DEFAULT_RESERVE_BLOCKS 8 +#define EXT3_MAX_RESERVE_BLOCKS 1024 /* * Always enable hashed directories */ @@ -208,6 +208,10 @@ struct ext3_group_desc #ifdef CONFIG_JBD_DEBUG #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif +#ifdef EXT3_RESERVATION +#define EXT3_IOC_GETRSVSZ _IOR('r', 1, long) +#define EXT3_IOC_SETRSVSZ _IOW('r', 2, long) +#endif /* * Structure of an inode on the disk @@ -306,24 +310,25 @@ struct ext3_inode { /* * Mount flags */ -#define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */ -#define EXT3_MOUNT_OLDALLOC 0x0002 /* Don't use the new Orlov allocator */ -#define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */ -#define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages */ -#define EXT3_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ -#define EXT3_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ -#define EXT3_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ -#define EXT3_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ -#define EXT3_MOUNT_NOLOAD 0x0100 /* Don't use existing journal*/ -#define EXT3_MOUNT_ABORT 0x0200 /* Fatal error detected */ -#define EXT3_MOUNT_DATA_FLAGS 0x0C00 /* Mode for data writes: */ - #define EXT3_MOUNT_JOURNAL_DATA 0x0400 /* Write data to journal */ - #define EXT3_MOUNT_ORDERED_DATA 0x0800 /* Flush data before commit */ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ -#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ -#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -#define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT3_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT3_MOUNT_DEBUG 0x00008 
/* Some debugging messages */ +#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -680,8 +685,7 @@ struct dir_private_info { /* balloc.c */ extern int ext3_bg_has_super(struct super_block *sb, int group); extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); -extern int ext3_new_block (handle_t *, struct inode *, unsigned long, - __u32 *, __u32 *, int *); +extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, unsigned long); extern unsigned long ext3_count_free_blocks (struct super_block *); @@ -728,6 +732,7 @@ extern void ext3_put_inode (struct inode extern void ext3_delete_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_prealloc (struct inode *); +extern void ext3_discard_reservation (struct inode *); extern void ext3_dirty_inode(struct inode *); extern 
int ext3_change_inode_journal_flag(struct inode *, int); extern void ext3_truncate (struct inode *); --- linux-2.6.6-rc1/include/linux/ext3_fs_i.h 2003-10-08 15:07:10.000000000 -0700 +++ 25/include/linux/ext3_fs_i.h 2004-04-18 22:25:53.621678744 -0700 @@ -18,8 +18,16 @@ #include +struct reserve_window { + struct list_head rsv_list; + __u32 rsv_start; + __u32 rsv_end; + atomic_t rsv_goal_size; + __u32 rsv_alloc_hit; +}; + /* - * second extended file system inode data in memory + * third extended file system inode data in memory */ struct ext3_inode_info { __u32 i_data[15]; @@ -57,10 +65,9 @@ struct ext3_inode_info { * allocation when we detect linearly ascending requests. */ __u32 i_next_alloc_goal; -#ifdef EXT3_PREALLOCATE - __u32 i_prealloc_block; - __u32 i_prealloc_count; -#endif + /* block reservation window */ + struct reserve_window i_rsv_window; + __u32 i_dir_start_lookup; #ifdef CONFIG_EXT3_FS_XATTR /* --- linux-2.6.6-rc1/include/linux/ext3_fs_sb.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/linux/ext3_fs_sb.h 2004-04-18 22:25:53.182745472 -0700 @@ -59,6 +59,10 @@ struct ext3_sb_info { struct percpu_counter s_dirs_counter; struct blockgroup_lock s_blockgroup_lock; + /* head of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct reserve_window s_rsv_window_head; + /* Journaling */ struct inode * s_journal_inode; struct journal_s * s_journal; @@ -69,6 +73,10 @@ struct ext3_sb_info { struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif }; #endif /* _LINUX_EXT3_FS_SB */ --- linux-2.6.6-rc1/include/linux/ext3_jbd.h 2004-02-03 20:42:38.000000000 -0800 +++ 25/include/linux/ext3_jbd.h 2004-04-18 22:25:24.977033392 -0700 @@ -42,8 +42,9 @@ * superblock only gets 
updated once, of course, so don't bother * counting that again for the quota updates. */ -#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ - EXT3_XATTR_TRANS_BLOCKS - 2) +#define EXT3_DATA_TRANS_BLOCKS (EXT3_SINGLEDATA_TRANS_BLOCKS + \ + EXT3_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT3_QUOTA_TRANS_BLOCKS) extern int ext3_writepage_trans_blocks(struct inode *inode); @@ -72,6 +73,19 @@ extern int ext3_writepage_trans_blocks(s #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT3_QUOTA_TRANS_BLOCKS 2 +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\ + (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3) +#else +#define EXT3_QUOTA_TRANS_BLOCKS 0 +#define EXT3_QUOTA_INIT_BLOCKS 0 +#endif + int ext3_mark_iloc_dirty(handle_t *handle, struct inode *inode, --- linux-2.6.6-rc1/include/linux/fs.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/fs.h 2004-04-18 22:26:00.422644840 -0700 @@ -317,7 +317,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); - int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, + ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); }; @@ -345,6 +345,7 @@ struct block_device { struct inode * bd_inode; /* will die */ int bd_openers; struct semaphore bd_sem; /* open/close mutex */ + struct semaphore bd_mount_sem; /* mount mutex */ struct list_head bd_inodes; void * bd_holder; int bd_holders; @@ -365,7 +366,7 @@ struct block_device { }; /* - * Radix-tre tags, for tagging dirty and writeback pages within the pagecache + * Radix-tree tags, for tagging dirty and writeback 
pages within the pagecache * radix trees */ #define PAGECACHE_TAG_DIRTY 0 @@ -407,6 +408,7 @@ static inline int mapping_writably_mappe struct inode { struct hlist_node i_hash; struct list_head i_list; + struct list_head i_sb_list; struct list_head i_dentry; unsigned long i_ino; atomic_t i_count; @@ -740,6 +742,7 @@ struct super_block { atomic_t s_active; void *s_security; + struct list_head s_inodes; /* all inodes */ struct list_head s_dirty; /* dirty inodes */ struct list_head s_io; /* parked for writeback */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ @@ -749,9 +752,11 @@ struct super_block { struct list_head s_instances; struct quota_info s_dquot; /* Diskquota specific options */ + int s_frozen; + wait_queue_head_t s_wait_unfrozen; + char s_id[32]; /* Informational name */ - struct kobject kobj; /* anchor for sysfs */ void *s_fs_info; /* Filesystem private info */ /* @@ -762,6 +767,18 @@ struct super_block { }; /* + * Snapshotting support. + */ +enum { + SB_UNFROZEN = 0, + SB_FREEZE_WRITE = 1, + SB_FREEZE_TRANS = 2, +}; + +#define vfs_check_frozen(sb, level) \ + wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) + +/* * Superblock locking. 
*/ static inline void lock_super(struct super_block * sb) @@ -1359,7 +1376,7 @@ extern struct file * get_empty_filp(void extern void file_move(struct file *f, struct list_head *list); extern void file_kill(struct file *f); struct bio; -extern int submit_bio(int, struct bio *); +extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); @@ -1410,7 +1427,7 @@ static inline void do_generic_file_read( actor); } -int __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, +ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io, int needs_special_locking); @@ -1418,7 +1435,7 @@ int __blockdev_direct_IO(int rw, struct /* * For filesystems which need locking between buffered and direct access */ -static inline int blockdev_direct_IO(int rw, struct kiocb *iocb, +static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io) @@ -1427,7 +1444,7 @@ static inline int blockdev_direct_IO(int nr_segs, get_blocks, end_io, 1); } -static inline int blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, +static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io) --- linux-2.6.6-rc1/include/linux/gfp.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/gfp.h 2004-04-18 22:25:48.851403936 -0700 @@ -4,6 +4,8 @@ #include #include #include +#include + /* * GFP bitmasks.. 
*/ @@ -69,19 +71,43 @@ * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets * optimized to &contig_page_data at compile-time. */ -extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); -static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order) +extern struct page * +FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); + +static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask, + unsigned int order) +{ + if (unlikely(order >= MAX_ORDER)) + return NULL; + + return __alloc_pages(gfp_mask, order, + NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK)); +} + +extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order); +struct vm_area_struct; + +#ifdef CONFIG_NUMA +static inline struct page * +alloc_pages(unsigned int gfp_mask, unsigned int order) { if (unlikely(order >= MAX_ORDER)) return NULL; - return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK)); + return alloc_pages_current(gfp_mask, order); } +extern struct page *__alloc_page_vma(unsigned gfp_mask, + struct vm_area_struct *vma, unsigned long off); +extern struct page *alloc_page_vma(unsigned gfp_mask, + struct vm_area_struct *vma, unsigned long addr); +#else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_page(gfp_mask) \ - alloc_pages_node(numa_node_id(), gfp_mask, 0) +#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0) +#define __alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0) +#endif +#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); --- linux-2.6.6-rc1/include/linux/hugetlb.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/hugetlb.h 2004-04-18 
22:25:59.348808088 -0700 @@ -3,6 +3,8 @@ #ifdef CONFIG_HUGETLB_PAGE +#include + struct ctl_table; static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) @@ -20,10 +22,8 @@ void huge_page_release(struct page *); int hugetlb_report_meminfo(char *); int is_hugepage_mem_enough(size_t); unsigned long hugetlb_total_pages(void); -struct page *follow_huge_addr(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, int write); -struct vm_area_struct *hugepage_vma(struct mm_struct *mm, - unsigned long address); +struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write); int is_aligned_hugepage_range(unsigned long addr, unsigned long len); @@ -65,7 +65,7 @@ static inline unsigned long hugetlb_tota } #define follow_hugetlb_page(m,v,p,vs,a,b,i) ({ BUG(); 0; }) -#define follow_huge_addr(mm, vma, addr, write) 0 +#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) #define zap_hugepage_range(vma, start, len) BUG() @@ -73,7 +73,6 @@ static inline unsigned long hugetlb_tota #define huge_page_release(page) BUG() #define is_hugepage_mem_enough(size) 0 #define hugetlb_report_meminfo(buf) 0 -#define hugepage_vma(mm, addr) 0 #define mark_mm_hugetlb(mm, vma) do { } while (0) #define follow_huge_pmd(mm, addr, pmd, write) 0 #define is_aligned_hugepage_range(addr, len) 0 @@ -108,6 +107,17 @@ struct hugetlbfs_sb_info { spinlock_t stat_lock; }; + +struct hugetlbfs_inode_info { + struct shared_policy policy; + struct inode vfs_inode; +}; + +static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) +{ + return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); +} + static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) { return sb->s_fs_info; --- 
linux-2.6.6-rc1/include/linux/icmpv6.h 2003-06-14 12:18:04.000000000 -0700 +++ 25/include/linux/icmpv6.h 2004-04-18 22:25:24.979033088 -0700 @@ -95,8 +95,7 @@ struct icmp6hdr { #define MLD2_ALLOW_NEW_SOURCES 5 #define MLD2_BLOCK_OLD_SOURCES 6 -/* this must be an IANA-assigned value; 206 for testing only */ -#define ICMPV6_MLD2_REPORT 206 +#define ICMPV6_MLD2_REPORT 143 #define MLD2_ALL_MCR_INIT { { { 0xff,0x02,0,0,0,0,0,0,0,0,0,0,0,0,0,0x16 } } } /* --- linux-2.6.6-rc1/include/linux/ide.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/ide.h 2004-04-18 22:26:01.789437056 -0700 @@ -293,8 +293,49 @@ void ide_setup_ports( hw_regs_t *hw, #endif int irq); +static inline void ide_std_init_ports(hw_regs_t *hw, + unsigned long io_addr, + unsigned long ctl_addr) +{ + unsigned int i; + + for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) + hw->io_ports[i] = io_addr++; + + hw->io_ports[IDE_CONTROL_OFFSET] = ctl_addr; +} + #include +/* + * ide_init_hwif_ports() is OBSOLETE and will be removed in 2.7 series. + * + * arm26, arm, h8300, m68k, m68knommu and i386-pc9800 still have own versions. 
+ */ +#if !defined(CONFIG_ARM) && !defined(CONFIG_H8300) && !defined(CONFIG_M68K) && \ + !defined(CONFIG_M68KNOMMU) && !defined(CONFIG_X86_PC9800) +static inline void ide_init_hwif_ports(hw_regs_t *hw, + unsigned long io_addr, + unsigned long ctl_addr, + int *irq) +{ + if (!ctl_addr) + ide_std_init_ports(hw, io_addr, io_addr + 0x206); + else + ide_std_init_ports(hw, io_addr, ctl_addr); + + if (irq) + *irq = 0; + + hw->io_ports[IDE_IRQ_OFFSET] = 0; + +#ifdef CONFIG_PPC32 + if (ppc_ide_md.ide_init_hwif) + ppc_ide_md.ide_init_hwif(hw, io_addr, ctl_addr, irq); +#endif +} +#endif /* !ARM && !H8300 && !M68K && !M68KNOMMU && !X86_PC9800 */ + /* Currently only m68k, apus and m8xx need it */ #ifndef IDE_ARCH_ACK_INTR # define ide_ack_intr(hwif) (1) @@ -964,6 +1005,7 @@ typedef struct hwif_s { unsigned dma; void (*led_act)(void *data, int rw); + unsigned int (*max_rqsize)(ide_drive_t *); } ide_hwif_t; /* --- linux-2.6.6-rc1/include/linux/idr.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/linux/idr.h 2004-04-18 22:25:44.487067416 -0700 @@ -23,6 +23,7 @@ # error "BITS_PER_LONG is not 32 or 64" #endif +#define IDR_SIZE (1 << IDR_BITS) #define IDR_MASK ((1 << IDR_BITS)-1) /* Define the size of the id's */ @@ -53,6 +54,17 @@ struct idr { spinlock_t lock; }; +#define IDR_INIT(name) \ +{ \ + .top = NULL, \ + .id_free = NULL, \ + .count = 0, \ + .layers = 0, \ + .id_free_cnt = 0, \ + .lock = SPIN_LOCK_UNLOCKED, \ +} +#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) + /* * This is what we export. 
*/ --- linux-2.6.6-rc1/include/linux/if_bridge.h 2003-06-14 12:18:21.000000000 -0700 +++ 25/include/linux/if_bridge.h 2004-04-18 22:25:24.980032936 -0700 @@ -98,9 +98,6 @@ struct __fdb_entry #include -struct net_bridge; -struct net_bridge_port; - extern void brioctl_set(int (*ioctl_hook)(unsigned long)); extern int (*br_handle_frame_hook)(struct sk_buff *skb); extern int (*br_should_route_hook)(struct sk_buff **pskb); --- linux-2.6.6-rc1/include/linux/if.h 2003-06-14 12:18:09.000000000 -0700 +++ 25/include/linux/if.h 2004-04-18 22:25:24.979033088 -0700 @@ -144,7 +144,7 @@ struct ifreq struct ifmap ifru_map; char ifru_slave[IFNAMSIZ]; /* Just fits the size */ char ifru_newname[IFNAMSIZ]; - char * ifru_data; + char __user * ifru_data; struct if_settings ifru_settings; } ifr_ifru; }; --- linux-2.6.6-rc1/include/linux/init.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/init.h 2004-04-18 22:25:54.260581616 -0700 @@ -3,6 +3,7 @@ #include #include +#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -46,8 +47,6 @@ #define __exitdata __attribute__ ((__section__(".exit.data"))) #define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) -#define __sched __attribute__((__section__(".sched.text"))) - #ifdef MODULE #define __exit __attribute__ ((__section__(".exit.text"))) #else @@ -68,6 +67,9 @@ typedef void (*exitcall_t)(void); extern initcall_t __con_initcall_start, __con_initcall_end; extern initcall_t __security_initcall_start, __security_initcall_end; + +/* Defined in init/main.c */ +extern char saved_command_line[COMMAND_LINE_SIZE]; #endif #ifndef MODULE @@ -109,25 +111,33 @@ extern initcall_t __security_initcall_st struct obs_kernel_param { const char *str; int (*setup_func)(char *); + int early; }; -/* OBSOLETE: see moduleparam.h for the right way. */ -#define __setup_param(str, unique_id, fn) \ +/* Only for really core code. 
See moduleparam.h for the normal way. */ +#define __setup_param(str, unique_id, fn, early) \ static char __setup_str_##unique_id[] __initdata = str; \ static struct obs_kernel_param __setup_##unique_id \ __attribute_used__ \ __attribute__((__section__(".init.setup"))) \ - = { __setup_str_##unique_id, fn } + = { __setup_str_##unique_id, fn, early } #define __setup_null_param(str, unique_id) \ - __setup_param(str, unique_id, NULL) + __setup_param(str, unique_id, NULL, 0) #define __setup(str, fn) \ - __setup_param(str, fn, fn) + __setup_param(str, fn, fn, 0) #define __obsolete_setup(str) \ __setup_null_param(str, __LINE__) +/* NOTE: fn is as per module_param, not __setup! Emits warning if fn + * returns non-zero. */ +#define early_param(str, fn) \ + __setup_param(str, fn, fn, 1) + +/* Relies on saved_command_line being set */ +void __init parse_early_param(void); #endif /* __ASSEMBLY__ */ /** --- linux-2.6.6-rc1/include/linux/kmalloc_sizes.h 2003-06-14 12:18:30.000000000 -0700 +++ 25/include/linux/kmalloc_sizes.h 2004-04-18 22:25:43.581205128 -0700 @@ -12,6 +12,9 @@ CACHE(256) CACHE(512) CACHE(1024) +#if (PAGE_SIZE != 4096) /* special cache for eth skbs - 5 fit into one 8 kB page */ + CACHE(1620) +#endif CACHE(2048) CACHE(4096) CACHE(8192) --- linux-2.6.6-rc1/include/linux/list.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/list.h 2004-04-18 22:25:47.626590136 -0700 @@ -158,8 +158,11 @@ static inline void __list_del(struct lis * Note: list_empty on entry does not return true after this, the entry is * in an undefined state. 
*/ +#include /* BUG_ON */ static inline void list_del(struct list_head *entry) { + BUG_ON(entry->prev->next != entry); + BUG_ON(entry->next->prev != entry); __list_del(entry->prev, entry->next); entry->next = LIST_POISON1; entry->prev = LIST_POISON2; --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/lockmeter.h 2004-04-18 22:25:47.926544536 -0700 @@ -0,0 +1,320 @@ +/* + * Copyright (C) 1999-2002 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.h by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) Feb-Apr 2000 + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks + * Added support for hold time statistics for read and write + * locks. + * Moved machine dependent code to include/asm/lockmeter.h. + * + */ + +#ifndef _LINUX_LOCKMETER_H +#define _LINUX_LOCKMETER_H + + +/*--------------------------------------------------- + * architecture-independent lockmeter.h + *-------------------------------------------------*/ + +/* + * raybry -- version 2: added efficient hold time statistics + * requires lstat recompile, so flagged as new version + * raybry -- version 3: added global reader lock data + * hawkes -- version 4: removed some unnecessary fields to simplify mips64 port + */ +#define LSTAT_VERSION 5 + +int lstat_update(void*, void*, int); +int lstat_update_time(void*, void*, int, uint32_t); + +/* + * Currently, the mips64 and sparc64 kernels talk to a 32-bit lockstat, so we + * need to force compatibility in the inter-communication data structure. 
+ */ + +#if defined(CONFIG_MIPS32_COMPAT) +#define TIME_T uint32_t +#elif defined(CONFIG_SPARC) || defined(CONFIG_SPARC64) +#define TIME_T uint64_t +#else +#define TIME_T time_t +#endif + +#if defined(__KERNEL__) || (!defined(CONFIG_MIPS32_COMPAT) && !defined(CONFIG_SPARC) && !defined(CONFIG_SPARC64)) || (_MIPS_SZLONG==32) +#define POINTER void * +#else +#define POINTER int64_t +#endif + +/* + * Values for the "action" parameter passed to lstat_update. + * ZZZ - do we want a try-success status here??? + */ +#define LSTAT_ACT_NO_WAIT 0 +#define LSTAT_ACT_SPIN 1 +#define LSTAT_ACT_REJECT 2 +#define LSTAT_ACT_WW_SPIN 3 +#define LSTAT_ACT_SLEPT 4 /* UNUSED */ + +#define LSTAT_ACT_MAX_VALUES 4 /* NOTE: Increase to 5 if use ACT_SLEPT */ + +/* + * Special values for the low 2 bits of an RA passed to + * lstat_update. + */ +/* we use these values to figure out what kind of lock data */ +/* is stored in the statistics table entry at index ....... */ +#define LSTAT_RA_SPIN 0 /* spin lock data */ +#define LSTAT_RA_READ 1 /* read lock statistics */ +#define LSTAT_RA_SEMA 2 /* RESERVED */ +#define LSTAT_RA_WRITE 3 /* write lock statistics*/ + +#define LSTAT_RA(n) \ + ((void*)( ((unsigned long)__builtin_return_address(0) & ~3) | n) ) + +/* + * Constants used for lock addresses in the lstat_directory + * to indicate special values of the lock address. + */ +#define LSTAT_MULTI_LOCK_ADDRESS NULL + +/* + * Maximum size of the lockstats tables. Increase this value + * if it's not big enough. (Nothing bad happens if it's not + * big enough although some locks will not be monitored.) + * We record overflows of this quantity in lstat_control.dir_overflow + * + * Note: The max value here must fit into the field set + * and obtained by the macros PUT_INDEX() and GET_INDEX(). + * This value depends on how many bits are available in the + * lock word in the particular machine implementation we are on. 
+ */ +#define LSTAT_MAX_STAT_INDEX 2000 + +/* + * Size and mask for the hash table into the directory. + */ +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) + +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) + +/* + * This defines an entry in the lockstat directory. It contains + * information about a lock being monitored. + * A directory entry only contains the lock identification - + * counts on usage of the lock are kept elsewhere in a per-cpu + * data structure to minimize cache line pinging. + */ +typedef struct { + POINTER caller_ra; /* RA of code that set lock */ + POINTER lock_ptr; /* lock address */ + ushort next_stat_index; /* Used to link multiple locks that have the same hash table value */ +} lstat_directory_entry_t; + +/* + * A multi-dimensioned array used to contain counts for lock accesses. + * The array is 3-dimensional: + * - CPU number. Keep from thrashing cache lines between CPUs + * - Directory entry index. Identifies the lock + * - Action. Indicates what kind of contention occurred on an + * access to the lock. + * + * The index of an entry in the directory is the same as the 2nd index + * of the entry in the counts array. + */ +/* + * This table contains data for spin_locks, write locks, and read locks + * Not all data is used for all cases. In particular, the hold time + * information is not stored here for read locks since that is a global + * (e. g. cannot be separated out by return address) quantity. + * See the lstat_read_lock_counts_t structure for the global read lock + * hold time. + */ +typedef struct { + uint64_t cum_wait_ticks; /* sum of wait times */ + /* for write locks, sum of time a */ + /* writer is waiting for a reader */ + int64_t cum_hold_ticks; /* cumulative sum of holds */ + /* not used for read mode locks */ + /* must be signed. ............... 
*/ + uint32_t max_wait_ticks; /* max waiting time */ + uint32_t max_hold_ticks; /* max holding time */ + uint64_t cum_wait_ww_ticks; /* sum times writer waits on writer*/ + uint32_t max_wait_ww_ticks; /* max wait time writer vs writer */ + /* prev 2 only used for write locks*/ + uint32_t acquire_time; /* time lock acquired this CPU */ + uint32_t count[LSTAT_ACT_MAX_VALUES]; +} lstat_lock_counts_t; + +typedef lstat_lock_counts_t lstat_cpu_counts_t[LSTAT_MAX_STAT_INDEX]; + +/* + * User request to: + * - turn statistic collection on/off, or to reset + */ +#define LSTAT_OFF 0 +#define LSTAT_ON 1 +#define LSTAT_RESET 2 +#define LSTAT_RELEASE 3 + +#define LSTAT_MAX_READ_LOCK_INDEX 1000 +typedef struct { + POINTER lock_ptr; /* address of lock for output stats */ + uint32_t read_lock_count; + int64_t cum_hold_ticks; /* sum of read lock hold times over */ + /* all callers. ....................*/ + uint32_t write_index; /* last write lock hash table index */ + uint32_t busy_periods; /* count of busy periods ended this */ + uint64_t start_busy; /* time this busy period started. ..*/ + uint64_t busy_ticks; /* sum of busy periods this lock. ..*/ + uint64_t max_busy; /* longest busy period for this lock*/ + uint32_t max_readers; /* maximum number of readers ...... */ +#ifdef USER_MODE_TESTING + rwlock_t entry_lock; /* lock for this read lock entry... */ + /* avoid having more than one rdr at*/ + /* needed for user space testing... */ + /* not needed for kernel 'cause it */ + /* is non-preemptive. ............. */ +#endif +} lstat_read_lock_counts_t; +typedef lstat_read_lock_counts_t lstat_read_lock_cpu_counts_t[LSTAT_MAX_READ_LOCK_INDEX]; + +#if defined(__KERNEL__) || defined(USER_MODE_TESTING) + +#ifndef USER_MODE_TESTING +#include +#else +#include "asm_newlockmeter.h" +#endif + +/* + * Size and mask for the hash table into the directory. 
+ */ +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */ +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1) + +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK) + +/* + * This version eliminates the per processor lock stack. What we do is to + * store the index of the lock hash structure in unused bits in the lock + * itself. Then on unlock we can find the statistics record without doing + * any additional hash or lock stack lookup. This works for spin_locks. + * Hold time reporting is now basically as cheap as wait time reporting + * so we ignore the difference between LSTAT_ON_HOLD and LSTAT_ON_WAIT + * as in version 1.1.* of lockmeter. + * + * For rw_locks, we store the index of a global reader stats structure in + * the lock and the writer index is stored in the latter structure. + * For read mode locks we hash at the time of the lock to find an entry + * in the directory for reader wait time and the like. + * At unlock time for read mode locks, we update just the global structure + * so we don't need to know the reader directory index value at unlock time. + * + */ + +/* + * Protocol to change lstat_control.state + * This is complicated because we don't want the cum_hold_ticks for + * a rw_lock to be decremented in _read_lock_ without making sure it + * is incremented in _read_unlock_ and vice versa. So here is the + * way we change the state of lstat_control.state: + * I. To Turn Statistics On + * After allocating storage, set lstat_control.state non-zero. + * This works because we don't start updating statistics for in use + * locks until the reader lock count goes to zero. + * II. To Turn Statistics Off: + * (0) Disable interrupts on this CPU + * (1) Seize the lstat_control.directory_lock + * (2) Obtain the current value of lstat_control.next_free_read_lock_index + * (3) Store a zero in lstat_control.state. 
+ * (4) Release the lstat_control.directory_lock + * (5) For each lock in the read lock list up to the saved value + * (well, -1) of the next_free_read_lock_index, do the following: + * (a) Check validity of the stored lock address + * by making sure that the word at the saved addr + * has an index that matches this entry. If not + * valid, then skip this entry. + * (b) If there is a write lock already set on this lock, + * skip to (d) below. + * (c) Set a non-metered write lock on the lock + * (d) set the cached INDEX in the lock to zero + * (e) Release the non-metered write lock. + * (6) Re-enable interrupts + * + * These rules ensure that a read lock will not have its statistics + * partially updated even though the global lock recording state has + * changed. See put_lockmeter_info() for implementation. + * + * The reason for (b) is that there may be write locks set on the + * syscall path to put_lockmeter_info() from user space. If we do + * not do this check, then we can deadlock. A similar problem would + * occur if the lock was read locked by the current CPU. At the + * moment this does not appear to happen. + */ + +/* + * Main control structure for lockstat. Used to turn statistics on/off + * and to maintain directory info. + */ +typedef struct { + int state; + spinlock_t control_lock; /* used to serialize turning statistics on/off */ + spinlock_t directory_lock; /* for serialize adding entries to directory */ + volatile int next_free_dir_index;/* next free entry in the directory */ + /* FIXME not all of these fields are used / needed .............. 
*/ + /* the following fields represent data since */ + /* first "lstat on" or most recent "lstat reset" */ + TIME_T first_started_time; /* time when measurement first enabled */ + TIME_T started_time; /* time when measurement last started */ + TIME_T ending_time; /* time when measurement last disabled */ + uint64_t started_cycles64; /* cycles when measurement last started */ + uint64_t ending_cycles64; /* cycles when measurement last disabled */ + uint64_t enabled_cycles64; /* total cycles with measurement enabled */ + int intervals; /* number of measurement intervals recorded */ + /* i. e. number of times did lstat on;lstat off */ + lstat_directory_entry_t *dir; /* directory */ + int dir_overflow; /* count of times ran out of space in directory */ + int rwlock_overflow; /* count of times we couldn't allocate a rw block*/ + ushort *hashtab; /* hash table for quick dir scans */ + lstat_cpu_counts_t *counts[NR_CPUS]; /* Array of pointers to per-cpu stats */ + int next_free_read_lock_index; /* next rwlock reader (global) stats block */ + lstat_read_lock_cpu_counts_t *read_lock_counts[NR_CPUS]; /* per cpu read lock stats */ +} lstat_control_t; + +#endif /* defined(__KERNEL__) || defined(USER_MODE_TESTING) */ + +typedef struct { + short lstat_version; /* version of the data */ + short state; /* the current state is returned */ + int maxcpus; /* Number of cpus present */ + int next_free_dir_index; /* index of the next free directory entry */ + TIME_T first_started_time; /* when measurement enabled for first time */ + TIME_T started_time; /* time in secs since 1969 when stats last turned on */ + TIME_T ending_time; /* time in secs since 1969 when stats last turned off */ + uint32_t cycleval; /* cycles per second */ +#ifdef notyet + void *kernel_magic_addr; /* address of kernel_magic */ + void *kernel_end_addr; /* contents of kernel magic (points to "end") */ +#endif + int next_free_read_lock_index; /* index of next (global) read lock stats struct */ + uint64_t 
started_cycles64; /* cycles when measurement last started */ + uint64_t ending_cycles64; /* cycles when stats last turned off */ + uint64_t enabled_cycles64; /* total cycles with measurement enabled */ + int intervals; /* number of measurement intervals recorded */ + /* i.e. number of times we did lstat on;lstat off*/ + int dir_overflow; /* number of times we wanted more space in directory */ + int rwlock_overflow; /* # of times we wanted more space in read_locks_count */ + struct new_utsname uts; /* info about machine where stats are measured */ + /* -T option of lockstat allows data to be */ + /* moved to another machine. ................. */ +} lstat_user_request_t; + +#endif /* _LINUX_LOCKMETER_H */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/mempolicy.h 2004-04-18 22:25:48.853403632 -0700 @@ -0,0 +1,221 @@ +#ifndef _LINUX_MEMPOLICY_H +#define _LINUX_MEMPOLICY_H 1 + +#include + +/* + * NUMA memory policies for Linux. + * Copyright 2003,2004 Andi Kleen SuSE Labs + */ + +/* Policies */ +#define MPOL_DEFAULT 0 +#define MPOL_PREFERRED 1 +#define MPOL_BIND 2 +#define MPOL_INTERLEAVE 3 + +#define MPOL_MAX MPOL_INTERLEAVE + +/* Flags for get_mem_policy */ +#define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */ +#define MPOL_F_ADDR (1<<1) /* look up vma using address */ + +/* Flags for mbind */ +#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + +struct vm_area_struct; + +#ifdef CONFIG_NUMA + +/* + * Describe a memory policy. + * + * A mempolicy can be either associated with a process or with a VMA. + * For VMA related allocations the VMA policy is preferred, otherwise + * the process policy is used. Interrupts ignore the memory policy + * of the current process. + * + * Locking policy for interleave: + * In process context there is no locking because only the process accesses + * its own state.
All vma manipulation is somewhat protected by a down_read on + * mmap_sem. For allocating in the interleave policy the page_table_lock + * must be also acquired to protect il_next. + * + * Freeing policy: + * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. + * All other policies don't have any external state. mpol_free() handles this. + * + * Copying policy objects: + * For MPOL_BIND the zonelist must be always duplicated. mpol_copy() does this. + */ +struct mempolicy { + atomic_t refcnt; + short policy; /* See MPOL_* above */ + union { + struct zonelist *zonelist; /* bind */ + short preferred_node; /* preferred */ + DECLARE_BITMAP(nodes, MAX_NUMNODES); /* interleave */ + /* undefined for default */ + } v; +}; + +/* A NULL mempolicy pointer is a synonym of &default_policy. */ +extern struct mempolicy default_policy; + +/* + * Support for managing mempolicy data objects (clone, copy, destroy) + * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. + */ + +extern void __mpol_free(struct mempolicy *pol); +static inline void mpol_free(struct mempolicy *pol) +{ + if (pol) + __mpol_free(pol); +} + +extern struct mempolicy *__mpol_copy(struct mempolicy *pol); +static inline struct mempolicy *mpol_copy(struct mempolicy *pol) +{ + if (pol) + pol = __mpol_copy(pol); + return pol; +} + +#define vma_policy(vma) ((vma)->vm_policy) +#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol)) + +static inline void mpol_get(struct mempolicy *pol) +{ + if (pol) + atomic_inc(&pol->refcnt); +} + +extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b); +static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) +{ + if (a == b) + return 1; + return __mpol_equal(a, b); +} +#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b)) + +/* Could later add inheritance of the process policy here. */ + +#define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL) + +/* + * Hugetlb policy.
i386 hugetlb so far works with node numbers + * instead of zone lists, so give it special interfaces for now. + */ +extern int mpol_first_node(struct vm_area_struct *vma, unsigned long addr); +extern int mpol_node_valid(int nid, struct vm_area_struct *vma, + unsigned long addr); + +/* + * Tree of shared policies for a shared memory region. + * Maintain the policies in a pseudo mm that contains vmas. The vmas + * carry the policy. As a special twist the pseudo mm is indexed in pages, not + * bytes, so that we can work with shared memory segments bigger than + * unsigned long. + */ + +struct sp_node { + struct rb_node nd; + unsigned long start, end; + struct mempolicy *policy; +}; + +struct shared_policy { + struct rb_root root; + struct semaphore sem; +}; + +static inline void mpol_shared_policy_init(struct shared_policy *info) +{ + info->root = RB_ROOT; + init_MUTEX(&info->sem); +} + +int mpol_set_shared_policy(struct shared_policy *info, + struct vm_area_struct *vma, + struct mempolicy *new); +void mpol_free_shared_policy(struct shared_policy *p); +struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, + unsigned long idx); + +#else + +struct mempolicy {}; + +static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b) +{ + return 1; +} +#define vma_mpol_equal(a,b) 1 + +#define mpol_set_vma_default(vma) do {} while(0) + +static inline void mpol_free(struct mempolicy *p) +{ +} + +static inline void mpol_get(struct mempolicy *pol) +{ +} + +static inline struct mempolicy *mpol_copy(struct mempolicy *old) +{ + return NULL; +} + +static inline int mpol_first_node(struct vm_area_struct *vma, unsigned long a) +{ + return numa_node_id(); +} + +static inline int +mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long a) +{ + return 1; +} + +struct shared_policy {}; + +static inline int mpol_set_shared_policy(struct shared_policy *info, + struct vm_area_struct *vma, + struct mempolicy *new) +{ + return -EINVAL; +} + +static inline void 
mpol_shared_policy_init(struct shared_policy *info) +{ +} + +static inline void mpol_free_shared_policy(struct shared_policy *p) +{ +} + +static inline struct mempolicy * +mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +{ + return NULL; +} + +#define vma_policy(vma) NULL +#define vma_set_policy(vma, pol) do {} while(0) + +#endif /* CONFIG_NUMA */ +#endif /* __KERNEL__ */ + +#endif --- linux-2.6.6-rc1/include/linux/mm.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/mm.h 2004-04-18 22:25:49.283338272 -0700 @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ extern unsigned long max_mapnr; @@ -47,6 +48,10 @@ extern int page_cluster; * * This structure is exactly 64 bytes on ia32. Please think very, very hard * before adding anything to it. + * [Now 4 bytes more on 32bit NUMA machines. Sorry. -AK. + * But if you want to recover the 4 bytes just remove vm_next. It is redundant + * with vm_rb. Will be a lot of editing work though. vm_rb.color is redundant + * too.] */ struct vm_area_struct { struct mm_struct * vm_mm; /* The address space we belong to. */ @@ -77,6 +82,10 @@ struct vm_area_struct { units, *not* PAGE_CACHE_SIZE */ struct file * vm_file; /* File we map to (can be NULL).
*/ void * vm_private_data; /* was vm_pte (shared mem) */ + +#ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ +#endif }; /* @@ -148,10 +157,13 @@ struct vm_operations_struct { void (*close)(struct vm_area_struct * area); struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); +#ifdef CONFIG_NUMA + int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); + struct mempolicy *(*get_policy)(struct vm_area_struct *vma, + unsigned long addr); +#endif }; -/* forward declaration; pte_chain is meant to be internal to rmap.c */ -struct pte_chain; struct mmu_gather; struct inode; @@ -173,27 +185,26 @@ typedef unsigned long page_flags_t; * * The first line is data used in page cache lookup, the second line * is used for linear searches (eg. clock algorithm scans). - * - * TODO: make this structure smaller, it could be as small as 32 bytes. */ struct page { - page_flags_t flags; /* atomic flags, some possibly - updated asynchronously */ + page_flags_t flags; /* Atomic flags, some possibly + * updated asynchronously */ atomic_t count; /* Usage count, see below. */ - struct address_space *mapping; /* The inode (or ...) we belong to. */ - pgoff_t index; /* Our offset within mapping. */ - struct list_head lru; /* Pageout list, eg. active_list; - protected by zone->lru_lock !! */ - union { - struct pte_chain *chain;/* Reverse pte mapping pointer. - * protected by PG_chainlock */ - pte_addr_t direct; - } pte; + unsigned int mapcount; /* Count of ptes mapped in mms, + * to show when page is mapped + * & limit reverse map searches, + * protected by PG_maplock. + */ unsigned long private; /* Mapping-private opaque data: * usually used for buffer_heads * if PagePrivate set; used for * swp_entry_t if PageSwapCache */ + struct address_space *mapping; /* The inode (or ...) 
we belong to. */ + pgoff_t index; /* Our offset within mapping. */ + struct list_head lru; /* Pageout list, eg. active_list + * protected by zone->lru_lock ! + */ /* * On machines where all RAM is mapped into kernel address space, * we can simply calculate the virtual address. On machines with @@ -404,13 +415,11 @@ static inline struct address_space *page } /* - * Return true if this page is mapped into pagetables. Subtle: test pte.direct - * rather than pte.chain. Because sometimes pte.direct is 64-bit, and .chain - * is only 32-bit. + * Return true if this page is mapped into pagetables. */ static inline int page_mapped(struct page *page) { - return page->pte.direct != 0; + return page->mapcount != 0; } /* @@ -435,26 +444,34 @@ extern void show_free_areas(void); struct page *shmem_nopage(struct vm_area_struct * vma, unsigned long address, int *type); +int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new); +struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, + unsigned long addr); struct file *shmem_file_setup(char * name, loff_t size, unsigned long flags); void shmem_lock(struct file * file, int lock); int shmem_zero_setup(struct vm_area_struct *); +struct zap_details; void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size); + unsigned long size, struct zap_details *); int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, struct vm_area_struct *start_vma, unsigned long start_addr, - unsigned long end_addr, unsigned long *nr_accounted); -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long address, unsigned long size); + unsigned long end_addr, unsigned long *nr_accounted, + struct zap_details *); void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); int zeromap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long 
size, pgprot_t prot); +void unmap_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen, int even_cows); + +static inline void unmap_shared_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen) +{ + unmap_mapping_range(mapping, holebegin, holelen, 0); +} -extern void invalidate_mmap_range(struct address_space *mapping, - loff_t const holebegin, - loff_t const holelen); extern int vmtruncate(struct inode * inode, loff_t offset); extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); @@ -526,9 +543,8 @@ extern void si_meminfo_node(struct sysin extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); -extern struct vm_area_struct *copy_vma(struct vm_area_struct *, +extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, unsigned long pgoff); -extern void vma_relink_file(struct vm_area_struct *, struct vm_area_struct *); extern void exit_mmap(struct mm_struct *); extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -621,6 +637,11 @@ static inline struct vm_area_struct * fi return vma; } +static inline unsigned long vma_pages(struct vm_area_struct *vma) +{ + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; +} + extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); extern unsigned int nr_used_zone_pages(void); --- linux-2.6.6-rc1/include/linux/mmzone.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/mmzone.h 2004-04-18 22:25:50.533148272 -0700 @@ -52,6 +52,14 @@ struct per_cpu_pages { struct per_cpu_pageset { struct per_cpu_pages pcp[2]; /* 0: hot. 
1: cold */ +#ifdef CONFIG_NUMA + unsigned long numa_hit; /* allocated in intended node */ + unsigned long numa_miss; /* allocated in non intended node */ + unsigned long numa_foreign; /* was intended here, hit elsewhere */ + unsigned long interleave_hit; /* interleaver preferred this zone */ + unsigned long local_node; /* allocation from local node */ + unsigned long other_node; /* allocation from other node */ +#endif } ____cacheline_aligned_in_smp; #define ZONE_DMA 0 --- linux-2.6.6-rc1/include/linux/netdevice.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/netdevice.h 2004-04-18 22:25:24.981032784 -0700 @@ -42,13 +42,14 @@ struct divert_blk; struct vlan_group; struct ethtool_ops; - /* source back-compat hook */ + /* source back-compat hooks */ #define SET_ETHTOOL_OPS(netdev,ops) \ ( (netdev)->ethtool_ops = (ops) ) #define HAVE_ALLOC_NETDEV /* feature macro: alloc_xxxdev functions are available. */ -#define HAVE_FREE_NETDEV +#define HAVE_FREE_NETDEV /* free_netdev() */ +#define HAVE_NETDEV_PRIV /* netdev_priv() */ #define NET_XMIT_SUCCESS 0 #define NET_XMIT_DROP 1 /* skb dropped */ --- linux-2.6.6-rc1/include/linux/netfilter.h 2003-07-02 14:53:18.000000000 -0700 +++ 25/include/linux/netfilter.h 2004-04-18 22:25:24.982032632 -0700 @@ -99,6 +99,24 @@ void nf_unregister_sockopt(struct nf_soc extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +typedef void nf_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix); + +/* Function to register/unregister log function.
*/ +int nf_log_register(int pf, nf_logfn *logfn); +void nf_log_unregister(int pf, nf_logfn *logfn); + +/* Calls the registered backend logging function */ +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...); + /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). --- linux-2.6.6-rc1/include/linux/netfilter_ipv4.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/linux/netfilter_ipv4.h 2004-04-18 22:25:24.985032176 -0700 @@ -51,6 +51,8 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK_DEFRAG = -400, + NF_IP_PRI_RAW = -300, NF_IP_PRI_SELINUX_FIRST = -225, NF_IP_PRI_CONNTRACK = -200, NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175, --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ip_conntrack.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/netfilter_ipv4/ip_conntrack.h 2004-04-18 22:25:24.983032480 -0700 @@ -252,6 +252,9 @@ extern void ip_ct_refresh(struct ip_conn /* Call me when a conntrack is destroyed. */ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); +/* Fake conntrack entry for untracked connections */ +extern struct ip_conntrack ip_conntrack_untracked; + /* Returns new sk_buff, or NULL */ struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb); --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2003-06-14 12:17:58.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ip_conntrack_helper.h 2004-04-18 22:25:24.983032480 -0700 @@ -35,9 +35,13 @@ extern void ip_conntrack_helper_unregist extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple); + +/* Allocate space for an expectation: this is mandatory before calling + ip_conntrack_expect_related. 
*/ +extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void); /* Add an expected connection: can have more than one per connection */ -extern int ip_conntrack_expect_related(struct ip_conntrack *related_to, - struct ip_conntrack_expect *exp); +extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp, + struct ip_conntrack *related_to); extern int ip_conntrack_change_expect(struct ip_conntrack_expect *expect, struct ip_conntrack_tuple *newtuple); extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp); --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ip_conntrack_tftp.h 2003-06-14 12:18:04.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ip_conntrack_tftp.h 2004-04-18 22:25:24.983032480 -0700 @@ -9,5 +9,8 @@ struct tftphdr { #define TFTP_OPCODE_READ 1 #define TFTP_OPCODE_WRITE 2 +#define TFTP_OPCODE_DATA 3 +#define TFTP_OPCODE_ACK 4 +#define TFTP_OPCODE_ERROR 5 #endif /* _IP_CT_TFTP */ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ip_logging.h 2004-04-18 22:25:24.984032328 -0700 @@ -0,0 +1,20 @@ +/* IPv4 macros for the internal logging interface. */ +#ifndef __IP_LOGGING_H +#define __IP_LOGGING_H + +#ifdef __KERNEL__ +#include +#include + +#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \ + nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args) + +#define nf_log_ip(pfh,len,fmt,args...) 
\ + nf_log(AF_INET,pfh,len,fmt,##args) + +#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging) +#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging) + +#endif /*__KERNEL__*/ + +#endif /*__IP_LOGGING_H*/ --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ipt_conntrack.h 2003-06-14 12:18:29.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-04-18 22:25:24.984032328 -0700 @@ -10,6 +10,7 @@ #define IPT_CONNTRACK_STATE_SNAT (1 << (IP_CT_NUMBER + 1)) #define IPT_CONNTRACK_STATE_DNAT (1 << (IP_CT_NUMBER + 2)) +#define IPT_CONNTRACK_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 3)) /* flags, invflags: */ #define IPT_CONNTRACK_STATE 0x01 --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ipt_state.h 2003-06-14 12:18:31.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ipt_state.h 2004-04-18 22:25:24.985032176 -0700 @@ -4,6 +4,8 @@ #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) #define IPT_STATE_INVALID (1 << 0) +#define IPT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1)) + struct ipt_state_info { unsigned int statemask; --- linux-2.6.6-rc1/include/linux/netfilter_ipv4/ipt_ULOG.h 2003-06-14 12:18:08.000000000 -0700 +++ 25/include/linux/netfilter_ipv4/ipt_ULOG.h 2004-04-18 22:25:24.984032328 -0700 @@ -11,6 +11,9 @@ #define NETLINK_NFLOG 5 #endif +#define ULOG_DEFAULT_NLGROUP 1 +#define ULOG_DEFAULT_QTHRESHOLD 1 + #define ULOG_MAC_LEN 80 #define ULOG_PREFIX_LEN 32 --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/netfilter_ipv6/ip6_logging.h 2004-04-18 22:25:24.985032176 -0700 @@ -0,0 +1,20 @@ +/* IPv6 macros for the internal logging interface. */ +#ifndef __IP6_LOGGING_H +#define __IP6_LOGGING_H + +#ifdef __KERNEL__ +#include +#include + +#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \ + nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args) + +#define nf_log_ip6(pfh,len,fmt,args...)
\ + nf_log(AF_INET6,pfh,len,fmt,##args) + +#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging) +#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging) + +#endif /*__KERNEL__*/ + +#endif /*__IP6_LOGGING_H*/ --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/include/linux/netfilter_logging.h 2004-04-18 22:25:24.986032024 -0700 @@ -0,0 +1,33 @@ +/* Internal logging interface, which relies on the real + LOG target modules */ +#ifndef __LINUX_NETFILTER_LOGGING_H +#define __LINUX_NETFILTER_LOGGING_H + +#ifdef __KERNEL__ +#include + +struct nf_logging_t { + void (*nf_log_packet)(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const char *prefix); + void (*nf_log)(char *pfh, size_t len, + const char *prefix); +}; + +extern void nf_log_register(int pf, const struct nf_logging_t *logging); +extern void nf_log_unregister(int pf, const struct nf_logging_t *logging); + +extern void nf_log_packet(int pf, + struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...); +extern void nf_log(int pf, + char *pfh, size_t len, + const char *fmt, ...); +#endif /*__KERNEL__*/ + +#endif /*__LINUX_NETFILTER_LOGGING_H*/ --- linux-2.6.6-rc1/include/linux/netlink.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/netlink.h 2004-04-18 22:25:24.986032024 -0700 @@ -127,6 +127,13 @@ int netlink_attachskb(struct sock *sk, s void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol); +/* finegrained unicast helpers: */ +struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid); +struct sock *netlink_getsockbyfilp(struct file *filp); +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long timeo); +void netlink_detachskb(struct sock *sk, struct sk_buff *skb); +int netlink_sendskb(struct sock *sk, struct sk_buff 
*skb, int protocol); + /* * skb should fit one page. This choice is good for headerless malloc. * --- linux-2.6.6-rc1/include/linux/nfsd/nfsd.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/nfsd/nfsd.h 2004-04-18 22:25:24.987031872 -0700 @@ -196,6 +196,9 @@ void nfsd_lockd_shutdown(void); #define nfserr_openmode __constant_htonl(NFSERR_OPENMODE) #define nfserr_locks_held __constant_htonl(NFSERR_LOCKS_HELD) #define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL) +#define nfserr_grace __constant_htonl(NFSERR_GRACE) +#define nfserr_no_grace __constant_htonl(NFSERR_NO_GRACE) +#define nfserr_reclaim_bad __constant_htonl(NFSERR_RECLAIM_BAD) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. --- linux-2.6.6-rc1/include/linux/nfsd/state.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/linux/nfsd/state.h 2004-04-18 22:25:24.987031872 -0700 @@ -132,6 +132,9 @@ struct nfs4_replay { * release a stateowner. * so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when * close is called to reap associated byte-range locks +* so_close_lru: (open) stateowner is placed on this list instead of being +* reaped (when so_perfilestate is empty) to hold the last close replay. +* reaped by laundromat thread after lease period.
*/ struct nfs4_stateowner { struct list_head so_idhash; /* hash by so_id */ @@ -139,6 +142,8 @@ struct nfs4_stateowner { struct list_head so_perclient; /* nfs4_client->cl_perclient */ struct list_head so_perfilestate; /* list: nfs4_stateid */ struct list_head so_perlockowner; /* nfs4_stateid->st_perlockowner */ + struct list_head so_close_lru; /* tail queue */ + time_t so_time; /* time of placement on so_close_lru */ int so_is_open_owner; /* 1=openowner,0=lockowner */ u32 so_id; struct nfs4_client * so_client; @@ -194,6 +199,7 @@ struct nfs4_stateid { #define OPEN_STATE 0x00000004 #define LOCK_STATE 0x00000008 #define RDWR_STATE 0x00000010 +#define CLOSE_STATE 0x00000020 #define seqid_mutating_err(err) \ (((err) != nfserr_stale_clientid) && \ @@ -209,4 +215,6 @@ extern int nfs4_share_conflict(struct sv unsigned int deny_type); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); +extern int nfs4_in_grace(void); +extern int nfs4_in_no_grace(void); #endif /* NFSD4_STATE_H */ --- linux-2.6.6-rc1/include/linux/nfs_fs.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/nfs_fs.h 2004-04-18 22:25:57.356111024 -0700 @@ -304,8 +304,12 @@ nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern int nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, +extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); +extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char *buf, + size_t count, loff_t pos); +extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char *buf, + size_t count, loff_t pos); /* * linux/fs/nfs/dir.c --- linux-2.6.6-rc1/include/linux/page-flags.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/page-flags.h 2004-04-18 22:25:32.958819976 -0700 @@ -71,12 +71,12 @@ #define PG_nosave 14 /* Used for system suspend/resume */ #define PG_maplock 15 /* Lock bit for rmap to ptes */ -#define PG_direct 16 /* ->pte_chain points directly at pte */ +#define 
PG_swapcache 16 /* Swap page: swp_entry_t in private */ #define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ #define PG_reclaim 18 /* To be reclaimed asap */ #define PG_compound 19 /* Part of a compound page */ -#define PG_anon 20 /* Anonymous page: anon_vma in mapping*/ -#define PG_swapcache 21 /* Swap page: swp_entry_t in private */ + +#define PG_anon 20 /* Anonymous page: anonmm in mapping */ /* @@ -281,12 +281,6 @@ extern void get_full_page_state(struct p #define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags) #define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags) -#define PageDirect(page) test_bit(PG_direct, &(page)->flags) -#define SetPageDirect(page) set_bit(PG_direct, &(page)->flags) -#define TestSetPageDirect(page) test_and_set_bit(PG_direct, &(page)->flags) -#define ClearPageDirect(page) clear_bit(PG_direct, &(page)->flags) -#define TestClearPageDirect(page) test_and_clear_bit(PG_direct, &(page)->flags) - #define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) #define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) #define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) --- linux-2.6.6-rc1/include/linux/pagemap.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/pagemap.h 2004-04-18 22:25:24.988031720 -0700 @@ -139,14 +139,12 @@ static inline unsigned long get_page_cac return atomic_read(&nr_pagecache); } -static inline void ___add_to_page_cache(struct page *page, - struct address_space *mapping, unsigned long index) +static inline pgoff_t linear_page_index(struct vm_area_struct *vma, + unsigned long address) { - page->mapping = mapping; - page->index = index; - - mapping->nrpages++; - pagecache_acct(1); + pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT; + pgoff += vma->vm_pgoff; + return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); } extern void FASTCALL(__lock_page(struct page *page)); --- linux-2.6.6-rc1/include/linux/pci.h 2004-04-14 
23:14:49.000000000 -0700 +++ 25/include/linux/pci.h 2004-04-18 22:25:24.990031416 -0700 @@ -305,18 +305,89 @@ #define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ #define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */ #define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_DEVFN 4 /* A copy of devfn. */ -#define PCI_X_BUSNR 5 /* Bus segment number */ -#define PCI_X_STATUS 6 /* PCI-X capabilities */ -#define PCI_X_STATUS_64BIT 0x0001 /* 64-bit device */ -#define PCI_X_STATUS_133MHZ 0x0002 /* 133 MHz capable */ -#define PCI_X_STATUS_SPL_DISC 0x0004 /* Split Completion Discarded */ -#define PCI_X_STATUS_UNX_SPL 0x0008 /* Unexpected Split Completion */ -#define PCI_X_STATUS_COMPLEX 0x0010 /* Device Complexity */ -#define PCI_X_STATUS_MAX_READ 0x0060 /* Designed Maximum Memory Read Count */ -#define PCI_X_STATUS_MAX_SPLIT 0x0380 /* Design Max Outstanding Split Trans */ -#define PCI_X_STATUS_MAX_CUM 0x1c00 /* Designed Max Cumulative Read Size */ -#define PCI_X_STATUS_SPL_ERR 0x2000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define 
PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 1 +#define PCI_EXT_CAP_ID_VC 2 +#define PCI_EXT_CAP_ID_DSN 3 +#define PCI_EXT_CAP_ID_PWR 4 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 
31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_SRC 52 +#define PCI_ERR_ROOT_SRC 54 + +/* Virtual Channel */ +#define PCI_VC_PORT_REG1 4 +#define PCI_VC_PORT_REG2 8 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_STATUS 26 + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ /* Include the ID list */ @@ -403,6 +474,8 @@ struct pci_dev { unsigned short vendor_compatible[DEVICE_COUNT_COMPATIBLE]; unsigned short device_compatible[DEVICE_COUNT_COMPATIBLE]; + int cfg_size; /* Size of configuration space */ + /* * Instead of touching interrupt line and base address registers * directly, use the values stored here. They might be different! 
@@ -602,6 +675,7 @@ struct pci_dev *pci_find_subsys (unsigne struct pci_dev *pci_find_class (unsigned int class, const struct pci_dev *from); struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn); int pci_find_capability (struct pci_dev *dev, int cap); +int pci_find_ext_capability (struct pci_dev *dev, int cap); struct pci_bus * pci_find_next_bus(const struct pci_bus *from); struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from); @@ -774,6 +848,7 @@ static inline int pci_assign_resource(st static inline int pci_register_driver(struct pci_driver *drv) { return 0;} static inline void pci_unregister_driver(struct pci_driver *drv) { } static inline int pci_find_capability (struct pci_dev *dev, int cap) {return 0; } +static inline int pci_find_ext_capability (struct pci_dev *dev, int cap) {return 0; } static inline const struct pci_device_id *pci_match_device(const struct pci_device_id *ids, const struct pci_dev *dev) { return NULL; } /* Power management related routines */ --- linux-2.6.6-rc1/include/linux/pci_ids.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/pci_ids.h 2004-04-18 22:25:28.364518416 -0700 @@ -1638,8 +1638,8 @@ #define PCI_SUBDEVICE_ID_CHASE_PCIRAS8 0xF010 #define PCI_VENDOR_ID_AUREAL 0x12eb -#define PCI_DEVICE_ID_AUREAL_VORTEX 0x0001 -#define PCI_DEVICE_ID_AUREAL_VORTEX2 0x0002 +#define PCI_DEVICE_ID_AUREAL_VORTEX_1 0x0001 +#define PCI_DEVICE_ID_AUREAL_VORTEX_2 0x0002 #define PCI_DEVICE_ID_AUREAL_ADVANTAGE 0x0003 #define PCI_VENDOR_ID_ELECTRONICDESIGNGMBH 0x12f8 @@ -2087,6 +2087,7 @@ #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 +#define PCI_DEVICE_ID_INTEL_SMCH 0x3590 #define PCI_DEVICE_ID_INTEL_80310 0x530d #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 --- linux-2.6.6-rc1/include/linux/proc_fs.h 2004-04-03 20:39:14.000000000 -0800 +++ 
25/include/linux/proc_fs.h 2004-04-18 22:25:56.787197512 -0700 @@ -26,9 +26,6 @@ enum { /* Finally, the dynamically allocatable proc entries are reserved: */ -#define PROC_DYNAMIC_FIRST 4096 -#define PROC_NDYNAMIC 16384 - #define PROC_SUPER_MAGIC 0x9fa0 /* @@ -53,7 +50,7 @@ typedef int (write_proc_t)(struct file * typedef int (get_info_t)(char *, char **, off_t, int); struct proc_dir_entry { - unsigned short low_ino; + unsigned int low_ino; unsigned short namelen; const char *name; mode_t mode; @@ -102,7 +99,7 @@ extern void remove_proc_entry(const char extern struct vfsmount *proc_mnt; extern int proc_fill_super(struct super_block *,void *,int); -extern struct inode * proc_get_inode(struct super_block *, int, struct proc_dir_entry *); +extern struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *); extern int proc_match(int, const char *,struct proc_dir_entry *); --- linux-2.6.6-rc1/include/linux/quota.h 2004-01-09 00:04:32.000000000 -0800 +++ 25/include/linux/quota.h 2004-04-18 22:25:24.993030960 -0700 @@ -138,6 +138,10 @@ struct if_dqinfo { #include #include +/* Maximal numbers of writes for quota operation (insert/delete/update) + * (over all formats) - info block, 4 pointer blocks, data block */ +#define DQUOT_MAX_WRITES 6 + /* * Data for one user/group kept in memory */ @@ -168,22 +172,21 @@ struct mem_dqinfo { } u; }; +struct super_block; + #define DQF_MASK 0xffff /* Mask for format specific flags */ #define DQF_INFO_DIRTY_B 16 #define DQF_ANY_DQUOT_DIRTY_B 17 #define DQF_INFO_DIRTY (1 << DQF_INFO_DIRTY_B) /* Is info dirty? */ #define DQF_ANY_DQUOT_DIRTY (1 << DQF_ANY_DQUOT_DIRTY_B) /* Is any dquot dirty? 
*/ -extern inline void mark_info_dirty(struct mem_dqinfo *info) -{ - set_bit(DQF_INFO_DIRTY_B, &info->dqi_flags); -} - +extern void mark_info_dirty(struct super_block *sb, int type); #define info_dirty(info) test_bit(DQF_INFO_DIRTY_B, &(info)->dqi_flags) #define info_any_dquot_dirty(info) test_bit(DQF_ANY_DQUOT_DIRTY_B, &(info)->dqi_flags) #define info_any_dirty(info) (info_dirty(info) || info_any_dquot_dirty(info)) #define sb_dqopt(sb) (&(sb)->s_dquot) +#define sb_dqinfo(sb, type) (sb_dqopt(sb)->info+(type)) struct dqstats { int lookups; @@ -200,15 +203,13 @@ extern struct dqstats dqstats; #define NR_DQHASH 43 /* Just an arbitrary number */ -#define DQ_MOD_B 0 -#define DQ_BLKS_B 1 -#define DQ_INODES_B 2 -#define DQ_FAKE_B 3 - -#define DQ_MOD (1 << DQ_MOD_B) /* dquot modified since read */ -#define DQ_BLKS (1 << DQ_BLKS_B) /* uid/gid has been warned about blk limit */ -#define DQ_INODES (1 << DQ_INODES_B) /* uid/gid has been warned about inode limit */ -#define DQ_FAKE (1 << DQ_FAKE_B) /* no limits only usage */ +#define DQ_MOD_B 0 /* dquot modified since read */ +#define DQ_BLKS_B 1 /* uid/gid has been warned about blk limit */ +#define DQ_INODES_B 2 /* uid/gid has been warned about inode limit */ +#define DQ_FAKE_B 3 /* no limits only usage */ +#define DQ_READ_B 4 /* dquot was read into memory */ +#define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ +#define DQ_WAITFREE_B 6 /* dquot being waited (by invalidate_dquots) */ struct dquot { struct list_head dq_hash; /* Hash list in memory */ @@ -216,8 +217,7 @@ struct dquot { struct list_head dq_free; /* Free list element */ struct semaphore dq_lock; /* dquot IO lock */ atomic_t dq_count; /* Use count */ - - /* fields after this point are cleared when invalidating */ + wait_queue_head_t dq_wait_unused; /* Wait queue for dquot to become unused */ struct super_block *dq_sb; /* superblock this applies to */ unsigned int dq_id; /* ID this applies to (uid, gid) */ loff_t dq_off; /* Offset of dquot on disk 
*/ @@ -238,19 +238,22 @@ struct quota_format_ops { int (*write_file_info)(struct super_block *sb, int type); /* Write main info about file */ int (*free_file_info)(struct super_block *sb, int type); /* Called on quotaoff() */ int (*read_dqblk)(struct dquot *dquot); /* Read structure for one user */ - int (*commit_dqblk)(struct dquot *dquot); /* Write (or delete) structure for one user */ + int (*commit_dqblk)(struct dquot *dquot); /* Write structure for one user */ + int (*release_dqblk)(struct dquot *dquot); /* Called when last reference to dquot is being dropped */ }; /* Operations working with dquots */ struct dquot_operations { - void (*initialize) (struct inode *, int); - void (*drop) (struct inode *); + int (*initialize) (struct inode *, int); + int (*drop) (struct inode *); int (*alloc_space) (struct inode *, qsize_t, int); int (*alloc_inode) (const struct inode *, unsigned long); - void (*free_space) (struct inode *, qsize_t); - void (*free_inode) (const struct inode *, unsigned long); + int (*free_space) (struct inode *, qsize_t); + int (*free_inode) (const struct inode *, unsigned long); int (*transfer) (struct inode *, struct iattr *); - int (*write_dquot) (struct dquot *); + int (*write_dquot) (struct dquot *); /* Ordinary dquot write */ + int (*mark_dirty) (struct dquot *); /* Dquot is marked dirty */ + int (*write_info) (struct super_block *, int); /* Write of quota "superblock" */ }; /* Operations handling requests from userspace */ @@ -289,10 +292,7 @@ struct quota_info { }; /* Inline would be better but we need to dereference super_block which is not defined yet */ -#define mark_dquot_dirty(dquot) do {\ - set_bit(DQF_ANY_DQUOT_DIRTY_B, &(sb_dqopt((dquot)->dq_sb)->info[(dquot)->dq_type].dqi_flags));\ - set_bit(DQ_MOD_B, &(dquot)->dq_flags);\ -} while (0) +int mark_dquot_dirty(struct dquot *dquot); #define dquot_dirty(dquot) test_bit(DQ_MOD_B, &(dquot)->dq_flags) @@ -304,7 +304,6 @@ struct quota_info { int register_quota_format(struct 
quota_format_type *fmt); void unregister_quota_format(struct quota_format_type *fmt); -void init_dquot_operations(struct dquot_operations *fsdqops); struct quota_module_name { int qm_fmt_id; --- linux-2.6.6-rc1/include/linux/quotaops.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/quotaops.h 2004-04-18 22:25:24.993030960 -0700 @@ -22,16 +22,31 @@ */ extern void sync_dquots(struct super_block *sb, int type); -extern void dquot_initialize(struct inode *inode, int type); -extern void dquot_drop(struct inode *inode); +extern int dquot_initialize(struct inode *inode, int type); +extern int dquot_drop(struct inode *inode); -extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); -extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); +extern int dquot_alloc_space(struct inode *inode, qsize_t number, int prealloc); +extern int dquot_alloc_inode(const struct inode *inode, unsigned long number); -extern void dquot_free_space(struct inode *inode, qsize_t number); -extern void dquot_free_inode(const struct inode *inode, unsigned long number); +extern int dquot_free_space(struct inode *inode, qsize_t number); +extern int dquot_free_inode(const struct inode *inode, unsigned long number); -extern int dquot_transfer(struct inode *inode, struct iattr *iattr); +extern int dquot_transfer(struct inode *inode, struct iattr *iattr); +extern int dquot_commit(struct dquot *dquot); +extern int dquot_acquire(struct dquot *dquot); +extern int dquot_release(struct dquot *dquot); +extern int dquot_commit_info(struct super_block *sb, int type); +extern int dquot_mark_dquot_dirty(struct dquot *dquot); + +extern int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path); +extern int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry); +extern int vfs_quota_off(struct super_block *sb, int type); +#define vfs_quota_off_mount(sb, type) vfs_quota_off(sb, type) +extern int vfs_quota_sync(struct super_block 
*sb, int type); +extern int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +extern int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); +extern int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); +extern int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di); /* * Operations supported for diskquotas. @@ -42,6 +57,8 @@ extern struct quotactl_ops vfs_quotactl_ #define sb_dquot_ops (&dquot_operations) #define sb_quotactl_ops (&vfs_quotactl_ops) +/* It is better to call this function outside of any transaction as it might + * need a lot of space in journal for dquot structure allocation. */ static __inline__ void DQUOT_INIT(struct inode *inode) { BUG_ON(!inode->i_sb); @@ -49,6 +66,7 @@ static __inline__ void DQUOT_INIT(struct inode->i_sb->dq_op->initialize(inode, -1); } +/* The same as with DQUOT_INIT */ static __inline__ void DQUOT_DROP(struct inode *inode) { if (IS_QUOTAINIT(inode)) { @@ -57,6 +75,8 @@ static __inline__ void DQUOT_DROP(struct } } +/* The following allocation/freeing/transfer functions *must* be called inside + * a transaction (deadlocks possible otherwise) */ static __inline__ int DQUOT_PREALLOC_SPACE_NODIRTY(struct inode *inode, qsize_t nr) { if (sb_any_quota_enabled(inode->i_sb)) { @@ -137,6 +157,7 @@ static __inline__ int DQUOT_TRANSFER(str return 0; } +/* The following two functions cannot be called inside a transaction */ #define DQUOT_SYNC(sb) sync_dquots(sb, -1) static __inline__ int DQUOT_OFF(struct super_block *sb) --- linux-2.6.6-rc1/include/linux/reiserfs_fs_sb.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/reiserfs_fs_sb.h 2004-04-18 22:25:24.994030808 -0700 @@ -208,6 +208,7 @@ struct reiserfs_journal { unsigned int s_journal_trans_max ; /* max number of blocks in a transaction. 
*/ unsigned int s_journal_max_batch ; /* max number of blocks to batch into a trans */ unsigned int s_journal_max_commit_age ; /* in seconds, how old can an async commit be */ + unsigned int s_journal_default_max_commit_age ; /* the default for the max commit age */ unsigned int s_journal_max_trans_age ; /* in seconds, how old can a transaction be */ struct reiserfs_journal_cnode *j_cnode_free_list ; @@ -481,6 +482,7 @@ int reiserfs_resize(struct super_block * #define SB_JOURNAL_TRANS_MAX(s) (SB_JOURNAL(s)->s_journal_trans_max) #define SB_JOURNAL_MAX_BATCH(s) (SB_JOURNAL(s)->s_journal_max_batch) #define SB_JOURNAL_MAX_COMMIT_AGE(s) (SB_JOURNAL(s)->s_journal_max_commit_age) +#define SB_JOURNAL_DEFAULT_MAX_COMMIT_AGE(s) (SB_JOURNAL(s)->s_journal_default_max_commit_age) #define SB_JOURNAL_MAX_TRANS_AGE(s) (SB_JOURNAL(s)->s_journal_max_trans_age) /* A safe version of the "bdevname", which returns the "s_id" field of --- linux-2.6.6-rc1/include/linux/rmap.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/rmap.h 2004-04-18 22:25:33.304767384 -0700 @@ -15,19 +15,78 @@ #ifdef CONFIG_MMU -struct pte_chain; -struct pte_chain *pte_chain_alloc(int gfp_flags); -void __pte_chain_free(struct pte_chain *pte_chain); +void fastcall page_add_anon_rmap(struct page *, + struct mm_struct *, unsigned long addr); +void fastcall page_add_file_rmap(struct page *); +void fastcall page_remove_rmap(struct page *); + +/** + * page_dup_rmap - duplicate pte mapping to a page + * @page: the page to add the mapping to + * + * For copy_page_range only: minimal extract from page_add_rmap, + * avoiding unnecessary tests (already checked) so it's quicker. + */ +static inline void page_dup_rmap(struct page *page) +{ + rmap_lock(page); + page->mapcount++; + rmap_unlock(page); +} + +int fastcall mremap_move_anon_rmap(struct page *page, unsigned long addr); + +/** + * mremap_moved_anon_rmap - does new address clash with that noted? 
+ * @page: the page just brought back in from swap + * @addr: the user virtual address at which it is mapped + * + * Returns boolean, true if addr clashes with address already in page. + * + * For do_swap_page and unuse_pte: anonmm rmap cannot find the page if + * it's at different addresses in different mms, so caller must take a + * copy of the page to avoid that: not very clever, but too rare a case + * to merit cleverness. + */ +static inline int mremap_moved_anon_rmap(struct page *page, unsigned long addr) +{ + return page->index != (addr & PAGE_MASK); +} -static inline void pte_chain_free(struct pte_chain *pte_chain) +/** + * make_page_exclusive - try to make page exclusive to one mm + * @vma: the vm_area_struct covering this address + * @addr: the user virtual address of the page in question + * + * Assumes that the page at this address is anonymous (COWable), + * and that the caller holds mmap_sem for reading or for writing. + * + * For mremap's move_page_tables and for swapoff's unuse_process: + * not a general purpose routine, and in general may not succeed. + * But move_page_tables loops until it succeeds, and unuse_process + * holds the original page locked, which protects against races. 
+ */ +static inline int make_page_exclusive(struct vm_area_struct *vma, + unsigned long addr) { - if (pte_chain) - __pte_chain_free(pte_chain); + switch (handle_mm_fault(vma->vm_mm, vma, addr, 1)) { + case VM_FAULT_MINOR: + case VM_FAULT_MAJOR: + return 0; + case VM_FAULT_OOM: + return -ENOMEM; + default: + return -EFAULT; + } } -struct pte_chain * fastcall - page_add_rmap(struct page *, pte_t *, struct pte_chain *); -void fastcall page_remove_rmap(struct page *, pte_t *); +/* + * Called from kernel/fork.c to manage anonymous memory + */ +void init_rmap(void); +int exec_rmap(struct mm_struct *); +int dup_rmap(struct mm_struct *, struct mm_struct *oldmm); +void exit_rmap(struct mm_struct *); /* * Called from mm/vmscan.c to handle paging out @@ -37,6 +96,11 @@ int fastcall try_to_unmap(struct page *) #else /* !CONFIG_MMU */ +#define init_rmap() do {} while (0) +#define exec_rmap(mm) (0) +#define dup_rmap(mm, oldmm) (0) +#define exit_rmap(mm) do {} while (0) + #define page_referenced(page) TestClearPageReferenced(page) #define try_to_unmap(page) SWAP_FAIL --- linux-2.6.6-rc1/include/linux/rtnetlink.h 2004-02-03 20:42:39.000000000 -0800 +++ 25/include/linux/rtnetlink.h 2004-04-18 22:25:24.995030656 -0700 @@ -47,7 +47,11 @@ #define RTM_NEWPREFIX (RTM_BASE+36) #define RTM_GETPREFIX (RTM_BASE+38) -#define RTM_MAX (RTM_BASE+39) +#define RTM_GETMULTICAST (RTM_BASE+42) + +#define RTM_GETANYCAST (RTM_BASE+46) + +#define RTM_MAX (RTM_BASE+47) /* Generic structure for encapsulation of optional route information. 
@@ -340,7 +344,8 @@ enum IFA_LABEL, IFA_BROADCAST, IFA_ANYCAST, - IFA_CACHEINFO + IFA_CACHEINFO, + IFA_MULTICAST }; -#define IFA_MAX IFA_CACHEINFO +#define IFA_MAX IFA_MULTICAST /* must track the last IFA_* value */ --- linux-2.6.6-rc1/include/linux/rwsem.h 2003-06-26 22:07:26.000000000 -0700 +++ 25/include/linux/rwsem.h 2004-04-18 22:25:56.366261504 -0700 @@ -22,9 +22,9 @@ struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK -#include /* use a generic implementation */ +#include /* use a generic implementation */ #else -#include /* use an arch-specific implementation */ +#include /* use an arch-specific implementation */ #endif #ifndef rwsemtrace @@ -41,9 +41,9 @@ extern void FASTCALL(rwsemtrace(struct r static inline void down_read(struct rw_semaphore *sem) { might_sleep(); - rwsemtrace(sem,"Entering down_read"); + rwsemtrace(sem, "Entering down_read"); __down_read(sem); - rwsemtrace(sem,"Leaving down_read"); + rwsemtrace(sem, "Leaving down_read"); } /* @@ -52,9 +52,9 @@ static inline void down_read(struct rw_s static inline int down_read_trylock(struct rw_semaphore *sem) { int ret; - rwsemtrace(sem,"Entering down_read_trylock"); + rwsemtrace(sem, "Entering down_read_trylock"); ret = __down_read_trylock(sem); - rwsemtrace(sem,"Leaving down_read_trylock"); + rwsemtrace(sem, "Leaving down_read_trylock"); return ret; } @@ -64,9 +64,9 @@ static inline int down_read_trylock(stru static inline void down_write(struct rw_semaphore *sem) { might_sleep(); - rwsemtrace(sem,"Entering down_write"); + rwsemtrace(sem, "Entering down_write"); __down_write(sem); - rwsemtrace(sem,"Leaving down_write"); + rwsemtrace(sem, "Leaving down_write"); } /* @@ -75,9 +75,9 @@ static inline void down_write(struct rw_ static inline int down_write_trylock(struct rw_semaphore *sem) { int ret; - rwsemtrace(sem,"Entering down_write_trylock"); + rwsemtrace(sem, "Entering down_write_trylock"); ret = __down_write_trylock(sem); - rwsemtrace(sem,"Leaving down_write_trylock"); + rwsemtrace(sem, "Leaving down_write_trylock"); return ret; } @@ -86,9 +86,9 @@ static 
inline int down_write_trylock(str */ static inline void up_read(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering up_read"); + rwsemtrace(sem, "Entering up_read"); __up_read(sem); - rwsemtrace(sem,"Leaving up_read"); + rwsemtrace(sem, "Leaving up_read"); } /* @@ -96,9 +96,9 @@ static inline void up_read(struct rw_sem */ static inline void up_write(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering up_write"); + rwsemtrace(sem, "Entering up_write"); __up_write(sem); - rwsemtrace(sem,"Leaving up_write"); + rwsemtrace(sem, "Leaving up_write"); } /* @@ -106,9 +106,9 @@ static inline void up_write(struct rw_se */ static inline void downgrade_write(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering downgrade_write"); + rwsemtrace(sem, "Entering downgrade_write"); __downgrade_write(sem); - rwsemtrace(sem,"Leaving downgrade_write"); + rwsemtrace(sem, "Leaving downgrade_write"); } #endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/linux/rwsem-spinlock.h 2003-06-14 12:18:23.000000000 -0700 +++ 25/include/linux/rwsem-spinlock.h 2004-04-18 22:25:56.366261504 -0700 @@ -26,14 +26,14 @@ struct rwsem_waiter; * - if activity is 0 then there are no active readers or writers * - if activity is +ve then that is the number of active readers * - if activity is -1 then there is one active writer - * - if wait_list is not empty, then there are processes waiting for the semaphore + * - if wait_list is not empty, there are processes waiting for the semaphore */ struct rw_semaphore { - __s32 activity; - spinlock_t wait_lock; - struct list_head wait_list; + __s32 activity; + spinlock_t wait_lock; + struct list_head wait_list; #if RWSEM_DEBUG - int debug; + int debug; #endif }; --- linux-2.6.6-rc1/include/linux/sched.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/sched.h 2004-04-18 22:25:54.261581464 -0700 @@ -29,6 +29,7 @@ #include #include #include +#include struct exec_domain; @@ -96,6 +97,14 @@ extern unsigned long nr_running(void); extern unsigned long 
nr_uninterruptible(void); extern unsigned long nr_iowait(void); +#ifdef CONFIG_SCHEDSTATS +#define schedstat_inc(s, field) ((s)->field++) +#define schedstat_add(s, field, amt) ((s)->field += amt) +#else +#define schedstat_inc(s, field) do { } while (0) +#define schedstat_add(d, field, amt) do { } while (0) +#endif + #include #include #include @@ -147,6 +156,7 @@ extern spinlock_t mmlist_lock; typedef struct task_struct task_t; extern void sched_init(void); +extern void sched_init_smp(void); extern void init_idle(task_t *idle, int cpu); extern void show_state(void); @@ -170,9 +180,11 @@ extern void update_one_process(struct ta unsigned long system, int cpu); extern void scheduler_tick(int user_tick, int system); extern unsigned long cache_decay_ticks; -extern const unsigned long scheduling_functions_start_here; -extern const unsigned long scheduling_functions_end_here; +/* Attach to any functions which should be ignored in wchan output. */ +#define __sched __attribute__((__section__(".sched.text"))) +/* Is this address in the __sched functions? */ +extern int in_sched_functions(unsigned long addr); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); @@ -203,6 +215,7 @@ struct mm_struct { * together off init_mm.mmlist, and are protected * by mmlist_lock */ + struct anonmm *anonmm; /* For rmap to track anon mem */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; @@ -343,7 +356,6 @@ struct k_itimer { struct sigqueue *sigq; /* signal queue entry. */ }; - struct io_context; /* See blkdev.h */ void exit_io_context(void); @@ -503,6 +515,9 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. 
*/ + + struct mempolicy *mempolicy; + short il_next; /* could be shared with used_math */ }; static inline pid_t process_group(struct task_struct *tsk) @@ -541,6 +556,147 @@ do { if (atomic_dec_and_test(&(tsk)->usa #define PF_SYNCWRITE 0x00200000 /* I am doing a sync write */ #ifdef CONFIG_SMP +#define SCHED_LOAD_SCALE 128UL /* increase resolution of load */ + +#define SD_BALANCE_NEWIDLE 1 /* Balance when about to become idle */ +#define SD_BALANCE_EXEC 2 /* Balance on exec */ +#define SD_BALANCE_CLONE 4 /* Balance on clone */ +#define SD_WAKE_IDLE 8 /* Wake to idle CPU on task wakeup */ +#define SD_WAKE_AFFINE 16 /* Wake task to waking CPU */ +#define SD_WAKE_BALANCE 32 /* Perform balancing at task wakeup */ +#define SD_SHARE_CPUPOWER 64 /* Domain members share cpu power */ + +struct sched_group { + struct sched_group *next; /* Must be a circular list */ + cpumask_t cpumask; + + /* + * CPU power of this group, SCHED_LOAD_SCALE being max power for a + * single CPU. This should be read only (except for setup). Although + * it will need to be written to at cpu hot(un)plug time, perhaps the + * cpucontrol semaphore will provide enough exclusion? + */ + unsigned long cpu_power; +}; + +struct sched_domain { + /* These fields must be setup */ + struct sched_domain *parent; /* top domain must be null terminated */ + struct sched_group *groups; /* the balancing groups of the domain */ + cpumask_t span; /* span of all CPUs in this domain */ + unsigned long min_interval; /* Minimum balance interval ms */ + unsigned long max_interval; /* Maximum balance interval ms */ + unsigned int busy_factor; /* less balancing by factor if busy */ + unsigned int imbalance_pct; /* No balance until over watermark */ + unsigned long long cache_hot_time; /* Task considered cache hot (ns) */ + unsigned int cache_nice_tries; /* Leave cache hot tasks for # tries */ + unsigned int per_cpu_gain; /* CPU % gained by adding domain cpus */ + int flags; /* See SD_* */ + + /* Runtime fields. 
*/ + unsigned long last_balance; /* init to jiffies. units in jiffies */ + unsigned int balance_interval; /* initialise to 1. units in ms. */ + unsigned int nr_balance_failed; /* initialise to 0 */ + +#ifdef CONFIG_SCHEDSTATS + unsigned long lb_cnt[3]; + unsigned long lb_balanced[3]; + unsigned long lb_failed[3]; + unsigned long lb_pulled[3]; + unsigned long lb_hot_pulled[3]; + unsigned long lb_imbalance[3]; + + /* Active load balancing */ + unsigned long alb_cnt; + unsigned long alb_failed; + unsigned long alb_pushed; + + /* Wakeups */ + unsigned long sched_wake_remote; + + /* Passive load balancing */ + unsigned long plb_pulled; + + /* Affine wakeups */ + unsigned long afw_pulled; + + /* SD_BALANCE_EXEC balances */ + unsigned long sbe_pushed; + + /* SD_BALANCE_CLONE balances */ + unsigned long sbc_pushed; +#endif +}; + +/* Common values for SMT siblings */ +#define SD_SIBLING_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 2, \ + .busy_factor = 8, \ + .imbalance_pct = 110, \ + .cache_hot_time = 0, \ + .cache_nice_tries = 0, \ + .per_cpu_gain = 15, \ + .flags = SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_CLONE \ + | SD_WAKE_AFFINE \ + | SD_WAKE_IDLE \ + | SD_SHARE_CPUPOWER, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + +/* Common values for CPUs */ +#define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_hot_time = (5*1000000/2), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_BALANCE_CLONE \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + +#ifdef CONFIG_NUMA +/* Common values for NUMA nodes */ +#define 
SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 256*fls(num_online_cpus()),\ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_BALANCE_EXEC \ + | SD_BALANCE_CLONE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif + +extern void cpu_attach_domain(struct sched_domain *sd, int cpu); + extern int set_cpus_allowed(task_t *p, cpumask_t new_mask); #else static inline int set_cpus_allowed(task_t *p, cpumask_t new_mask) @@ -551,12 +707,10 @@ static inline int set_cpus_allowed(task_ extern unsigned long long sched_clock(void); -#ifdef CONFIG_NUMA +#ifdef CONFIG_SMP extern void sched_balance_exec(void); -extern void node_nr_running_init(void); #else #define sched_balance_exec() {} -#define node_nr_running_init() {} #endif /* Move tasks off this (offline) CPU onto another. 
*/ @@ -611,12 +765,17 @@ extern void do_timer(struct pt_regs *); extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); extern int FASTCALL(wake_up_process(struct task_struct * tsk)); +extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk)); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); + extern void FASTCALL(wake_up_forked_thread(struct task_struct * tsk)); #else static inline void kick_process(struct task_struct *tsk) { } + static inline void wake_up_forked_thread(struct task_struct * tsk) + { + wake_up_forked_process(tsk); /* !CONFIG_SMP: same as wake_up_forked_process() */ + } #endif -extern void FASTCALL(wake_up_forked_process(struct task_struct * tsk)); extern void FASTCALL(sched_fork(task_t * p)); extern void FASTCALL(sched_exit(task_t * p)); --- linux-2.6.6-rc1/include/linux/security.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/linux/security.h 2004-04-18 22:25:51.782958272 -0700 @@ -44,7 +44,7 @@ extern int cap_capget (struct task_struc extern int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_bprm_set_security (struct linux_binprm *bprm); -extern void cap_bprm_compute_creds (struct linux_binprm *bprm); +extern void cap_bprm_apply_creds (struct linux_binprm *bprm); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, char *name, void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, char *name); @@ -102,7 +102,7 @@ struct swap_info_struct; * @bprm_free_security: * @bprm contains the linux_binprm structure to be modified. * Deallocate and clear the @bprm->security field. 
- * @bprm_compute_creds: + * @bprm_apply_creds: * Compute and set the security attributes of a process being transformed * by an execve operation based on the old attributes (current->security) * and the information saved in @bprm->security by the set_security hook. @@ -115,7 +115,7 @@ struct swap_info_struct; * @bprm contains the linux_binprm structure. * @bprm_set_security: * Save security information in the bprm->security field, typically based - * on information about the bprm->file, for later use by the compute_creds + * on information about the bprm->file, for later use by the apply_creds * hook. This hook may also optionally check permissions (e.g. for * transitions between security domains). * This hook may be called multiple times during a single execve, e.g. for @@ -924,7 +924,7 @@ struct swap_info_struct; * Check permission before allowing the @parent process to trace the * @child process. * Security modules may also want to perform a process tracing check - * during an execve in the set_security or compute_creds hooks of + * during an execve in the set_security or apply_creds hooks of * binprm_security_ops if the process is being traced and its security * attributes would be changed by the execve. * @parent contains the task_struct structure for parent process. 
@@ -1026,7 +1026,7 @@ struct security_operations { int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); - void (*bprm_compute_creds) (struct linux_binprm * bprm); + void (*bprm_apply_creds) (struct linux_binprm * bprm); int (*bprm_set_security) (struct linux_binprm * bprm); int (*bprm_check_security) (struct linux_binprm * bprm); int (*bprm_secureexec) (struct linux_binprm * bprm); @@ -1290,9 +1290,9 @@ static inline void security_bprm_free (s { security_ops->bprm_free_security (bprm); } -static inline void security_bprm_compute_creds (struct linux_binprm *bprm) +static inline void security_bprm_apply_creds (struct linux_binprm *bprm) { - security_ops->bprm_compute_creds (bprm); + security_ops->bprm_apply_creds (bprm); } static inline int security_bprm_set (struct linux_binprm *bprm) { @@ -1962,9 +1962,9 @@ static inline int security_bprm_alloc (s static inline void security_bprm_free (struct linux_binprm *bprm) { } -static inline void security_bprm_compute_creds (struct linux_binprm *bprm) +static inline void security_bprm_apply_creds (struct linux_binprm *bprm) { - cap_bprm_compute_creds (bprm); + cap_bprm_apply_creds (bprm); } static inline int security_bprm_set (struct linux_binprm *bprm) --- linux-2.6.6-rc1/include/linux/serial_core.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/serial_core.h 2004-04-18 22:25:30.347217000 -0700 @@ -168,7 +168,9 @@ struct uart_port { unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ - +#ifdef CONFIG_KGDB + int kgdb; /* in use by kgdb */ +#endif #define UPIO_PORT (0) #define UPIO_HUB6 (1) #define UPIO_MEM (2) --- linux-2.6.6-rc1/include/linux/shmem_fs.h 2003-06-14 12:18:30.000000000 -0700 +++ 25/include/linux/shmem_fs.h 2004-04-18 22:25:50.166204056 -0700 @@ -2,6 +2,7 @@ #define __SHMEM_FS_H #include +#include /* inode in-kernel data */ @@ -15,6 +16,7 @@ struct 
shmem_inode_info { unsigned long alloced; /* data pages allocated to file */ unsigned long swapped; /* subtotal assigned to swap */ unsigned long flags; + struct shared_policy policy; struct list_head list; struct inode vfs_inode; }; --- linux-2.6.6-rc1/include/linux/signal.h 2004-02-03 20:42:39.000000000 -0800 +++ 25/include/linux/signal.h 2004-04-18 22:25:45.306942776 -0700 @@ -213,7 +213,7 @@ extern int sigprocmask(int, sigset_t *, #ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER struct pt_regs; -extern int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie); +extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); #endif #endif /* __KERNEL__ */ --- linux-2.6.6-rc1/include/linux/slab.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/slab.h 2004-04-18 22:25:32.695859952 -0700 @@ -44,6 +44,7 @@ typedef struct kmem_cache_s kmem_cache_t #define SLAB_STORE_USER 0x00010000UL /* store the last owner for bug hunting */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* track pages allocated to indicate what is reclaimable later*/ +#define SLAB_PANIC 0x00040000UL /* panic if kmem_cache_create() fails */ /* flags passed to a constructor func */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */ --- linux-2.6.6-rc1/include/linux/spinlock.h 2003-11-09 16:45:05.000000000 -0800 +++ 25/include/linux/spinlock.h 2004-04-18 22:25:47.927544384 -0700 @@ -15,6 +15,12 @@ #include /* for cpu relax */ #include +#ifdef CONFIG_KGDB +#include +#define SET_WHO(x, him) (x)->who = him; +#else +#define SET_WHO(x, him) +#endif /* * Must define these before including other files, inline functions need them @@ -55,6 +61,9 @@ typedef struct { const char *module; char *owner; int oline; +#ifdef CONFIG_KGDB + struct task_struct *who; +#endif } spinlock_t; #define SPIN_LOCK_UNLOCKED (spinlock_t) { SPINLOCK_MAGIC, 0, 10, __FILE__ , NULL, 0} @@ -66,6 +75,7 @@ typedef struct { (x)->module = __FILE__; 
\ (x)->owner = NULL; \ (x)->oline = 0; \ + SET_WHO(x, NULL) \ } while (0) #define CHECK_LOCK(x) \ @@ -88,6 +98,7 @@ typedef struct { (x)->lock = 1; \ (x)->owner = __FILE__; \ (x)->oline = __LINE__; \ + SET_WHO(x, current) \ } while (0) /* without debugging, spin_is_locked on UP always says @@ -118,6 +129,7 @@ typedef struct { (x)->lock = 1; \ (x)->owner = __FILE__; \ (x)->oline = __LINE__; \ + SET_WHO(x, current) \ 1; \ }) @@ -184,6 +196,17 @@ typedef struct { #endif /* !SMP */ +#ifdef CONFIG_LOCKMETER +extern void _metered_spin_lock (spinlock_t *lock); +extern void _metered_spin_unlock (spinlock_t *lock); +extern int _metered_spin_trylock(spinlock_t *lock); +extern void _metered_read_lock (rwlock_t *lock); +extern void _metered_read_unlock (rwlock_t *lock); +extern void _metered_write_lock (rwlock_t *lock); +extern void _metered_write_unlock (rwlock_t *lock); +extern int _metered_write_trylock(rwlock_t *lock); +#endif + /* * Define the various spin_lock and rw_lock methods. Note we define these * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various @@ -389,6 +412,141 @@ do { \ _raw_spin_trylock(lock) ? 1 : \ ({preempt_enable(); local_bh_enable(); 0;});}) +#ifdef CONFIG_LOCKMETER +#undef spin_lock +#undef spin_trylock +#undef spin_unlock +#undef spin_lock_irqsave +#undef spin_lock_irq +#undef spin_lock_bh +#undef read_lock +#undef read_unlock +#undef write_lock +#undef write_unlock +#undef write_trylock +#undef spin_unlock_bh +#undef read_lock_irqsave +#undef read_lock_irq +#undef read_lock_bh +#undef read_unlock_bh +#undef write_lock_irqsave +#undef write_lock_irq +#undef write_lock_bh +#undef write_unlock_bh + +#define spin_lock(lock) \ +do { \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while(0) + +#define spin_trylock(lock) ({preempt_disable(); _metered_spin_trylock(lock) ? 
\ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable(); \ +} while (0) + +#define spin_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_spin_lock(lock); \ +} while (0) + +#define spin_unlock_bh(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + + +#define read_lock(lock) ({preempt_disable(); _metered_read_lock(lock);}) +#define read_unlock(lock) ({_metered_read_unlock(lock); preempt_enable();}) +#define write_lock(lock) ({preempt_disable(); _metered_write_lock(lock);}) +#define write_unlock(lock) ({_metered_write_unlock(lock); preempt_enable();}) +#define write_trylock(lock) ({preempt_disable();_metered_write_trylock(lock) ? 
\ + 1 : ({preempt_enable(); 0;});}) +#define spin_unlock_no_resched(lock) \ +do { \ + _metered_spin_unlock(lock); \ + preempt_enable_no_resched(); \ +} while (0) + +#define read_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_read_lock(lock); \ +} while (0) + +#define read_unlock_bh(lock) \ +do { \ + _metered_read_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + +#define write_lock_irqsave(lock, flags) \ +do { \ + local_irq_save(flags); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_lock_irq(lock) \ +do { \ + local_irq_disable(); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_lock_bh(lock) \ +do { \ + local_bh_disable(); \ + preempt_disable(); \ + _metered_write_lock(lock); \ +} while (0) + +#define write_unlock_bh(lock) \ +do { \ + _metered_write_unlock(lock); \ + preempt_enable(); \ + local_bh_enable(); \ +} while (0) + +#endif /* !CONFIG_LOCKMETER */ + /* "lock on reference count zero" */ #ifndef ATOMIC_DEC_AND_LOCK #include --- linux-2.6.6-rc1/include/linux/swap.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/swap.h 2004-04-18 22:25:50.735117568 -0700 @@ -151,7 +151,7 @@ struct swap_list_t { extern void out_of_memory(void); /* linux/mm/memory.c */ -extern void swapin_readahead(swp_entry_t); +extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; @@ -181,8 +181,6 @@ extern int vm_swappiness; extern int shmem_unuse(swp_entry_t entry, struct page *page); #endif /* CONFIG_MMU */ -extern void swap_unplug_io_fn(struct backing_dev_info *); - #ifdef 
CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); @@ -202,7 +200,8 @@ extern int move_from_swap_cache(struct p extern void free_page_and_swap_cache(struct page *); extern void free_pages_and_swap_cache(struct page **, int); extern struct page * lookup_swap_cache(swp_entry_t); -extern struct page * read_swap_cache_async(swp_entry_t); +extern struct page * read_swap_cache_async(swp_entry_t, struct vm_area_struct *vma, + unsigned long addr); /* linux/mm/swapfile.c */ extern int total_swap_pages; @@ -218,7 +217,7 @@ extern sector_t map_swap_page(struct swa extern struct swap_info_struct *get_swap_info_struct(unsigned); extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); -struct backing_dev_info; +extern void swap_unplug_io_fn(struct page *); extern struct swap_list_t swap_list; extern spinlock_t swaplock; @@ -244,7 +243,7 @@ extern spinlock_t swaplock; #define free_swap_and_cache(swp) /*NOTHING*/ #define swap_duplicate(swp) /*NOTHING*/ #define swap_free(swp) /*NOTHING*/ -#define read_swap_cache_async(swp) NULL +#define read_swap_cache_async(swp,vma,addr) NULL #define lookup_swap_cache(swp) NULL #define valid_swaphandles(swp, off) 0 #define can_share_swap_page(p) 0 @@ -252,6 +251,7 @@ extern spinlock_t swaplock; #define move_from_swap_cache(p, i, m) 1 #define __delete_from_swap_cache(p) /*NOTHING*/ #define delete_from_swap_cache(p) /*NOTHING*/ +#define swap_unplug_io_fn(p) /*NOTHING*/ static inline int remove_exclusive_swap_page(struct page *p) { --- linux-2.6.6-rc1/include/linux/sysctl.h 2004-04-14 23:14:49.000000000 -0700 +++ 25/include/linux/sysctl.h 2004-04-18 22:25:24.996030504 -0700 @@ -131,6 +131,7 @@ enum KERN_PRINTK_RATELIMIT_BURST=61, /* int: tune printk ratelimiting */ KERN_PTY=62, /* dir: pty driver */ KERN_NGROUPS_MAX=63, /* int: NGROUPS_MAX */ + KERN_SPARC_SCONS_PWROFF=64, /* int: serial console power-off halt */ }; --- linux-2.6.6-rc1/include/linux/udp.h 2004-01-09 
00:04:32.000000000 -0800 +++ 25/include/linux/udp.h 2004-04-18 22:25:24.997030352 -0700 @@ -31,6 +31,7 @@ struct udphdr { #define UDP_ENCAP 100 /* Set the socket to accept encapsulated packets */ /* UDP encapsulation types */ +#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ #define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-06 */ #ifdef __KERNEL__ --- linux-2.6.6-rc1/include/linux/wireless.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/include/linux/wireless.h 2004-04-18 22:25:24.997030352 -0700 @@ -438,7 +438,7 @@ struct iw_param */ struct iw_point { - caddr_t pointer; /* Pointer to the data (in user space) */ + void __user *pointer; /* Pointer to the data (in user space) */ __u16 length; /* number of fields or size in bytes */ __u16 flags; /* Optional params */ }; --- linux-2.6.6-rc1/include/linux/workqueue.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/linux/workqueue.h 2004-04-18 22:25:27.574638496 -0700 @@ -49,7 +49,11 @@ struct work_struct { init_timer(&(_work)->timer); \ } while (0) -extern struct workqueue_struct *create_workqueue(const char *name); +extern struct workqueue_struct *__create_workqueue(const char *name, + int singlethread); +#define create_workqueue(name) __create_workqueue((name), 0) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1) + extern void destroy_workqueue(struct workqueue_struct *wq); extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); --- linux-2.6.6-rc1/include/net/bluetooth/hci_core.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/include/net/bluetooth/hci_core.h 2004-04-18 22:25:24.998030200 -0700 @@ -515,9 +515,9 @@ struct hci_pinfo { #define HCI_SFLT_MAX_OGF 5 struct hci_sec_filter { - unsigned long type_mask; - unsigned long event_mask[2]; - unsigned long ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; + __u32 type_mask; + __u32 event_mask[2]; + __u32 ocf_mask[HCI_SFLT_MAX_OGF + 1][4]; }; /* ----- HCI requests ----- */ --- 
linux-2.6.6-rc1/include/net/irda/ali-ircc.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,228 +0,0 @@ -/********************************************************************* - * - * Filename: ali-ircc.h - * Version: 0.5 - * Description: Driver for the ALI M1535D and M1543C FIR Controller - * Status: Experimental. - * Author: Benjamin Kong - * Created at: 2000/10/16 03:46PM - * Modified at: 2001/1/3 02:56PM - * Modified by: Benjamin Kong - * - * Copyright (c) 2000 Benjamin Kong - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - ********************************************************************/ - -#ifndef ALI_IRCC_H -#define ALI_IRCC_H - -#include - -#include -#include -#include - -/* SIR Register */ -/* Usr definition of linux/serial_reg.h */ - -/* FIR Register */ -#define BANK0 0x20 -#define BANK1 0x21 -#define BANK2 0x22 -#define BANK3 0x23 - -#define FIR_MCR 0x07 /* Master Control Register */ - -/* Bank 0 */ -#define FIR_DR 0x00 /* Alias 0, FIR Data Register (R/W) */ -#define FIR_IER 0x01 /* Alias 1, FIR Interrupt Enable Register (R/W) */ -#define FIR_IIR 0x02 /* Alias 2, FIR Interrupt Identification Register (Read only) */ -#define FIR_LCR_A 0x03 /* Alias 3, FIR Line Control Register A (R/W) */ -#define FIR_LCR_B 0x04 /* Alias 4, FIR Line Control Register B (R/W) */ -#define FIR_LSR 0x05 /* Alias 5, FIR Line Status Register (R/W) */ -#define FIR_BSR 0x06 /* Alias 6, FIR Bus Status Register (Read only) */ - - - /* Alias 1 */ - #define IER_FIFO 0x10 /* FIR FIFO Interrupt Enable */ - #define IER_TIMER 0x20 /* Timer Interrupt Enable */ - #define IER_EOM 0x40 /* End of Message Interrupt Enable */ - #define IER_ACT 0x80 /* Active Frame Interrupt Enable */ - - /* Alias 2 */ - #define 
IIR_FIFO 0x10 /* FIR FIFO Interrupt */ - #define IIR_TIMER 0x20 /* Timer Interrupt */ - #define IIR_EOM 0x40 /* End of Message Interrupt */ - #define IIR_ACT 0x80 /* Active Frame Interrupt */ - - /* Alias 3 */ - #define LCR_A_FIFO_RESET 0x80 /* FIFO Reset */ - - /* Alias 4 */ - #define LCR_B_BW 0x10 /* Brick Wall */ - #define LCR_B_SIP 0x20 /* SIP Enable */ - #define LCR_B_TX_MODE 0x40 /* Transmit Mode */ - #define LCR_B_RX_MODE 0x80 /* Receive Mode */ - - /* Alias 5 */ - #define LSR_FIR_LSA 0x00 /* FIR Line Status Address */ - #define LSR_FRAME_ABORT 0x08 /* Frame Abort */ - #define LSR_CRC_ERROR 0x10 /* CRC Error */ - #define LSR_SIZE_ERROR 0x20 /* Size Error */ - #define LSR_FRAME_ERROR 0x40 /* Frame Error */ - #define LSR_FIFO_UR 0x80 /* FIFO Underrun */ - #define LSR_FIFO_OR 0x80 /* FIFO Overrun */ - - /* Alias 6 */ - #define BSR_FIFO_NOT_EMPTY 0x80 /* FIFO Not Empty */ - -/* Bank 1 */ -#define FIR_CR 0x00 /* Alias 0, FIR Configuration Register (R/W) */ -#define FIR_FIFO_TR 0x01 /* Alias 1, FIR FIFO Threshold Register (R/W) */ -#define FIR_DMA_TR 0x02 /* Alias 2, FIR DMA Threshold Register (R/W) */ -#define FIR_TIMER_IIR 0x03 /* Alias 3, FIR Timer interrupt interval register (W/O) */ -#define FIR_FIFO_FR 0x03 /* Alias 3, FIR FIFO Flag register (R/O) */ -#define FIR_FIFO_RAR 0x04 /* Alias 4, FIR FIFO Read Address register (R/O) */ -#define FIR_FIFO_WAR 0x05 /* Alias 5, FIR FIFO Write Address register (R/O) */ -#define FIR_TR 0x06 /* Alias 6, Test REgister (W/O) */ - - /* Alias 0 */ - #define CR_DMA_EN 0x01 /* DMA Enable */ - #define CR_DMA_BURST 0x02 /* DMA Burst Mode */ - #define CR_TIMER_EN 0x08 /* Timer Enable */ - - /* Alias 3 */ - #define TIMER_IIR_500 0x00 /* 500 us */ - #define TIMER_IIR_1ms 0x01 /* 1 ms */ - #define TIMER_IIR_2ms 0x02 /* 2 ms */ - #define TIMER_IIR_4ms 0x03 /* 4 ms */ - -/* Bank 2 */ -#define FIR_IRDA_CR 0x00 /* Alias 0, IrDA Control Register (R/W) */ -#define FIR_BOF_CR 0x01 /* Alias 1, BOF Count Register (R/W) */ -#define FIR_BW_CR 
0x02 /* Alias 2, Brick Wall Count Register (R/W) */ -#define FIR_TX_DSR_HI 0x03 /* Alias 3, TX Data Size Register (high) (R/W) */ -#define FIR_TX_DSR_LO 0x04 /* Alias 4, TX Data Size Register (low) (R/W) */ -#define FIR_RX_DSR_HI 0x05 /* Alias 5, RX Data Size Register (high) (R/W) */ -#define FIR_RX_DSR_LO 0x06 /* Alias 6, RX Data Size Register (low) (R/W) */ - - /* Alias 0 */ - #define IRDA_CR_HDLC1152 0x80 /* 1.152Mbps HDLC Select */ - #define IRDA_CR_CRC 0X40 /* CRC Select. */ - #define IRDA_CR_HDLC 0x20 /* HDLC select. */ - #define IRDA_CR_HP_MODE 0x10 /* HP mode (read only) */ - #define IRDA_CR_SD_ST 0x08 /* SD/MODE State. */ - #define IRDA_CR_FIR_SIN 0x04 /* FIR SIN Select. */ - #define IRDA_CR_ITTX_0 0x02 /* SOUT State. IRTX force to 0 */ - #define IRDA_CR_ITTX_1 0x03 /* SOUT State. IRTX force to 1 */ - -/* Bank 3 */ -#define FIR_ID_VR 0x00 /* Alias 0, FIR ID Version Register (R/O) */ -#define FIR_MODULE_CR 0x01 /* Alias 1, FIR Module Control Register (R/W) */ -#define FIR_IO_BASE_HI 0x02 /* Alias 2, FIR Higher I/O Base Address Register (R/O) */ -#define FIR_IO_BASE_LO 0x03 /* Alias 3, FIR Lower I/O Base Address Register (R/O) */ -#define FIR_IRQ_CR 0x04 /* Alias 4, FIR IRQ Channel Register (R/O) */ -#define FIR_DMA_CR 0x05 /* Alias 5, FIR DMA Channel Register (R/O) */ - -struct ali_chip { - char *name; - int cfg[2]; - unsigned char entr1; - unsigned char entr2; - unsigned char cid_index; - unsigned char cid_value; - int (*probe)(struct ali_chip *chip, chipio_t *info); - int (*init)(struct ali_chip *chip, chipio_t *info); -}; -typedef struct ali_chip ali_chip_t; - - -/* DMA modes needed */ -#define DMA_TX_MODE 0x08 /* Mem to I/O, ++, demand. */ -#define DMA_RX_MODE 0x04 /* I/O to mem, ++, demand. 
*/ - -#define MAX_TX_WINDOW 7 -#define MAX_RX_WINDOW 7 - -#define TX_FIFO_Threshold 8 -#define RX_FIFO_Threshold 1 -#define TX_DMA_Threshold 1 -#define RX_DMA_Threshold 1 - -/* For storing entries in the status FIFO */ - -struct st_fifo_entry { - int status; - int len; -}; - -struct st_fifo { - struct st_fifo_entry entries[MAX_RX_WINDOW]; - int pending_bytes; - int head; - int tail; - int len; -}; - -struct frame_cb { - void *start; /* Start of frame in DMA mem */ - int len; /* Lenght of frame in DMA mem */ -}; - -struct tx_fifo { - struct frame_cb queue[MAX_TX_WINDOW]; /* Info about frames in queue */ - int ptr; /* Currently being sent */ - int len; /* Lenght of queue */ - int free; /* Next free slot */ - void *tail; /* Next free start in DMA mem */ -}; - -/* Private data for each instance */ -struct ali_ircc_cb { - - struct st_fifo st_fifo; /* Info about received frames */ - struct tx_fifo tx_fifo; /* Info about frames to be transmitted */ - - struct net_device *netdev; /* Yes! we are some kind of netdevice */ - struct net_device_stats stats; - - struct irlap_cb *irlap; /* The link layer we are binded to */ - struct qos_info qos; /* QoS capabilities for this device */ - - chipio_t io; /* IrDA controller information */ - iobuff_t tx_buff; /* Transmit buffer */ - iobuff_t rx_buff; /* Receive buffer */ - - __u8 ier; /* Interrupt enable register */ - - __u8 InterruptID; /* Interrupt ID */ - __u8 BusStatus; /* Bus Status */ - __u8 LineStatus; /* Line Status */ - - unsigned char rcvFramesOverflow; - - struct timeval stamp; - struct timeval now; - - spinlock_t lock; /* For serializing operations */ - - __u32 new_speed; - int index; /* Instance index */ - - unsigned char fifo_opti_buf; - - struct pm_dev *dev; -}; - -static inline void switch_bank(int iobase, int bank) -{ - outb(bank, iobase+FIR_MCR); -} - -#endif /* ALI_IRCC_H */ --- linux-2.6.6-rc1/include/net/irda/au1000_ircc.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ 
-1,127 +0,0 @@ -/* - * - * BRIEF MODULE DESCRIPTION - * Au1000 IrDA driver. - * - * Copyright 2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef AU1000_IRCC_H -#define AU1000_IRCC_H - -#include - -#include -#include -#include - -#define NUM_IR_IFF 1 -#define NUM_IR_DESC 64 -#define RING_SIZE_4 0x0 -#define RING_SIZE_16 0x3 -#define RING_SIZE_64 0xF -#define MAX_NUM_IR_DESC 64 -#define MAX_BUF_SIZE 2048 - -#define BPS_115200 0 -#define BPS_57600 1 -#define BPS_38400 2 -#define BPS_19200 5 -#define BPS_9600 11 -#define BPS_2400 47 - -/* Ring descriptor flags */ -#define AU_OWN (1<<7) /* tx,rx */ - -#define IR_DIS_CRC (1<<6) /* tx */ -#define IR_BAD_CRC (1<<5) /* tx */ -#define IR_NEED_PULSE (1<<4) /* tx */ -#define IR_FORCE_UNDER (1<<3) /* tx */ -#define IR_DISABLE_TX (1<<2) /* tx */ -#define IR_HW_UNDER (1<<0) /* tx */ -#define IR_TX_ERROR (IR_DIS_CRC|IR_BAD_CRC|IR_HW_UNDER) - -#define IR_PHY_ERROR (1<<6) /* rx */ -#define IR_CRC_ERROR (1<<5) /* rx */ -#define IR_MAX_LEN (1<<4) /* rx */ -#define IR_FIFO_OVER (1<<3) /* rx */ -#define IR_SIR_ERROR (1<<2) /* rx */ -#define IR_RX_ERROR (IR_PHY_ERROR|IR_CRC_ERROR| \ - IR_MAX_LEN|IR_FIFO_OVER|IR_SIR_ERROR) - -typedef struct db_dest { - struct db_dest *pnext; - volatile u32 *vaddr; - dma_addr_t dma_addr; -} db_dest_t; - - -typedef struct ring_desc { - u8 count_0; /* 7:0 */ - u8 count_1; /* 12:8 */ - u8 reserved; - u8 flags; - u8 addr_0; /* 7:0 */ - u8 addr_1; /* 15:8 */ - u8 addr_2; /* 23:16 */ - u8 addr_3; /* 31:24 */ -} ring_dest_t; - - -/* Private data for each instance */ -struct au1k_private { - - db_dest_t *pDBfree; - db_dest_t db[2*NUM_IR_DESC]; - volatile ring_dest_t *rx_ring[NUM_IR_DESC]; - volatile ring_dest_t *tx_ring[NUM_IR_DESC]; - db_dest_t *rx_db_inuse[NUM_IR_DESC]; - db_dest_t *tx_db_inuse[NUM_IR_DESC]; - u32 rx_head; - u32 tx_head; - u32 tx_tail; - u32 tx_full; - - iobuff_t rx_buff; - - struct net_device *netdev; - struct net_device_stats stats; - - struct timeval stamp; - struct timeval now; - struct qos_info qos; - struct irlap_cb *irlap; - - u8 open; - u32 speed; - u32 newspeed; - - u32 intr_work_done; /* number of Rx and Tx pkts 
processed in the isr */ - struct timer_list timer; - - spinlock_t lock; /* For serializing operations */ - struct pm_dev *dev; -}; -#endif /* AU1000_IRCC_H */ --- linux-2.6.6-rc1/include/net/irda/irda-usb.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,163 +0,0 @@ -/***************************************************************************** - * - * Filename: irda-usb.h - * Version: 0.9b - * Description: IrDA-USB Driver - * Status: Experimental - * Author: Dag Brattli - * - * Copyright (C) 2001, Roman Weissgaerber - * Copyright (C) 2000, Dag Brattli - * Copyright (C) 2001, Jean Tourrilhes - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - *****************************************************************************/ - -#include - -#include -#include /* struct irlap_cb */ - -#define RX_COPY_THRESHOLD 200 -#define IRDA_USB_MAX_MTU 2051 -#define IRDA_USB_SPEED_MTU 64 /* Weird, but work like this */ - -/* Maximum number of active URB on the Rx path - * This is the amount of buffers the we keep between the USB harware and the - * IrDA stack. - * - * Note : the network layer does also queue the packets between us and the - * IrDA stack, and is actually pretty fast and efficient in doing that. 
- * Therefore, we don't need to have a large number of URBs, and we can - * perfectly live happy with only one. We certainly don't need to keep the - * full IrTTP window around here... - * I repeat for those who have trouble to understand : 1 URB is plenty - * good enough to handle back-to-back (brickwalled) frames. I tried it, - * it works (it's the hardware that has trouble doing it). - * - * Having 2 URBs would allow the USB stack to process one URB while we take - * care of the other and then swap the URBs... - * On the other hand, increasing the number of URB will have penalities - * in term of latency and will interact with the link management in IrLAP... - * Jean II */ -#define IU_MAX_ACTIVE_RX_URBS 1 /* Don't touch !!! */ - -/* When a Rx URB is passed back to us, we can't reuse it immediately, - * because it may still be referenced by the USB layer. Therefore we - * need to keep one extra URB in the Rx path. - * Jean II */ -#define IU_MAX_RX_URBS (IU_MAX_ACTIVE_RX_URBS + 1) - -/* Various ugly stuff to try to workaround generic problems */ -/* Send speed command in case of timeout, just for trying to get things sane */ -#define IU_BUG_KICK_TIMEOUT -/* Show the USB class descriptor */ -#undef IU_DUMP_CLASS_DESC -/* Assume a minimum round trip latency for USB transfer (in us)... - * USB transfer are done in the next USB slot if there is no traffic - * (1/19 msec) and is done at 12 Mb/s : - * Waiting for slot + tx = (53us + 16us) * 2 = 137us minimum. 
- * Rx notification will only be done at the end of the USB frame period : - * OHCI : frame period = 1ms - * UHCI : frame period = 1ms, but notification can take 2 or 3 ms :-( - * EHCI : frame period = 125us */ -#define IU_USB_MIN_RTT 500 /* This should be safe in most cases */ - -/* Inbound header */ -#define MEDIA_BUSY 0x80 - -#define SPEED_2400 0x01 -#define SPEED_9600 0x02 -#define SPEED_19200 0x03 -#define SPEED_38400 0x04 -#define SPEED_57600 0x05 -#define SPEED_115200 0x06 -#define SPEED_576000 0x07 -#define SPEED_1152000 0x08 -#define SPEED_4000000 0x09 - -/* Basic capabilities */ -#define IUC_DEFAULT 0x00 /* Basic device compliant with 1.0 spec */ -/* Main bugs */ -#define IUC_SPEED_BUG 0x01 /* Device doesn't set speed after the frame */ -#define IUC_NO_WINDOW 0x02 /* Device doesn't behave with big Rx window */ -#define IUC_NO_TURN 0x04 /* Device doesn't do turnaround by itself */ -/* Not currently used */ -#define IUC_SIR_ONLY 0x08 /* Device doesn't behave at FIR speeds */ -#define IUC_SMALL_PKT 0x10 /* Device doesn't behave with big Rx packets */ -#define IUC_MAX_WINDOW 0x20 /* Device underestimate the Rx window */ -#define IUC_MAX_XBOFS 0x40 /* Device need more xbofs than advertised */ - -/* USB class definitions */ -#define USB_IRDA_HEADER 0x01 -#define USB_CLASS_IRDA 0x02 /* USB_CLASS_APP_SPEC subclass */ -#define USB_DT_IRDA 0x21 - -struct irda_class_desc { - __u8 bLength; - __u8 bDescriptorType; - __u16 bcdSpecRevision; - __u8 bmDataSize; - __u8 bmWindowSize; - __u8 bmMinTurnaroundTime; - __u16 wBaudRate; - __u8 bmAdditionalBOFs; - __u8 bIrdaRateSniff; - __u8 bMaxUnicastList; -} __attribute__ ((packed)); - -/* class specific interface request to get the IrDA-USB class descriptor - * (6.2.5, USB-IrDA class spec 1.0) */ - -#define IU_REQ_GET_CLASS_DESC 0x06 - -struct irda_usb_cb { - struct irda_class_desc *irda_desc; - struct usb_device *usbdev; /* init: probe_irda */ - struct usb_interface *usbintf; /* init: probe_irda */ - int netopen; /* Device is 
active for network */ - int present; /* Device is present on the bus */ - __u32 capability; /* Capability of the hardware */ - __u8 bulk_in_ep; /* Rx Endpoint assignments */ - __u8 bulk_out_ep; /* Tx Endpoint assignments */ - __u16 bulk_out_mtu; /* Max Tx packet size in bytes */ - __u8 bulk_int_ep; /* Interrupt Endpoint assignments */ - - wait_queue_head_t wait_q; /* for timeouts */ - - struct urb *rx_urb[IU_MAX_RX_URBS]; /* URBs used to receive data frames */ - struct urb *idle_rx_urb; /* Pointer to idle URB in Rx path */ - struct urb *tx_urb; /* URB used to send data frames */ - struct urb *speed_urb; /* URB used to send speed commands */ - - struct net_device *netdev; /* Yes! we are some kind of netdev. */ - struct net_device_stats stats; - struct irlap_cb *irlap; /* The link layer we are binded to */ - struct qos_info qos; - hashbin_t *tx_list; /* Queued transmit skb's */ - char *speed_buff; /* Buffer for speed changes */ - - struct timeval stamp; - struct timeval now; - - spinlock_t lock; /* For serializing operations */ - - __u16 xbofs; /* Current xbofs setting */ - __s16 new_xbofs; /* xbofs we need to set */ - __u32 speed; /* Current speed */ - __s32 new_speed; /* speed we need to set */ -}; - --- linux-2.6.6-rc1/include/net/irda/irlan_common.h 2003-08-22 19:23:42.000000000 -0700 +++ 25/include/net/irda/irlan_common.h 2004-04-18 22:25:25.002029592 -0700 @@ -219,7 +219,6 @@ int irlan_insert_array_param(struct sk_b __u16 value_len); int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len); -void print_ret_code(__u8 code); #endif --- linux-2.6.6-rc1/include/net/irda/irlan_eth.h 2003-08-22 19:23:42.000000000 -0700 +++ 25/include/net/irda/irlan_eth.h 2004-04-18 22:25:25.002029592 -0700 @@ -25,16 +25,9 @@ #ifndef IRLAN_ETH_H #define IRLAN_ETH_H -void irlan_eth_setup(struct net_device *dev); -int irlan_eth_open(struct net_device *dev); -int irlan_eth_close(struct net_device *dev); +struct net_device *alloc_irlandev(const char *name); int 
irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb); -int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); void irlan_eth_flow_indication( void *instance, void *sap, LOCAL_FLOW flow); void irlan_eth_send_gratuitous_arp(struct net_device *dev); - -void irlan_eth_set_multicast_list( struct net_device *dev); -struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); - #endif --- linux-2.6.6-rc1/include/net/irda/irlan_filter.h 2003-08-22 19:23:42.000000000 -0700 +++ 25/include/net/irda/irlan_filter.h 2004-04-18 22:25:25.002029592 -0700 @@ -27,7 +27,7 @@ void irlan_check_command_param(struct irlan_cb *self, char *param, char *value); -void handle_filter_request(struct irlan_cb *self, struct sk_buff *skb); +void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb); int irlan_print_filter(struct seq_file *seq, int filter_type); #endif /* IRLAN_FILTER_H */ --- linux-2.6.6-rc1/include/net/irda/irport.h 2003-06-14 12:18:24.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,90 +0,0 @@ -/********************************************************************* - * - * Filename: irport.h - * Version: 0.1 - * Description: Serial driver for IrDA - * Status: Experimental. - * Author: Dag Brattli - * Created at: Sun Aug 3 13:49:59 1997 - * Modified at: Fri Jan 14 10:21:10 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1997, 1998-2000 Dag Brattli - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. 
- * - ********************************************************************/ - -#ifndef IRPORT_H -#define IRPORT_H - -#include -#include -#include -#include - -#include - -#define SPEED_DEFAULT 9600 -#define SPEED_MAX 115200 - -/* - * These are the supported serial types. - */ -#define PORT_UNKNOWN 0 -#define PORT_8250 1 -#define PORT_16450 2 -#define PORT_16550 3 -#define PORT_16550A 4 -#define PORT_CIRRUS 5 -#define PORT_16650 6 -#define PORT_MAX 6 - -#define FRAME_MAX_SIZE 2048 - -struct irport_cb { - struct net_device *netdev; /* Yes! we are some kind of netdevice */ - struct net_device_stats stats; - - struct irlap_cb *irlap; /* The link layer we are attached to */ - - chipio_t io; /* IrDA controller information */ - iobuff_t tx_buff; /* Transmit buffer */ - iobuff_t rx_buff; /* Receive buffer */ - - struct qos_info qos; /* QoS capabilities for this device */ - dongle_t *dongle; /* Dongle driver */ - - __u32 flags; /* Interface flags */ - __u32 new_speed; - int mode; - int index; /* Instance index */ - int transmitting; /* Are we transmitting ? 
*/ - - spinlock_t lock; /* For serializing operations */ - - /* For piggyback drivers */ - void *priv; - void (*change_speed)(void *priv, __u32 speed); - int (*interrupt)(int irq, void *dev_id, struct pt_regs *regs); -}; - -struct irport_cb *irport_open(int i, unsigned int iobase, unsigned int irq); -int irport_close(struct irport_cb *self); -void irport_start(struct irport_cb *self); -void irport_stop(struct irport_cb *self); -void irport_change_speed(void *priv, __u32 speed); -irqreturn_t irport_interrupt(int irq, void *dev_id, struct pt_regs *regs); -int irport_hard_xmit(struct sk_buff *skb, struct net_device *dev); -int irport_net_open(struct net_device *dev); -int irport_net_close(struct net_device *dev); - -#endif /* IRPORT_H */ --- linux-2.6.6-rc1/include/net/irda/nsc-ircc.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,277 +0,0 @@ -/********************************************************************* - * - * Filename: nsc-ircc.h - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli - * Created at: Fri Nov 13 14:37:40 1998 - * Modified at: Sun Jan 23 17:47:00 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli - * Copyright (c) 1998 Lichen Wang, - * Copyright (c) 1998 Actisys Corp., www.actisys.com - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. 
- * - ********************************************************************/ - -#ifndef NSC_IRCC_H -#define NSC_IRCC_H - -#include - -#include -#include -#include - -/* DMA modes needed */ -#define DMA_TX_MODE 0x08 /* Mem to I/O, ++, demand. */ -#define DMA_RX_MODE 0x04 /* I/O to mem, ++, demand. */ - -/* Config registers for the '108 */ -#define CFG_108_BAIC 0x00 -#define CFG_108_CSRT 0x01 -#define CFG_108_MCTL 0x02 - -/* Config registers for the '338 */ -#define CFG_338_FER 0x00 -#define CFG_338_FAR 0x01 -#define CFG_338_PTR 0x02 -#define CFG_338_PNP0 0x1b -#define CFG_338_PNP1 0x1c -#define CFG_338_PNP3 0x4f - -/* Config registers for the '39x (in the logical device bank) */ -#define CFG_39X_LDN 0x07 /* Logical device number (Super I/O bank) */ -#define CFG_39X_SIOCF1 0x21 /* SuperI/O Config */ -#define CFG_39X_ACT 0x30 /* Device activation */ -#define CFG_39X_BASEH 0x60 /* Device base address (high bits) */ -#define CFG_39X_BASEL 0x61 /* Device base address (low bits) */ -#define CFG_39X_IRQNUM 0x70 /* Interrupt number & wake up enable */ -#define CFG_39X_IRQSEL 0x71 /* Interrupt select (edge/level + polarity) */ -#define CFG_39X_DMA0 0x74 /* DMA 0 configuration */ -#define CFG_39X_DMA1 0x75 /* DMA 1 configuration */ -#define CFG_39X_SPC 0xF0 /* Serial port configuration register */ - -/* Flags for configuration register CRF0 */ -#define APEDCRC 0x02 -#define ENBNKSEL 0x01 - -/* Set 0 */ -#define TXD 0x00 /* Transmit data port */ -#define RXD 0x00 /* Receive data port */ - -/* Register 1 */ -#define IER 0x01 /* Interrupt Enable Register*/ -#define IER_RXHDL_IE 0x01 /* Receiver high data level interrupt */ -#define IER_TXLDL_IE 0x02 /* Transeiver low data level interrupt */ -#define IER_LS_IE 0x04//* Link Status Interrupt */ -#define IER_ETXURI 0x04 /* Tx underrun */ -#define IER_DMA_IE 0x10 /* DMA finished interrupt */ -#define IER_TXEMP_IE 0x20 -#define IER_SFIF_IE 0x40 /* Frame status FIFO intr */ -#define IER_TMR_IE 0x80 /* Timer event */ - -#define FCR 0x02 
/* (write only) */ -#define FCR_FIFO_EN 0x01 /* Enable FIFO's */ -#define FCR_RXSR 0x02 /* Rx FIFO soft reset */ -#define FCR_TXSR 0x04 /* Tx FIFO soft reset */ -#define FCR_RXTH 0x40 /* Rx FIFO threshold (set to 16) */ -#define FCR_TXTH 0x20 /* Tx FIFO threshold (set to 17) */ - -#define EIR 0x02 /* (read only) */ -#define EIR_RXHDL_EV 0x01 -#define EIR_TXLDL_EV 0x02 -#define EIR_LS_EV 0x04 -#define EIR_DMA_EV 0x10 -#define EIR_TXEMP_EV 0x20 -#define EIR_SFIF_EV 0x40 -#define EIR_TMR_EV 0x80 - -#define LCR 0x03 /* Link control register */ -#define LCR_WLS_8 0x03 /* 8 bits */ - -#define BSR 0x03 /* Bank select register */ -#define BSR_BKSE 0x80 -#define BANK0 LCR_WLS_8 /* Must make sure that we set 8N1 */ -#define BANK1 0x80 -#define BANK2 0xe0 -#define BANK3 0xe4 -#define BANK4 0xe8 -#define BANK5 0xec -#define BANK6 0xf0 -#define BANK7 0xf4 - -#define MCR 0x04 /* Mode Control Register */ -#define MCR_MODE_MASK ~(0xd0) -#define MCR_UART 0x00 -#define MCR_RESERVED 0x20 -#define MCR_SHARP_IR 0x40 -#define MCR_SIR 0x60 -#define MCR_MIR 0x80 -#define MCR_FIR 0xa0 -#define MCR_CEIR 0xb0 -#define MCR_IR_PLS 0x10 -#define MCR_DMA_EN 0x04 -#define MCR_EN_IRQ 0x08 -#define MCR_TX_DFR 0x08 - -#define LSR 0x05 /* Link status register */ -#define LSR_RXDA 0x01 /* Receiver data available */ -#define LSR_TXRDY 0x20 /* Transmitter ready */ -#define LSR_TXEMP 0x40 /* Transmitter empty */ - -#define ASCR 0x07 /* Auxillary Status and Control Register */ -#define ASCR_RXF_TOUT 0x01 /* Rx FIFO timeout */ -#define ASCR_FEND_INF 0x02 /* Frame end bytes in rx FIFO */ -#define ASCR_S_EOT 0x04 /* Set end of transmission */ -#define ASCT_RXBSY 0x20 /* Rx busy */ -#define ASCR_TXUR 0x40 /* Transeiver underrun */ -#define ASCR_CTE 0x80 /* Clear timer event */ - -/* Bank 2 */ -#define BGDL 0x00 /* Baud Generator Divisor Port (Low Byte) */ -#define BGDH 0x01 /* Baud Generator Divisor Port (High Byte) */ - -#define ECR1 0x02 /* Extended Control Register 1 */ -#define ECR1_EXT_SL 0x01 /* 
Extended Mode Select */ -#define ECR1_DMANF 0x02 /* DMA Fairness */ -#define ECR1_DMATH 0x04 /* DMA Threshold */ -#define ECR1_DMASWP 0x08 /* DMA Swap */ - -#define EXCR2 0x04 -#define EXCR2_TFSIZ 0x01 /* Rx FIFO size = 32 */ -#define EXCR2_RFSIZ 0x04 /* Tx FIFO size = 32 */ - -#define TXFLV 0x06 /* Tx FIFO level */ -#define RXFLV 0x07 /* Rx FIFO level */ - -/* Bank 3 */ -#define MID 0x00 - -/* Bank 4 */ -#define TMRL 0x00 /* Timer low byte */ -#define TMRH 0x01 /* Timer high byte */ -#define IRCR1 0x02 /* Infrared control register 1 */ -#define IRCR1_TMR_EN 0x01 /* Timer enable */ - -#define TFRLL 0x04 -#define TFRLH 0x05 -#define RFRLL 0x06 -#define RFRLH 0x07 - -/* Bank 5 */ -#define IRCR2 0x04 /* Infrared control register 2 */ -#define IRCR2_MDRS 0x04 /* MIR data rate select */ -#define IRCR2_FEND_MD 0x20 /* */ - -#define FRM_ST 0x05 /* Frame status FIFO */ -#define FRM_ST_VLD 0x80 /* Frame status FIFO data valid */ -#define FRM_ST_ERR_MSK 0x5f -#define FRM_ST_LOST_FR 0x40 /* Frame lost */ -#define FRM_ST_MAX_LEN 0x10 /* Max frame len exceeded */ -#define FRM_ST_PHY_ERR 0x08 /* Physical layer error */ -#define FRM_ST_BAD_CRC 0x04 -#define FRM_ST_OVR1 0x02 /* Rx FIFO overrun */ -#define FRM_ST_OVR2 0x01 /* Frame status FIFO overrun */ - -#define RFLFL 0x06 -#define RFLFH 0x07 - -/* Bank 6 */ -#define IR_CFG2 0x00 -#define IR_CFG2_DIS_CRC 0x02 - -/* Bank 7 */ -#define IRM_CR 0x07 /* Infrared module control register */ -#define IRM_CR_IRX_MSL 0x40 -#define IRM_CR_AF_MNT 0x80 /* Automatic format */ - -/* NSC chip information */ -struct nsc_chip { - char *name; /* Name of chipset */ - int cfg[3]; /* Config registers */ - u_int8_t cid_index; /* Chip identification index reg */ - u_int8_t cid_value; /* Chip identification expected value */ - u_int8_t cid_mask; /* Chip identification revision mask */ - - /* Functions for probing and initializing the specific chip */ - int (*probe)(struct nsc_chip *chip, chipio_t *info); - int (*init)(struct nsc_chip *chip, chipio_t 
*info); -}; -typedef struct nsc_chip nsc_chip_t; - -/* For storing entries in the status FIFO */ -struct st_fifo_entry { - int status; - int len; -}; - -#define MAX_TX_WINDOW 7 -#define MAX_RX_WINDOW 7 - -struct st_fifo { - struct st_fifo_entry entries[MAX_RX_WINDOW]; - int pending_bytes; - int head; - int tail; - int len; -}; - -struct frame_cb { - void *start; /* Start of frame in DMA mem */ - int len; /* Lenght of frame in DMA mem */ -}; - -struct tx_fifo { - struct frame_cb queue[MAX_TX_WINDOW]; /* Info about frames in queue */ - int ptr; /* Currently being sent */ - int len; /* Lenght of queue */ - int free; /* Next free slot */ - void *tail; /* Next free start in DMA mem */ -}; - -/* Private data for each instance */ -struct nsc_ircc_cb { - struct st_fifo st_fifo; /* Info about received frames */ - struct tx_fifo tx_fifo; /* Info about frames to be transmitted */ - - struct net_device *netdev; /* Yes! we are some kind of netdevice */ - struct net_device_stats stats; - - struct irlap_cb *irlap; /* The link layer we are binded to */ - struct qos_info qos; /* QoS capabilities for this device */ - - chipio_t io; /* IrDA controller information */ - iobuff_t tx_buff; /* Transmit buffer */ - iobuff_t rx_buff; /* Receive buffer */ - - __u8 ier; /* Interrupt enable register */ - - struct timeval stamp; - struct timeval now; - - spinlock_t lock; /* For serializing operations */ - - __u32 new_speed; - int index; /* Instance index */ - - struct pm_dev *dev; -}; - -static inline void switch_bank(int iobase, int bank) -{ - outb(bank, iobase+BSR); -} - -#endif /* NSC_IRCC_H */ --- linux-2.6.6-rc1/include/net/irda/smc-ircc.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,180 +0,0 @@ -/********************************************************************* - * - * Filename: smc-ircc.h - * Version: 0.3 - * Description: Definitions for the SMC IrCC chipset - * Status: Experimental. 
- * Author: Thomas Davis (tadavis@jps.net) - * - * Copyright (c) 1999-2000, Dag Brattli - * Copyright (c) 1998-1999, Thomas Davis (tadavis@jps.net> - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - ********************************************************************/ - -#ifndef SMC_IRCC_H -#define SMC_IRCC_H - -#include -#include - -#include - -/* DMA modes needed */ -#define DMA_TX_MODE 0x08 /* Mem to I/O, ++, demand. */ -#define DMA_RX_MODE 0x04 /* I/O to mem, ++, demand. 
*/ - -/* Master Control Register */ -#define IRCC_MASTER 0x07 -#define IRCC_MASTER_POWERDOWN 0x80 -#define IRCC_MASTER_RESET 0x40 -#define IRCC_MASTER_INT_EN 0x20 -#define IRCC_MASTER_ERROR_RESET 0x10 - -/* Register block 0 */ - -/* Interrupt Identification */ -#define IRCC_IIR 0x01 -#define IRCC_IIR_ACTIVE_FRAME 0x80 -#define IRCC_IIR_EOM 0x40 -#define IRCC_IIR_RAW_MODE 0x20 -#define IRCC_IIR_FIFO 0x10 - -/* Interrupt Enable */ -#define IRCC_IER 0x02 -#define IRCC_IER_ACTIVE_FRAME 0x80 -#define IRCC_IER_EOM 0x40 -#define IRCC_IER_RAW_MODE 0x20 -#define IRCC_IER_FIFO 0x10 - -/* Line Status Register */ -#define IRCC_LSR 0x03 -#define IRCC_LSR_UNDERRUN 0x80 -#define IRCC_LSR_OVERRUN 0x40 -#define IRCC_LSR_FRAME_ERROR 0x20 -#define IRCC_LSR_SIZE_ERROR 0x10 -#define IRCC_LSR_CRC_ERROR 0x80 -#define IRCC_LSR_FRAME_ABORT 0x40 - -/* Line Control Register A */ -#define IRCC_LCR_A 0x04 -#define IRCC_LCR_A_FIFO_RESET 0x80 -#define IRCC_LCR_A_FAST 0x40 -#define IRCC_LCR_A_GP_DATA 0x20 -#define IRCC_LCR_A_RAW_TX 0x10 -#define IRCC_LCR_A_RAW_RX 0x08 -#define IRCC_LCR_A_ABORT 0x04 -#define IRCC_LCR_A_DATA_DONE 0x02 - -/* Line Control Register B */ -#define IRCC_LCR_B 0x05 -#define IRCC_LCR_B_SCE_DISABLED 0x00 -#define IRCC_LCR_B_SCE_TRANSMIT 0x40 -#define IRCC_LCR_B_SCE_RECEIVE 0x80 -#define IRCC_LCR_B_SCE_UNDEFINED 0xc0 -#define IRCC_LCR_B_SIP_ENABLE 0x20 -#define IRCC_LCR_B_BRICK_WALL 0x10 - -/* Bus Status Register */ -#define IRCC_BSR 0x06 -#define IRCC_BSR_NOT_EMPTY 0x80 -#define IRCC_BSR_FIFO_FULL 0x40 -#define IRCC_BSR_TIMEOUT 0x20 - -/* Register block 1 */ - -#define IRCC_FIFO_THRESHOLD 0x02 - -#define IRCC_SCE_CFGA 0x00 -#define IRCC_CFGA_AUX_IR 0x80 -#define IRCC_CFGA_HALF_DUPLEX 0x04 -#define IRCC_CFGA_TX_POLARITY 0x02 -#define IRCC_CFGA_RX_POLARITY 0x01 - -#define IRCC_CFGA_COM 0x00 -#define IRCC_CFGA_IRDA_SIR_A 0x08 -#define IRCC_CFGA_ASK_SIR 0x10 -#define IRCC_CFGA_IRDA_SIR_B 0x18 -#define IRCC_CFGA_IRDA_HDLC 0x20 -#define IRCC_CFGA_IRDA_4PPM 0x28 -#define 
IRCC_CFGA_CONSUMER 0x30 -#define IRCC_CFGA_RAW_IR 0x38 -#define IRCC_CFGA_OTHER 0x40 - -#define IRCC_IR_HDLC 0x04 -#define IRCC_IR_4PPM 0x01 -#define IRCC_IR_CONSUMER 0x02 - -#define IRCC_SCE_CFGB 0x01 -#define IRCC_CFGB_LOOPBACK 0x20 -#define IRCC_CFGB_LPBCK_TX_CRC 0x10 -#define IRCC_CFGB_NOWAIT 0x08 -#define IRCC_CFGB_STRING_MOVE 0x04 -#define IRCC_CFGB_DMA_BURST 0x02 -#define IRCC_CFGB_DMA_ENABLE 0x01 - -#define IRCC_CFGB_MUX_COM 0x00 -#define IRCC_CFGB_MUX_IR 0x40 -#define IRCC_CFGB_MUX_AUX 0x80 -#define IRCC_CFGB_MUX_INACTIVE 0xc0 - -/* Register block 3 - Identification Registers! */ -#define IRCC_ID_HIGH 0x00 /* 0x10 */ -#define IRCC_ID_LOW 0x01 /* 0xB8 */ -#define IRCC_CHIP_ID 0x02 /* 0xF1 */ -#define IRCC_VERSION 0x03 /* 0x01 */ -#define IRCC_INTERFACE 0x04 /* low 4 = DMA, high 4 = IRQ */ - -/* Register block 4 - IrDA */ -#define IRCC_CONTROL 0x00 -#define IRCC_BOF_COUNT_LO 0x01 /* Low byte */ -#define IRCC_BOF_COUNT_HI 0x00 /* High nibble (bit 0-3) */ -#define IRCC_BRICKWALL_CNT_LO 0x02 /* Low byte */ -#define IRCC_BRICKWALL_CNT_HI 0x03 /* High nibble (bit 4-7) */ -#define IRCC_TX_SIZE_LO 0x04 /* Low byte */ -#define IRCC_TX_SIZE_HI 0x03 /* High nibble (bit 0-3) */ -#define IRCC_RX_SIZE_HI 0x05 /* High nibble (bit 0-3) */ -#define IRCC_RX_SIZE_LO 0x06 /* Low byte */ - -#define IRCC_1152 0x80 -#define IRCC_CRC 0x40 - -/* Private data for each instance */ -struct ircc_cb { - struct net_device *netdev; /* Yes! 
we are some kind of netdevice */ - struct irlap_cb *irlap; /* The link layer we are binded to */ - - chipio_t *io; /* IrDA controller information */ - iobuff_t tx_buff; /* Transmit buffer */ - iobuff_t rx_buff; /* Receive buffer */ - - struct irport_cb *irport; - - /* Locking : half of our operations are done with irport, so we - * use the irport spinlock to make sure *everything* is properly - * synchronised - Jean II */ - - __u32 new_speed; - - int tx_buff_offsets[10]; /* Offsets between frames in tx_buff */ - int tx_len; /* Number of frames in tx_buff */ - - struct pm_dev *pmdev; -}; - -#endif /* SMC_IRCC_H */ --- linux-2.6.6-rc1/include/net/irda/toshoboe.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,166 +0,0 @@ -/********************************************************************* - * - * Filename: toshoboe.h - * Version: 0.1 - * Description: Driver for the Toshiba OBOE (or type-O) - * FIR Chipset. - * Status: Experimental. - * Author: James McKenzie - * Created at: Sat May 8 12:35:27 1999 - * - * Copyright (c) 1999-2000 James McKenzie, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither James McKenzie nor Cambridge University admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - * Applicable Models : Libretto 100CT. 
and many more - * - ********************************************************************/ - -#ifndef TOSHOBOE_H -#define TOSHOBOE_H - -/* Registers */ -/*Receive and transmit task registers (read only) */ -#define OBOE_RCVT (0x00+(self->base)) -#define OBOE_XMTT (0x01+(self->base)) -#define OBOE_XMTT_OFFSET 0x40 - -/*Page pointers to the TaskFile structure */ -#define OBOE_TFP2 (0x02+(self->base)) -#define OBOE_TFP0 (0x04+(self->base)) -#define OBOE_TFP1 (0x05+(self->base)) - -/*Dunno */ -#define OBOE_REG_3 (0x03+(self->base)) - -/*Number of tasks to use in Xmit and Recv queues */ -#define OBOE_NTR (0x07+(self->base)) -#define OBOE_NTR_XMIT4 0x00 -#define OBOE_NTR_XMIT8 0x10 -#define OBOE_NTR_XMIT16 0x30 -#define OBOE_NTR_XMIT32 0x70 -#define OBOE_NTR_XMIT64 0xf0 -#define OBOE_NTR_RECV4 0x00 -#define OBOE_NTR_RECV8 0x01 -#define OBOE_NTR_RECV6 0x03 -#define OBOE_NTR_RECV32 0x07 -#define OBOE_NTR_RECV64 0x0f - -/* Dunno */ -#define OBOE_REG_9 (0x09+(self->base)) - -/* Interrupt Status Register */ -#define OBOE_ISR (0x0c+(self->base)) -#define OBOE_ISR_TXDONE 0x80 -#define OBOE_ISR_RXDONE 0x40 -#define OBOE_ISR_20 0x20 -#define OBOE_ISR_10 0x10 -#define OBOE_ISR_8 0x08 /*This is collision or parity or something */ -#define OBOE_ISR_4 0x08 -#define OBOE_ISR_2 0x08 -#define OBOE_ISR_1 0x08 - -/*Dunno */ -#define OBOE_REG_D (0x0d+(self->base)) - -/*Register Lock Register */ -#define OBOE_LOCK ((self->base)+0x0e) - - - -/*Speed control registers */ -#define OBOE_PMDL (0x10+(self->base)) -#define OBOE_PMDL_SIR 0x18 -#define OBOE_PMDL_MIR 0xa0 -#define OBOE_PMDL_FIR 0x40 - -#define OBOE_SMDL (0x18+(self->base)) -#define OBOE_SMDL_SIR 0x20 -#define OBOE_SMDL_MIR 0x01 -#define OBOE_SMDL_FIR 0x0f - -#define OBOE_UDIV (0x19+(self->base)) - -/*Dunno */ -#define OBOE_REG_11 (0x11+(self->base)) - -/*Chip Reset Register */ -#define OBOE_RST (0x15+(self->base)) -#define OBOE_RST_WRAP 0x8 - -/*Dunno */ -#define OBOE_REG_1A (0x1a+(self->base)) -#define OBOE_REG_1B (0x1b+(self->base)) 
- -/* The PCI ID of the OBOE chip */ -#ifndef PCI_DEVICE_ID_FIR701 -#define PCI_DEVICE_ID_FIR701 0x0701 -#endif - -typedef unsigned int dword; -typedef unsigned short int word; -typedef unsigned char byte; -typedef dword Paddr; - -struct OboeTask - { - __u16 len; - __u8 unused; - __u8 control; - __u32 buffer; - }; - -#define OBOE_NTASKS 64 - -struct OboeTaskFile - { - struct OboeTask recv[OBOE_NTASKS]; - struct OboeTask xmit[OBOE_NTASKS]; - }; - -#define OBOE_TASK_BUF_LEN (sizeof(struct OboeTaskFile) << 1) - -/*These set the number of slots in use */ -#define TX_SLOTS 4 -#define RX_SLOTS 4 - -/* You need also to change this, toshiba uses 4,8 and 4,4 */ -/* It makes no difference if you are only going to use ONETASK mode */ -/* remember each buffer use XX_BUF_SZ more _PHYSICAL_ memory */ -#define OBOE_NTR_VAL (OBOE_NTR_XMIT4 | OBOE_NTR_RECV4) - -struct toshoboe_cb - { - struct net_device *netdev; /* Yes! we are some kind of netdevice */ - struct net_device_stats stats; - - struct irlap_cb *irlap; /* The link layer we are binded to */ - struct qos_info qos; /* QoS capabilities for this device */ - - chipio_t io; /* IrDA controller information */ - - __u32 new_speed; - - struct pci_dev *pdev; /*PCI device */ - int base; /*IO base */ - int txpending; /*how many tx's are pending */ - int txs, rxs; /*Which slots are we at */ - void *taskfilebuf; /*The unaligned taskfile buffer */ - struct OboeTaskFile *taskfile; /*The taskfile */ - void *xmit_bufs[TX_SLOTS]; /*The buffers */ - void *recv_bufs[RX_SLOTS]; - int open; - int stopped; /*Stopped by some or other APM stuff*/ - }; - - -#endif - - --- linux-2.6.6-rc1/include/net/irda/vlsi_ir.h 2004-04-03 20:39:14.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,799 +0,0 @@ - -/********************************************************************* - * - * vlsi_ir.h: VLSI82C147 PCI IrDA controller driver for Linux - * - * Version: 0.5 - * - * Copyright (c) 2001-2003 Martin Diehl - * - * This program is free 
software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - ********************************************************************/ - -#ifndef IRDA_VLSI_FIR_H -#define IRDA_VLSI_FIR_H - -/* ================================================================ - * compatibility stuff - */ - -/* definitions not present in pci_ids.h */ - -#ifndef PCI_CLASS_WIRELESS_IRDA -#define PCI_CLASS_WIRELESS_IRDA 0x0d00 -#endif - -#ifndef PCI_CLASS_SUBCLASS_MASK -#define PCI_CLASS_SUBCLASS_MASK 0xffff -#endif - -/* in recent 2.5 interrupt handlers have non-void return value */ -#ifndef IRQ_RETVAL -typedef void irqreturn_t; -#define IRQ_NONE -#define IRQ_HANDLED -#define IRQ_RETVAL(x) -#endif - -/* some stuff need to check kernelversion. 
Not all 2.5 stuff was present - * in early 2.5.x - the test is merely to separate 2.4 from 2.5 - */ -#include - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) - -/* PDE() introduced in 2.5.4 */ -#ifdef CONFIG_PROC_FS -#define PDE(inode) ((inode)->u.generic_ip) -#endif - -/* irda crc16 calculation exported in 2.5.42 */ -#define irda_calc_crc16(fcs,buf,len) (GOOD_FCS) - -/* we use this for unified pci device name access */ -#define PCIDEV_NAME(pdev) ((pdev)->name) - -#else /* 2.5 or later */ - -/* recent 2.5/2.6 stores pci device names at varying places ;-) */ -#ifdef CONFIG_PCI_NAMES -/* human readable name */ -#define PCIDEV_NAME(pdev) ((pdev)->pretty_name) -#else -/* whatever we get from the associated struct device - bus:slot:dev.fn id */ -#define PCIDEV_NAME(pdev) (pci_name(pdev)) -#endif - -#endif - -/* ================================================================ */ - -/* non-standard PCI registers */ - -enum vlsi_pci_regs { - VLSI_PCI_CLKCTL = 0x40, /* chip clock input control */ - VLSI_PCI_MSTRPAGE = 0x41, /* addr [31:24] for all busmaster cycles */ - VLSI_PCI_IRMISC = 0x42 /* mainly legacy UART related */ -}; - -/* ------------------------------------------ */ - -/* VLSI_PCI_CLKCTL: Clock Control Register (u8, rw) */ - -/* Three possible clock sources: either on-chip 48MHz PLL or - * external clock applied to EXTCLK pin. External clock may - * be either 48MHz or 40MHz, which is indicated by XCKSEL. - * CLKSTP controls whether the selected clock source gets - * connected to the IrDA block. - * - * On my HP OB-800 the BIOS sets external 40MHz clock as source - * when IrDA enabled and I've never detected any PLL lock success. - * Apparently the 14.3...MHz OSC input required for the PLL to work - * is not connected and the 40MHz EXTCLK is provided externally. - * At least this is what makes the driver working for me. - */ - -enum vlsi_pci_clkctl { - - /* PLL control */ - - CLKCTL_PD_INV = 0x04, /* PD#: inverted power down signal, - * i.e. 
PLL is powered, if PD_INV set */ - CLKCTL_LOCK = 0x40, /* (ro) set, if PLL is locked */ - - /* clock source selection */ - - CLKCTL_EXTCLK = 0x20, /* set to select external clock input, not PLL */ - CLKCTL_XCKSEL = 0x10, /* set to indicate EXTCLK is 40MHz, not 48MHz */ - - /* IrDA block control */ - - CLKCTL_CLKSTP = 0x80, /* set to disconnect from selected clock source */ - CLKCTL_WAKE = 0x08 /* set to enable wakeup feature: whenever IR activity - * is detected, PD_INV gets set(?) and CLKSTP cleared */ -}; - -/* ------------------------------------------ */ - -/* VLSI_PCI_MSTRPAGE: Master Page Register (u8, rw) and busmastering stuff */ - -#define DMA_MASK_USED_BY_HW 0xffffffff -#define DMA_MASK_MSTRPAGE 0x00ffffff -#define MSTRPAGE_VALUE (DMA_MASK_MSTRPAGE >> 24) - - /* PCI busmastering is somewhat special for this guy - in short: - * - * We select to operate using fixed MSTRPAGE=0, use ISA DMA - * address restrictions to make the PCI BM api aware of this, - * but ensure the hardware is dealing with real 32bit access. - * - * In detail: - * The chip executes normal 32bit busmaster cycles, i.e. - * drives all 32 address lines. These addresses however are - * composed of [0:23] taken from various busaddr-pointers - * and [24:31] taken from the MSTRPAGE register in the VLSI82C147 - * config space. Therefore _all_ busmastering must be - * targeted to/from one single 16MB (busaddr-) superpage! - * The point is to make sure all the allocations for memory - * locations with busmaster access (ring descriptors, buffers) - * are indeed bus-mappable to the same 16MB range (for x86 this - * means they must reside in the same 16MB physical memory address - * range). The only constraint we have which supports "several objects - * mappable to common 16MB range" paradigma, is the old ISA DMA - * restriction to the first 16MB of physical address range. - * Hence the approach here is to enable PCI busmaster support using - * the correct 32bit dma-mask used by the chip. 
Afterwards the device's - * dma-mask gets restricted to 24bit, which must be honoured somehow by - * all allocations for memory areas to be exposed to the chip ... - * - * Note: - * Don't be surprised to get "Setting latency timer..." messages every - * time when PCI busmastering is enabled for the chip. - * The chip has its PCI latency timer RO fixed at 0 - which is not a - * problem here, because it is never requesting _burst_ transactions. - */ - -/* ------------------------------------------ */ - -/* VLSI_PCIIRMISC: IR Miscellaneous Register (u8, rw) */ - -/* legacy UART emulation - not used by this driver - would require: - * (see below for some register-value definitions) - * - * - IRMISC_UARTEN must be set to enable UART address decoding - * - IRMISC_UARTSEL configured - * - IRCFG_MASTER must be cleared - * - IRCFG_SIR must be set - * - IRENABLE_PHYANDCLOCK must be asserted 0->1 (and hence IRENABLE_SIR_ON) - */ - -enum vlsi_pci_irmisc { - - /* IR transceiver control */ - - IRMISC_IRRAIL = 0x40, /* (ro?) IR rail power indication (and control?) - * 0=3.3V / 1=5V. Probably set during power-on? - * unclear - not touched by driver */ - IRMISC_IRPD = 0x08, /* transceiver power down, if set */ - - /* legacy UART control */ - - IRMISC_UARTTST = 0x80, /* UART test mode - "always write 0" */ - IRMISC_UARTEN = 0x04, /* enable UART address decoding */ - - /* bits [1:0] IRMISC_UARTSEL to select legacy UART address */ - - IRMISC_UARTSEL_3f8 = 0x00, - IRMISC_UARTSEL_2f8 = 0x01, - IRMISC_UARTSEL_3e8 = 0x02, - IRMISC_UARTSEL_2e8 = 0x03 -}; - -/* ================================================================ */ - -/* registers mapped to 32 byte PCI IO space */ - -/* note: better access all registers at the indicated u8/u16 size - * although some of them contain only 1 byte of information. - * some of them (particaluarly PROMPT and IRCFG) ignore - * access when using the wrong addressing mode! 
- */ - -enum vlsi_pio_regs { - VLSI_PIO_IRINTR = 0x00, /* interrupt enable/request (u8, rw) */ - VLSI_PIO_RINGPTR = 0x02, /* rx/tx ring pointer (u16, ro) */ - VLSI_PIO_RINGBASE = 0x04, /* [23:10] of ring address (u16, rw) */ - VLSI_PIO_RINGSIZE = 0x06, /* rx/tx ring size (u16, rw) */ - VLSI_PIO_PROMPT = 0x08, /* triggers ring processing (u16, wo) */ - /* 0x0a-0x0f: reserved / duplicated UART regs */ - VLSI_PIO_IRCFG = 0x10, /* configuration select (u16, rw) */ - VLSI_PIO_SIRFLAG = 0x12, /* BOF/EOF for filtered SIR (u16, ro) */ - VLSI_PIO_IRENABLE = 0x14, /* enable and status register (u16, rw/ro) */ - VLSI_PIO_PHYCTL = 0x16, /* physical layer current status (u16, ro) */ - VLSI_PIO_NPHYCTL = 0x18, /* next physical layer select (u16, rw) */ - VLSI_PIO_MAXPKT = 0x1a, /* [11:0] max len for packet receive (u16, rw) */ - VLSI_PIO_RCVBCNT = 0x1c /* current receive-FIFO byte count (u16, ro) */ - /* 0x1e-0x1f: reserved / duplicated UART regs */ -}; - -/* ------------------------------------------ */ - -/* VLSI_PIO_IRINTR: Interrupt Register (u8, rw) */ - -/* enable-bits: - * 1 = enable / 0 = disable - * interrupt condition bits: - * set according to corresponding interrupt source - * (regardless of the state of the enable bits) - * enable bit status indicates whether interrupt gets raised - * write-to-clear - * note: RPKTINT and TPKTINT behave different in legacy UART mode (which we don't use :-) - */ - -enum vlsi_pio_irintr { - IRINTR_ACTEN = 0x80, /* activity interrupt enable */ - IRINTR_ACTIVITY = 0x40, /* activity monitor (traffic detected) */ - IRINTR_RPKTEN = 0x20, /* receive packet interrupt enable*/ - IRINTR_RPKTINT = 0x10, /* rx-packet transfered from fifo to memory finished */ - IRINTR_TPKTEN = 0x08, /* transmit packet interrupt enable */ - IRINTR_TPKTINT = 0x04, /* last bit of tx-packet+crc shifted to ir-pulser */ - IRINTR_OE_EN = 0x02, /* UART rx fifo overrun error interrupt enable */ - IRINTR_OE_INT = 0x01 /* UART rx fifo overrun error (read LSR to clear) */ 
-}; - -/* we use this mask to check whether the (shared PCI) interrupt is ours */ - -#define IRINTR_INT_MASK (IRINTR_ACTIVITY|IRINTR_RPKTINT|IRINTR_TPKTINT) - -/* ------------------------------------------ */ - -/* VLSI_PIO_RINGPTR: Ring Pointer Read-Back Register (u16, ro) */ - -/* _both_ ring pointers are indices relative to the _entire_ rx,tx-ring! - * i.e. the referenced descriptor is located - * at RINGBASE + PTR * sizeof(descr) for rx and tx - * therefore, the tx-pointer has offset MAX_RING_DESCR - */ - -#define MAX_RING_DESCR 64 /* tx, rx rings may contain up to 64 descr each */ - -#define RINGPTR_RX_MASK (MAX_RING_DESCR-1) -#define RINGPTR_TX_MASK ((MAX_RING_DESCR-1)<<8) - -#define RINGPTR_GET_RX(p) ((p)&RINGPTR_RX_MASK) -#define RINGPTR_GET_TX(p) (((p)&RINGPTR_TX_MASK)>>8) - -/* ------------------------------------------ */ - -/* VLSI_PIO_RINGBASE: Ring Pointer Base Address Register (u16, ro) */ - -/* Contains [23:10] part of the ring base (bus-) address - * which must be 1k-alinged. [31:24] is taken from - * VLSI_PCI_MSTRPAGE above. - * The controller initiates non-burst PCI BM cycles to - * fetch and update the descriptors in the ring. - * Once fetched, the descriptor remains cached onchip - * until it gets closed and updated due to the ring - * processing state machine. - * The entire ring area is split in rx and tx areas with each - * area consisting of 64 descriptors of 8 bytes each. - * The rx(tx) ring is located at ringbase+0 (ringbase+64*8). - */ - -#define BUS_TO_RINGBASE(p) (((p)>>10)&0x3fff) - -/* ------------------------------------------ */ - -/* VLSI_PIO_RINGSIZE: Ring Size Register (u16, rw) */ - -/* bit mask to indicate the ring size to be used for rx and tx. 
- * possible values encoded bits - * 4 0000 - * 8 0001 - * 16 0011 - * 32 0111 - * 64 1111 - * located at [15:12] for tx and [11:8] for rx ([7:0] unused) - * - * note: probably a good idea to have IRCFG_MSTR cleared when writing - * this so the state machines are stopped and the RINGPTR is reset! - */ - -#define SIZE_TO_BITS(num) ((((num)-1)>>2)&0x0f) -#define TX_RX_TO_RINGSIZE(tx,rx) ((SIZE_TO_BITS(tx)<<12)|(SIZE_TO_BITS(rx)<<8)) -#define RINGSIZE_TO_RXSIZE(rs) ((((rs)&0x0f00)>>6)+4) -#define RINGSIZE_TO_TXSIZE(rs) ((((rs)&0xf000)>>10)+4) - - -/* ------------------------------------------ */ - -/* VLSI_PIO_PROMPT: Ring Prompting Register (u16, write-to-start) */ - -/* writing any value kicks the ring processing state machines - * for both tx, rx rings as follows: - * - active rings (currently owning an active descriptor) - * ignore the prompt and continue - * - idle rings fetch the next descr from the ring and start - * their processing - */ - -/* ------------------------------------------ */ - -/* VLSI_PIO_IRCFG: IR Config Register (u16, rw) */ - -/* notes: - * - not more than one SIR/MIR/FIR bit must be set at any time - * - SIR, MIR, FIR and CRC16 select the configuration which will - * be applied on next 0->1 transition of IRENABLE_PHYANDCLOCK (see below). - * - besides allowing the PCI interface to execute busmaster cycles - * and therefore the ring SM to operate, the MSTR bit has side-effects: - * when MSTR is cleared, the RINGPTR's get reset and the legacy UART mode - * (in contrast to busmaster access mode) gets enabled. - * - clearing ENRX or setting ENTX while data is received may stall the - * receive fifo until ENRX reenabled _and_ another packet arrives - * - SIRFILT means the chip performs the required unwrapping of hardware - * headers (XBOF's, BOF/EOF) and un-escaping in the _receive_ direction. - * Only the resulting IrLAP payload is copied to the receive buffers - - * but with the 16bit FCS still encluded. 
Question remains, whether it - * was already checked or we should do it before passing the packet to IrLAP? - */ - -enum vlsi_pio_ircfg { - IRCFG_LOOP = 0x4000, /* enable loopback test mode */ - IRCFG_ENTX = 0x1000, /* transmit enable */ - IRCFG_ENRX = 0x0800, /* receive enable */ - IRCFG_MSTR = 0x0400, /* master enable */ - IRCFG_RXANY = 0x0200, /* receive any packet */ - IRCFG_CRC16 = 0x0080, /* 16bit (not 32bit) CRC select for MIR/FIR */ - IRCFG_FIR = 0x0040, /* FIR 4PPM encoding mode enable */ - IRCFG_MIR = 0x0020, /* MIR HDLC encoding mode enable */ - IRCFG_SIR = 0x0010, /* SIR encoding mode enable */ - IRCFG_SIRFILT = 0x0008, /* enable SIR decode filter (receiver unwrapping) */ - IRCFG_SIRTEST = 0x0004, /* allow SIR decode filter when not in SIR mode */ - IRCFG_TXPOL = 0x0002, /* invert tx polarity when set */ - IRCFG_RXPOL = 0x0001 /* invert rx polarity when set */ -}; - -/* ------------------------------------------ */ - -/* VLSI_PIO_SIRFLAG: SIR Flag Register (u16, ro) */ - -/* register contains hardcoded BOF=0xc0 at [7:0] and EOF=0xc1 at [15:8] - * which is used for unwrapping received frames in SIR decode-filter mode - */ - -/* ------------------------------------------ */ - -/* VLSI_PIO_IRENABLE: IR Enable Register (u16, rw/ro) */ - -/* notes: - * - IREN acts as gate for latching the configured IR mode information - * from IRCFG and IRPHYCTL when IREN=reset and applying them when - * IREN gets set afterwards. 
- * - ENTXST reflects IRCFG_ENTX - * - ENRXST = IRCFG_ENRX && (!IRCFG_ENTX || IRCFG_LOOP) - */ - -enum vlsi_pio_irenable { - IRENABLE_PHYANDCLOCK = 0x8000, /* enable IR phy and gate the mode config (rw) */ - IRENABLE_CFGER = 0x4000, /* mode configuration error (ro) */ - IRENABLE_FIR_ON = 0x2000, /* FIR on status (ro) */ - IRENABLE_MIR_ON = 0x1000, /* MIR on status (ro) */ - IRENABLE_SIR_ON = 0x0800, /* SIR on status (ro) */ - IRENABLE_ENTXST = 0x0400, /* transmit enable status (ro) */ - IRENABLE_ENRXST = 0x0200, /* Receive enable status (ro) */ - IRENABLE_CRC16_ON = 0x0100 /* 16bit (not 32bit) CRC enabled status (ro) */ -}; - -#define IRENABLE_MASK 0xff00 /* Read mask */ - -/* ------------------------------------------ */ - -/* VLSI_PIO_PHYCTL: IR Physical Layer Current Control Register (u16, ro) */ - -/* read-back of the currently applied physical layer status. - * applied from VLSI_PIO_NPHYCTL at rising edge of IRENABLE_PHYANDCLOCK - * contents identical to VLSI_PIO_NPHYCTL (see below) - */ - -/* ------------------------------------------ */ - -/* VLSI_PIO_NPHYCTL: IR Physical Layer Next Control Register (u16, rw) */ - -/* latched during IRENABLE_PHYANDCLOCK=0 and applied at 0-1 transition - * - * consists of BAUD[15:10], PLSWID[9:5] and PREAMB[4:0] bits defined as follows: - * - * SIR-mode: BAUD = (115.2kHz / baudrate) - 1 - * PLSWID = (pulsetime * freq / (BAUD+1)) - 1 - * where pulsetime is the requested IrPHY pulse width - * and freq is 8(16)MHz for 40(48)MHz primary input clock - * PREAMB: don't care for SIR - * - * The nominal SIR pulse width is 3/16 bit time so we have PLSWID=12 - * fixed for all SIR speeds at 40MHz input clock (PLSWID=24 at 48MHz). - * IrPHY also allows shorter pulses down to the nominal pulse duration - * at 115.2kbaud (minus some tolerance) which is 1.41 usec. 
- * Using the expression PLSWID = 12/(BAUD+1)-1 (multiplied by two for 48MHz) - * we get the minimum acceptable PLSWID values according to the VLSI - * specification, which provides 1.5 usec pulse width for all speeds (except - * for 2.4kbaud getting 6usec). This is fine with IrPHY v1.3 specs and - * reduces the transceiver power which drains the battery. At 9.6kbaud for - * example this amounts to more than 90% battery power saving! - * - * MIR-mode: BAUD = 0 - * PLSWID = 9(10) for 40(48) MHz input clock - * to get nominal MIR pulse width - * PREAMB = 1 - * - * FIR-mode: BAUD = 0 - * PLSWID: don't care - * PREAMB = 15 - */ - -#define PHYCTL_BAUD_SHIFT 10 -#define PHYCTL_BAUD_MASK 0xfc00 -#define PHYCTL_PLSWID_SHIFT 5 -#define PHYCTL_PLSWID_MASK 0x03e0 -#define PHYCTL_PREAMB_SHIFT 0 -#define PHYCTL_PREAMB_MASK 0x001f - -#define PHYCTL_TO_BAUD(bwp) (((bwp)&PHYCTL_BAUD_MASK)>>PHYCTL_BAUD_SHIFT) -#define PHYCTL_TO_PLSWID(bwp) (((bwp)&PHYCTL_PLSWID_MASK)>>PHYCTL_PLSWID_SHIFT) -#define PHYCTL_TO_PREAMB(bwp) (((bwp)&PHYCTL_PREAMB_MASK)>>PHYCTL_PREAMB_SHIFT) - -#define BWP_TO_PHYCTL(b,w,p) ((((b)<0) ? (tmp-1) : 0; -} - -#define PHYCTL_SIR(br,ws,cs) BWP_TO_PHYCTL(BAUD_BITS(br),calc_width_bits((br),(ws),(cs)),0) -#define PHYCTL_MIR(cs) BWP_TO_PHYCTL(0,((cs)?9:10),1) -#define PHYCTL_FIR BWP_TO_PHYCTL(0,0,15) - -/* quite ugly, I know. But implementing these calculations here avoids - * having magic numbers in the code and allows some playing with pulsewidths - * without risk to violate the standards. 
- * FWIW, here is the table for reference: - * - * baudrate BAUD min-PLSWID nom-PLSWID PREAMB - * 2400 47 0(0) 12(24) 0 - * 9600 11 0(0) 12(24) 0 - * 19200 5 1(2) 12(24) 0 - * 38400 2 3(6) 12(24) 0 - * 57600 1 5(10) 12(24) 0 - * 115200 0 11(22) 12(24) 0 - * MIR 0 - 9(10) 1 - * FIR 0 - 0 15 - * - * note: x(y) means x-value for 40MHz / y-value for 48MHz primary input clock - */ - -/* ------------------------------------------ */ - - -/* VLSI_PIO_MAXPKT: Maximum Packet Length register (u16, rw) */ - -/* maximum acceptable length for received packets */ - -/* hw imposed limitation - register uses only [11:0] */ -#define MAX_PACKET_LENGTH 0x0fff - -/* IrLAP I-field (apparently not defined elsewhere) */ -#define IRDA_MTU 2048 - -/* complete packet consists of A(1)+C(1)+I(<=IRDA_MTU) */ -#define IRLAP_SKB_ALLOCSIZE (1+1+IRDA_MTU) - -/* the buffers we use to exchange frames with the hardware need to be - * larger than IRLAP_SKB_ALLOCSIZE because we may have up to 4 bytes FCS - * appended and, in SIR mode, a lot of frame wrapping bytes. The worst - * case appears to be a SIR packet with I-size==IRDA_MTU and all bytes - * requiring to be escaped to provide transparency. Furthermore, the peer - * might ask for quite a number of additional XBOFs: - * up to 115+48 XBOFS 163 - * regular BOF 1 - * A-field 1 - * C-field 1 - * I-field, IRDA_MTU, all escaped 4096 - * FCS (16 bit at SIR, escaped) 4 - * EOF 1 - * AFAICS nothing in IrLAP guarantees A/C field not to need escaping - * (f.e. 0xc0/0xc1 - i.e. BOF/EOF - are legal values there) so in the - * worst case we have 4269 bytes total frame size. - * However, the VLSI uses 12 bits only for all buffer length values, - * which limits the maximum useable buffer size <= 4095. 
- * Note this is not a limitation in the receive case because we use - * the SIR filtering mode where the hw unwraps the frame and only the - * bare packet+fcs is stored into the buffer - in contrast to the SIR - * tx case where we have to pass frame-wrapped packets to the hw. - * If this would ever become an issue in real life, the only workaround - * I see would be using the legacy UART emulation in SIR mode. - */ - -#define XFER_BUF_SIZE MAX_PACKET_LENGTH - -/* ------------------------------------------ */ - -/* VLSI_PIO_RCVBCNT: Receive Byte Count Register (u16, ro) */ - -/* receive packet counter gets incremented on every non-filtered - * byte which was put in the receive fifo and reset for each - * new packet. Used to decide whether we are just in the middle - * of receiving - */ - -/* better apply the [11:0] mask when reading, as some docs say the - * reserved [15:12] would return 1 when reading - which is wrong AFAICS - */ -#define RCVBCNT_MASK 0x0fff - -/******************************************************************/ - -/* descriptors for rx/tx ring - * - * accessed by hardware - don't change! - * - * the descriptor is owned by hardware, when the ACTIVE status bit - * is set and nothing (besides reading status to test the bit) - * shall be done. The bit gets cleared by hw, when the descriptor - * gets closed. Premature reaping of descriptors owned be the chip - * can be achieved by disabling IRCFG_MSTR - * - * Attention: Writing addr overwrites status! 
- * - * ### FIXME: depends on endianess (but there ain't no non-i586 ob800 ;-) - */ - -struct ring_descr_hw { - volatile u16 rd_count; /* tx/rx count [11:0] */ - u16 reserved; - union { - u32 addr; /* [23:0] of the buffer's busaddress */ - struct { - u8 addr_res[3]; - volatile u8 status; /* descriptor status */ - } rd_s __attribute__((packed)); - } rd_u __attribute((packed)); -} __attribute__ ((packed)); - -#define rd_addr rd_u.addr -#define rd_status rd_u.rd_s.status - -/* ring descriptor status bits */ - -#define RD_ACTIVE 0x80 /* descriptor owned by hw (both TX,RX) */ - -/* TX ring descriptor status */ - -#define RD_TX_DISCRC 0x40 /* do not send CRC (for SIR) */ -#define RD_TX_BADCRC 0x20 /* force a bad CRC */ -#define RD_TX_PULSE 0x10 /* send indication pulse after this frame (MIR/FIR) */ -#define RD_TX_FRCEUND 0x08 /* force underrun */ -#define RD_TX_CLRENTX 0x04 /* clear ENTX after this frame */ -#define RD_TX_UNDRN 0x01 /* TX fifo underrun (probably PCI problem) */ - -/* RX ring descriptor status */ - -#define RD_RX_PHYERR 0x40 /* physical encoding error */ -#define RD_RX_CRCERR 0x20 /* CRC error (MIR/FIR) */ -#define RD_RX_LENGTH 0x10 /* frame exceeds buffer length */ -#define RD_RX_OVER 0x08 /* RX fifo overrun (probably PCI problem) */ -#define RD_RX_SIRBAD 0x04 /* EOF missing: BOF follows BOF (SIR, filtered) */ - -#define RD_RX_ERROR 0x7c /* any error in received frame */ - -/* the memory required to hold the 2 descriptor rings */ -#define HW_RING_AREA_SIZE (2 * MAX_RING_DESCR * sizeof(struct ring_descr_hw)) - -/******************************************************************/ - -/* sw-ring descriptors consists of a bus-mapped transfer buffer with - * associated skb and a pointer to the hw entry descriptor - */ - -struct ring_descr { - struct ring_descr_hw *hw; - struct sk_buff *skb; - void *buf; -}; - -/* wrappers for operations on hw-exposed ring descriptors - * access to the hw-part of the descriptors must use these. 
- */ - -static inline int rd_is_active(struct ring_descr *rd) -{ - return ((rd->hw->rd_status & RD_ACTIVE) != 0); -} - -static inline void rd_activate(struct ring_descr *rd) -{ - rd->hw->rd_status |= RD_ACTIVE; -} - -static inline void rd_set_status(struct ring_descr *rd, u8 s) -{ - rd->hw->rd_status = s; /* may pass ownership to the hardware */ -} - -static inline void rd_set_addr_status(struct ring_descr *rd, dma_addr_t a, u8 s) -{ - /* order is important for two reasons: - * - overlayed: writing addr overwrites status - * - we want to write status last so we have valid address in - * case status has RD_ACTIVE set - */ - - if ((a & ~DMA_MASK_MSTRPAGE)>>24 != MSTRPAGE_VALUE) { - ERROR("%s: pci busaddr inconsistency!\n", __FUNCTION__); - dump_stack(); - return; - } - - a &= DMA_MASK_MSTRPAGE; /* clear highbyte to make sure we won't write - * to status - just in case MSTRPAGE_VALUE!=0 - */ - rd->hw->rd_addr = cpu_to_le32(a); - wmb(); - rd_set_status(rd, s); /* may pass ownership to the hardware */ -} - -static inline void rd_set_count(struct ring_descr *rd, u16 c) -{ - rd->hw->rd_count = cpu_to_le16(c); -} - -static inline u8 rd_get_status(struct ring_descr *rd) -{ - return rd->hw->rd_status; -} - -static inline dma_addr_t rd_get_addr(struct ring_descr *rd) -{ - dma_addr_t a; - - a = le32_to_cpu(rd->hw->rd_addr); - return (a & DMA_MASK_MSTRPAGE) | (MSTRPAGE_VALUE << 24); -} - -static inline u16 rd_get_count(struct ring_descr *rd) -{ - return le16_to_cpu(rd->hw->rd_count); -} - -/******************************************************************/ - -/* sw descriptor rings for rx, tx: - * - * operations follow producer-consumer paradigm, with the hw - * in the middle doing the processing. - * ring size must be power of two. 
- * - * producer advances r->tail after inserting for processing - * consumer advances r->head after removing processed rd - * ring is empty if head==tail / full if (tail+1)==head - */ - -struct vlsi_ring { - struct pci_dev *pdev; - int dir; - unsigned len; - unsigned size; - unsigned mask; - atomic_t head, tail; - struct ring_descr *rd; -}; - -/* ring processing helpers */ - -static inline struct ring_descr *ring_last(struct vlsi_ring *r) -{ - int t; - - t = atomic_read(&r->tail) & r->mask; - return (((t+1) & r->mask) == (atomic_read(&r->head) & r->mask)) ? NULL : &r->rd[t]; -} - -static inline struct ring_descr *ring_put(struct vlsi_ring *r) -{ - atomic_inc(&r->tail); - return ring_last(r); -} - -static inline struct ring_descr *ring_first(struct vlsi_ring *r) -{ - int h; - - h = atomic_read(&r->head) & r->mask; - return (h == (atomic_read(&r->tail) & r->mask)) ? NULL : &r->rd[h]; -} - -static inline struct ring_descr *ring_get(struct vlsi_ring *r) -{ - atomic_inc(&r->head); - return ring_first(r); -} - -/******************************************************************/ - -/* our private compound VLSI-PCI-IRDA device information */ - -typedef struct vlsi_irda_dev { - struct pci_dev *pdev; - struct net_device_stats stats; - - struct irlap_cb *irlap; - - struct qos_info qos; - - unsigned mode; - int baud, new_baud; - - dma_addr_t busaddr; - void *virtaddr; - struct vlsi_ring *tx_ring, *rx_ring; - - struct timeval last_rx; - - spinlock_t lock; - struct semaphore sem; - - u32 cfg_space[64/sizeof(u32)]; - u8 resume_ok; - struct proc_dir_entry *proc_entry; - -} vlsi_irda_dev_t; - -/********************************************************/ - -/* the remapped error flags we use for returning from frame - * post-processing in vlsi_process_tx/rx() after it was completed - * by the hardware. These functions either return the >=0 number - * of transfered bytes in case of success or the negative (-) - * of the or'ed error flags. 
- */ - -#define VLSI_TX_DROP 0x0001 -#define VLSI_TX_FIFO 0x0002 - -#define VLSI_RX_DROP 0x0100 -#define VLSI_RX_OVER 0x0200 -#define VLSI_RX_LENGTH 0x0400 -#define VLSI_RX_FRAME 0x0800 -#define VLSI_RX_CRC 0x1000 - -/********************************************************/ - -#endif /* IRDA_VLSI_FIR_H */ - --- linux-2.6.6-rc1/include/net/irda/w83977af.h 2003-06-14 12:18:34.000000000 -0700 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,53 +0,0 @@ -#ifndef W83977AF_H -#define W83977AF_H - -#define W977_EFIO_BASE 0x370 -#define W977_EFIO2_BASE 0x3f0 -#define W977_DEVICE_IR 0x06 - - -/* - * Enter extended function mode - */ -static inline void w977_efm_enter(unsigned int efio) -{ - outb(0x87, efio); - outb(0x87, efio); -} - -/* - * Select a device to configure - */ - -static inline void w977_select_device(__u8 devnum, unsigned int efio) -{ - outb(0x07, efio); - outb(devnum, efio+1); -} - -/* - * Write a byte to a register - */ -static inline void w977_write_reg(__u8 reg, __u8 value, unsigned int efio) -{ - outb(reg, efio); - outb(value, efio+1); -} - -/* - * read a byte from a register - */ -static inline __u8 w977_read_reg(__u8 reg, unsigned int efio) -{ - outb(reg, efio); - return inb(efio+1); -} - -/* - * Exit extended function mode - */ -static inline void w977_efm_exit(unsigned int efio) -{ - outb(0xAA, efio); -} -#endif --- linux-2.6.6-rc1/include/net/irda/w83977af_ir.h 2004-03-10 20:41:31.000000000 -0800 +++ /dev/null 2003-09-15 06:40:47.000000000 -0700 @@ -1,196 +0,0 @@ -/********************************************************************* - * - * Filename: w83977af_ir.h - * Version: - * Description: - * Status: Experimental. - * Author: Paul VanderSpek - * Created at: Thu Nov 19 13:55:34 1998 - * Modified at: Tue Jan 11 13:08:19 2000 - * Modified by: Dag Brattli - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#ifndef W83977AF_IR_H -#define W83977AF_IR_H - -#include - -/* Flags for configuration register CRF0 */ -#define ENBNKSEL 0x01 -#define APEDCRC 0x02 -#define TXW4C 0x04 -#define RXW4C 0x08 - -/* Bank 0 */ -#define RBR 0x00 /* Receiver buffer register */ -#define TBR 0x00 /* Transmitter buffer register */ - -#define ICR 0x01 /* Interrupt configuration register */ -#define ICR_ERBRI 0x01 /* Receiver buffer register interrupt */ -#define ICR_ETBREI 0x02 /* Transeiver empty interrupt */ -#define ICR_EUSRI 0x04//* IR status interrupt */ -#define ICR_EHSRI 0x04 -#define ICR_ETXURI 0x04 /* Tx underrun */ -#define ICR_EDMAI 0x10 /* DMA interrupt */ -#define ICR_ETXTHI 0x20 /* Transmitter threshold interrupt */ -#define ICR_EFSFI 0x40 /* Frame status FIFO interrupt */ -#define ICR_ETMRI 0x80 /* Timer interrupt */ - -#define UFR 0x02 /* FIFO control register */ -#define UFR_EN_FIFO 0x01 /* Enable FIFO's */ -#define UFR_RXF_RST 0x02 /* Reset Rx FIFO */ -#define UFR_TXF_RST 0x04 /* Reset Tx FIFO */ -#define UFR_RXTL 0x80 /* Rx FIFO threshold (set to 16) */ -#define UFR_TXTL 0x20 /* Tx FIFO threshold (set to 17) */ - -#define ISR 0x02 /* Interrupt status register */ -#define ISR_RXTH_I 0x01 /* Receive threshold interrupt */ -#define ISR_TXEMP_I 0x02 /* Transmitter empty interrupt */ -#define ISR_FEND_I 0x04 -#define ISR_DMA_I 0x10 -#define ISR_TXTH_I 0x20 /* Transmitter threshold interrupt */ -#define ISR_FSF_I 0x40 -#define ISR_TMR_I 0x80 /* Timer interrupt */ - -#define 
UCR 0x03 /* Uart control register */ -#define UCR_DLS8 0x03 /* 8N1 */ - -#define SSR 0x03 /* Sets select register */ -#define SET0 UCR_DLS8 /* Make sure we keep 8N1 */ -#define SET1 (0x80|UCR_DLS8) /* Make sure we keep 8N1 */ -#define SET2 0xE0 -#define SET3 0xE4 -#define SET4 0xE8 -#define SET5 0xEC -#define SET6 0xF0 -#define SET7 0xF4 - -#define HCR 0x04 -#define HCR_MODE_MASK ~(0xD0) -#define HCR_SIR 0x60 -#define HCR_MIR_576 0x20 -#define HCR_MIR_1152 0x80 -#define HCR_FIR 0xA0 -#define HCR_EN_DMA 0x04 -#define HCR_EN_IRQ 0x08 -#define HCR_TX_WT 0x08 - -#define USR 0x05 /* IR status register */ -#define USR_RDR 0x01 /* Receive data ready */ -#define USR_TSRE 0x40 /* Transmitter empty? */ - -#define AUDR 0x07 -#define AUDR_SFEND 0x08 /* Set a frame end */ -#define AUDR_RXBSY 0x20 /* Rx busy */ -#define AUDR_UNDR 0x40 /* Transeiver underrun */ - -/* Set 2 */ -#define ABLL 0x00 /* Advanced baud rate divisor latch (low byte) */ -#define ABHL 0x01 /* Advanced baud rate divisor latch (high byte) */ - -#define ADCR1 0x02 -#define ADCR1_ADV_SL 0x01 -#define ADCR1_D_CHSW 0x08 /* the specs are wrong. 
its bit 3, not 4 */ -#define ADCR1_DMA_F 0x02 - -#define ADCR2 0x04 -#define ADCR2_TXFS32 0x01 -#define ADCR2_RXFS32 0x04 - -#define RXFDTH 0x07 - -/* Set 3 */ -#define AUID 0x00 - -/* Set 4 */ -#define TMRL 0x00 /* Timer value register (low byte) */ -#define TMRH 0x01 /* Timer value register (high byte) */ - -#define IR_MSL 0x02 /* Infrared mode select */ -#define IR_MSL_EN_TMR 0x01 /* Enable timer */ - -#define TFRLL 0x04 /* Transmitter frame length (low byte) */ -#define TFRLH 0x05 /* Transmitter frame length (high byte) */ -#define RFRLL 0x06 /* Receiver frame length (low byte) */ -#define RFRLH 0x07 /* Receiver frame length (high byte) */ - -/* Set 5 */ - -#define FS_FO 0x05 /* Frame status FIFO */ -#define FS_FO_FSFDR 0x80 /* Frame status FIFO data ready */ -#define FS_FO_LST_FR 0x40 /* Frame lost */ -#define FS_FO_MX_LEX 0x10 /* Max frame len exceeded */ -#define FS_FO_PHY_ERR 0x08 /* Physical layer error */ -#define FS_FO_CRC_ERR 0x04 -#define FS_FO_RX_OV 0x02 /* Receive overrun */ -#define FS_FO_FSF_OV 0x01 /* Frame status FIFO overrun */ -#define FS_FO_ERR_MSK 0x5f /* Error mask */ - -#define RFLFL 0x06 -#define RFLFH 0x07 - -/* Set 6 */ -#define IR_CFG2 0x00 -#define IR_CFG2_DIS_CRC 0x02 - -/* Set 7 */ -#define IRM_CR 0x07 /* Infrared module control register */ -#define IRM_CR_IRX_MSL 0x40 -#define IRM_CR_AF_MNT 0x80 /* Automatic format */ - -/* For storing entries in the status FIFO */ -struct st_fifo_entry { - int status; - int len; -}; - -struct st_fifo { - struct st_fifo_entry entries[10]; - int head; - int tail; - int len; -}; - -/* Private data for each instance */ -struct w83977af_ir { - struct st_fifo st_fifo; - - int tx_buff_offsets[10]; /* Offsets between frames in tx_buff */ - int tx_len; /* Number of frames in tx_buff */ - - struct net_device *netdev; /* Yes! 
we are some kind of netdevice */ - struct net_device_stats stats; - - struct irlap_cb *irlap; /* The link layer we are binded to */ - struct qos_info qos; /* QoS capabilities for this device */ - - chipio_t io; /* IrDA controller information */ - iobuff_t tx_buff; /* Transmit buffer */ - iobuff_t rx_buff; /* Receive buffer */ - - /* Note : currently locking is *very* incomplete, but this - * will get you started. Check in nsc-ircc.c for a proper - * locking strategy. - Jean II */ - spinlock_t lock; /* For serializing operations */ - - __u32 new_speed; -}; - -static inline void switch_bank( int iobase, int set) -{ - outb(set, iobase+SSR); -} - -#endif --- linux-2.6.6-rc1/include/net/neighbour.h 2004-02-03 20:42:39.000000000 -0800 +++ 25/include/net/neighbour.h 2004-04-18 22:25:25.013027920 -0700 @@ -281,6 +281,8 @@ __neigh_lookup_errno(struct neigh_table return neigh_create(tbl, pkey, dev); } +#define LOCALLY_ENQUEUED -2 + #endif #endif --- linux-2.6.6-rc1/include/net/sctp/sctp.h 2004-02-03 20:42:39.000000000 -0800 +++ 25/include/net/sctp/sctp.h 2004-04-18 22:25:25.014027768 -0700 @@ -1,5 +1,5 @@ /* SCTP kernel reference Implementation - * (C) Copyright IBM Corp. 2001, 2003 + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 Intel Corp. @@ -78,6 +78,7 @@ #include #include #include +#include #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include @@ -223,24 +224,6 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_ #define SCTP_INC_STATS_USER(field) SNMP_INC_STATS_USER(sctp_statistics, field) #define SCTP_DEC_STATS(field) SNMP_DEC_STATS(sctp_statistics, field) -/* Determine if this is a valid kernel address. */ -static inline int sctp_is_valid_kaddr(unsigned long addr) -{ - struct page *page; - - /* Make sure the address is not in the user address space. */ - if (addr < PAGE_OFFSET) - return 0; - - page = virt_to_page(addr); - - /* Is this page valid? 
*/ - if (!virt_addr_valid(addr) || PageReserved(page)) - return 0; - - return 1; -} - #endif /* !TEST_FRAME */ @@ -357,7 +340,7 @@ static inline void sctp_v6_exit(void) { /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { - return (sctp_assoc_t) asoc; + return (asoc?asoc->assoc_id:NULL); } /* Look up the association by its id. */ @@ -519,6 +502,9 @@ extern struct proto sctp_prot; extern struct proc_dir_entry *proc_net_sctp; void sctp_put_port(struct sock *sk); +extern struct idr sctp_assocs_id; +extern spinlock_t sctp_assocs_id_lock; + /* Static inline functions. */ /* Convert from an IP version number to an Address Family symbol. */ --- linux-2.6.6-rc1/include/net/sctp/structs.h 2004-02-17 20:48:46.000000000 -0800 +++ 25/include/net/sctp/structs.h 2004-04-18 22:25:25.015027616 -0700 @@ -1,5 +1,5 @@ /* SCTP kernel reference Implementation - * (C) Copyright IBM Corp. 2001, 2003 + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. @@ -1282,11 +1282,8 @@ struct sctp_association { /* Associations on the same socket. */ struct list_head asocs; - /* This is a signature that lets us know that this is a - * struct sctp_association data structure. Used for mapping an - * association id to an association. - */ - __u32 eyecatcher; + /* association id. */ + sctp_assoc_t assoc_id; /* This is our parent endpoint. */ struct sctp_endpoint *ep; --- linux-2.6.6-rc1/include/net/sctp/ulpevent.h 2003-07-27 12:14:40.000000000 -0700 +++ 25/include/net/sctp/ulpevent.h 2004-04-18 22:25:25.016027464 -0700 @@ -1,7 +1,7 @@ /* SCTP kernel reference Implementation + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. - * Copyright (c) 2001 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. 
* Copyright (c) 2001 La Monte H.P. Yarroll @@ -54,7 +54,13 @@ * growing this structure as it is at the maximum limit now. */ struct sctp_ulpevent { - struct sctp_sndrcvinfo sndrcvinfo; + struct sctp_association *asoc; + __u16 stream; + __u16 ssn; + __u16 flags; + __u32 ppid; + __u32 tsn; + __u32 cumtsn; int msg_flags; int iif; }; --- linux-2.6.6-rc1/include/net/sock.h 2004-02-03 20:42:39.000000000 -0800 +++ 25/include/net/sock.h 2004-04-18 22:25:25.017027312 -0700 @@ -382,6 +382,7 @@ enum sock_flags { SOCK_LINGER, SOCK_DESTROY, SOCK_BROADCAST, + SOCK_TIMESTAMP, }; static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) @@ -561,8 +562,8 @@ extern void __lock_sock(struct sock *sk) extern void __release_sock(struct sock *sk); #define sock_owned_by_user(sk) ((sk)->sk_lock.owner) -extern void lock_sock(struct sock *sk); -extern void release_sock(struct sock *sk); +extern void FASTCALL(lock_sock(struct sock *sk)); +extern void FASTCALL(release_sock(struct sock *sk)); /* BH context may only use the following locking interface. 
*/ #define bh_lock_sock(__sk) spin_lock(&((__sk)->sk_lock.slock)) @@ -623,9 +624,9 @@ extern int sock_no_ extern int sock_no_listen(struct socket *, int); extern int sock_no_shutdown(struct socket *, int); extern int sock_no_getsockopt(struct socket *, int , int, - char *, int *); + char __user *, int __user *); extern int sock_no_setsockopt(struct socket *, int, int, - char *, int); + char __user *, int); extern int sock_no_sendmsg(struct kiocb *, struct socket *, struct msghdr *, size_t); extern int sock_no_recvmsg(struct kiocb *, struct socket *, @@ -1023,11 +1024,33 @@ static inline int sock_intr_errno(long t static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sk->sk_rcvtstamp) - put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(skb->stamp), &skb->stamp); - else - sk->sk_stamp = skb->stamp; -} + struct timeval *stamp = &skb->stamp; + if (sk->sk_rcvtstamp) { + /* Race occurred between timestamp enabling and packet + receiving. Fill in the current time for now. 
*/ + if (stamp->tv_sec == 0) + do_gettimeofday(stamp); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), + stamp); + } else + sk->sk_stamp = *stamp; +} + +extern atomic_t netstamp_needed; +extern void sock_enable_timestamp(struct sock *sk); +extern void sock_disable_timestamp(struct sock *sk); + +static inline void net_timestamp(struct timeval *stamp) +{ + if (atomic_read(&netstamp_needed)) + do_gettimeofday(stamp); + else { + stamp->tv_sec = 0; + stamp->tv_usec = 0; + } +} + +extern int sock_get_timestamp(struct sock *, struct timeval *); /* * Enable debug/info messages @@ -1035,8 +1058,10 @@ sock_recv_timestamp(struct msghdr *msg, #if 0 #define NETDEBUG(x) do { } while (0) +#define LIMIT_NETDEBUG(x) do {} while(0) #else #define NETDEBUG(x) do { x; } while (0) +#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) #endif /* --- linux-2.6.6-rc1/include/sound/sndmagic.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/include/sound/sndmagic.h 2004-04-18 22:25:25.017027312 -0700 @@ -203,6 +203,7 @@ static inline int _snd_magic_bad(void *o #define pdacf_t_magic 0xa15a4500 #define vortex_t_magic 0xa15a4601 #define atiixp_t_magic 0xa15a4701 +#define amd7930_t_magic 0xa15a4801 #else --- linux-2.6.6-rc1/init/Kconfig 2004-04-14 23:14:49.000000000 -0700 +++ 25/init/Kconfig 2004-04-18 22:25:35.264469464 -0700 @@ -43,7 +43,7 @@ config CLEAN_COMPILE config STANDALONE bool "Select only drivers that don't need compile-time external firmware" if EXPERIMENTAL - default y + default n help Select this option if you don't have magic firmware for drivers that need it. 
--- linux-2.6.6-rc1/init/main.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/init/main.c 2004-04-18 22:25:57.667063752 -0700 @@ -84,11 +84,11 @@ extern void signals_init(void); extern void buffer_init(void); extern void pidhash_init(void); extern void pidmap_init(void); -extern void pte_chain_init(void); extern void radix_tree_init(void); extern void free_initmem(void); extern void populate_rootfs(void); extern void driver_init(void); +extern void prepare_namespace(void); #ifdef CONFIG_TC extern void tc_init(void); @@ -107,6 +107,9 @@ extern void time_init(void); void (*late_time_init)(void); extern void softirq_init(void); +/* Untouched command line (eg. for /proc) saved by arch-specific code. */ +char saved_command_line[COMMAND_LINE_SIZE]; + static char *execute_command; /* Setup configured maximum number of CPUs to activate */ @@ -153,6 +156,9 @@ static int __init obsolete_checksetup(ch do { int n = strlen(p->str); if (!strncmp(line, p->str, n)) { + /* Already done in parse_early_param? */ + if (p->early) + return 1; if (!p->setup_func) { printk(KERN_WARNING "Parameter %s is obsolete, ignored\n", p->str); return 1; @@ -391,6 +397,38 @@ static void noinline rest_init(void) cpu_idle(); } +/* Check for early params. */ +static int __init do_early_param(char *param, char *val) +{ + struct obs_kernel_param *p; + extern struct obs_kernel_param __setup_start, __setup_end; + + for (p = &__setup_start; p < &__setup_end; p++) { + if (p->early && strcmp(param, p->str) == 0) { + if (p->setup_func(val) != 0) + printk(KERN_WARNING + "Malformed early option '%s'\n", param); + } + } + /* We accept everything at this stage. */ + return 0; +} + +/* Arch code calls this early on, or if not, just before other parsing. */ +void __init parse_early_param(void) +{ + static __initdata int done = 0; + static __initdata char tmp_cmdline[COMMAND_LINE_SIZE]; + + if (done) + return; + + /* All fall through to do_early_param. 
*/ + strlcpy(tmp_cmdline, saved_command_line, COMMAND_LINE_SIZE); + parse_args("early options", tmp_cmdline, NULL, 0, do_early_param); + done = 1; +} + /* * Activate the first processor. */ @@ -398,7 +436,6 @@ static void noinline rest_init(void) asmlinkage void __init start_kernel(void) { char * command_line; - extern char saved_command_line[]; extern struct kernel_param __start___param[], __stop___param[]; /* * Interrupts are still disabled. Do necessary setups, then @@ -416,18 +453,26 @@ asmlinkage void __init start_kernel(void */ smp_prepare_boot_cpu(); + /* + * Set up the scheduler prior starting any interrupts (such as the + * timer interrupt). Full topology setup happens at smp_init() + * time - but meanwhile we still have a functioning scheduler. + */ + sched_init(); + build_all_zonelists(); page_alloc_init(); + trap_init(); printk("Kernel command line: %s\n", saved_command_line); + parse_early_param(); parse_args("Booting kernel", command_line, __start___param, __stop___param - __start___param, &unknown_bootoption); sort_main_extable(); - trap_init(); rcu_init(); init_IRQ(); pidhash_init(); - sched_init(); + init_timers(); softirq_init(); time_init(); @@ -456,7 +501,6 @@ asmlinkage void __init start_kernel(void calibrate_delay(); pidmap_init(); pgtable_cache_init(); - pte_chain_init(); #ifdef CONFIG_X86 if (efi_enabled) efi_enter_virtual_mode(); @@ -471,7 +515,6 @@ asmlinkage void __init start_kernel(void signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); - populate_rootfs(); #ifdef CONFIG_PROC_FS proc_root_init(); #endif @@ -567,7 +610,6 @@ static void do_pre_smp_initcalls(void) migration_init(); #endif - node_nr_running_init(); spawn_ksoftirqd(); } @@ -577,8 +619,6 @@ static void run_init_process(char *init_ execve(init_filename, argv_init, envp_init); } -extern void prepare_namespace(void); - static int init(void * unused) { lock_kernel(); @@ -598,16 +638,18 @@ static int init(void * unused) do_pre_smp_initcalls(); 
smp_init(); + sched_init_smp(); do_basic_setup(); - /* - * check if there is an early userspace init, if yes - * let it do all the work - */ - if (sys_access("/init", 0) == 0) - execute_command = "/init"; - else - prepare_namespace(); + populate_rootfs(); + /* + * check if there is an early userspace init. If yes, let it do all + * the work + */ + if (sys_access("/init", 0) == 0) + execute_command = "/init"; + else + prepare_namespace(); /* * Ok, we have completed the initial bootup, and @@ -641,3 +683,10 @@ static int init(void * unused) panic("No init found. Try passing init= option to kernel."); } + +static int early_param_test(char *rest) +{ + printk("early_parm_test: %s\n", rest ?: "(null)"); + return rest ? 0 : -EINVAL; +} +early_param("testsetup", early_param_test); --- linux-2.6.6-rc1/ipc/mqueue.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/ipc/mqueue.c 2004-04-18 22:25:25.019027008 -0700 @@ -65,8 +65,8 @@ struct mqueue_inode_info { struct msg_msg **messages; struct mq_attr attr; - struct sigevent notify; /* notify.sigev_notify == SIGEV_NONE means */ - pid_t notify_owner; /* no notification registered */ + struct sigevent notify; + pid_t notify_owner; struct sock *notify_sock; struct sk_buff *notify_cookie; @@ -122,7 +122,7 @@ static struct inode *mqueue_get_inode(st init_waitqueue_head(&info->wait_q); INIT_LIST_HEAD(&info->e_wait_q[0].list); INIT_LIST_HEAD(&info->e_wait_q[1].list); - info->notify.sigev_notify = SIGEV_NONE; + info->notify_owner = 0; info->qsize = 0; memset(&info->attr, 0, sizeof(info->attr)); info->attr.mq_maxmsg = DFLT_MSGMAX; @@ -153,7 +153,7 @@ static int mqueue_fill_super(struct supe sb->s_magic = MQUEUE_MAGIC; sb->s_op = &mqueue_super_ops; - inode = mqueue_get_inode(sb, S_IFDIR | S_IRWXUGO); + inode = mqueue_get_inode(sb, S_IFDIR | S_ISVTX | S_IRWXUGO); if (!inode) return -ENOMEM; @@ -286,11 +286,11 @@ static ssize_t mqueue_read_file(struct f snprintf(buffer, sizeof(buffer), "QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n", 
info->qsize, - info->notify.sigev_notify, - (info->notify.sigev_notify == SIGEV_SIGNAL ) ? + info->notify_owner ? info->notify.sigev_notify : 0, + (info->notify_owner && + info->notify.sigev_notify == SIGEV_SIGNAL) ? info->notify.sigev_signo : 0, - (info->notify.sigev_notify != SIGEV_NONE) ? - info->notify_owner : 0); + info->notify_owner); spin_unlock(&info->lock); buffer[sizeof(buffer)-1] = '\0'; slen = strlen(buffer)+1; @@ -315,8 +315,7 @@ static int mqueue_flush_file(struct file struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode); spin_lock(&info->lock); - if (info->notify.sigev_notify != SIGEV_NONE && - current->tgid == info->notify_owner) + if (current->tgid == info->notify_owner) remove_notification(info); spin_unlock(&info->lock); @@ -455,11 +454,14 @@ static void __do_notify(struct mqueue_in * waiting synchronously for message AND state of queue changed from * empty to not empty. Here we are sure that no one is waiting * synchronously. */ - if (info->notify.sigev_notify != SIGEV_NONE && - info->attr.mq_curmsgs == 1) { - /* sends signal */ - if (info->notify.sigev_notify == SIGEV_SIGNAL) { - struct siginfo sig_i; + if (info->notify_owner && + info->attr.mq_curmsgs == 1) { + struct siginfo sig_i; + switch (info->notify.sigev_notify) { + case SIGEV_NONE: + break; + case SIGEV_SIGNAL: + /* sends signal */ sig_i.si_signo = info->notify.sigev_signo; sig_i.si_errno = 0; @@ -470,13 +472,15 @@ static void __do_notify(struct mqueue_in kill_proc_info(info->notify.sigev_signo, &sig_i, info->notify_owner); - } else if (info->notify.sigev_notify == SIGEV_THREAD) { + break; + case SIGEV_THREAD: set_cookie(info->notify_cookie, NOTIFY_WOKENUP); netlink_sendskb(info->notify_sock, info->notify_cookie, 0); + break; } /* after notification unregisters process */ - info->notify.sigev_notify = SIGEV_NONE; + info->notify_owner = 0; } wake_up(&info->wait_q); } @@ -514,11 +518,12 @@ static long prepare_timeout(const struct static void remove_notification(struct 
mqueue_inode_info *info) { - if (info->notify.sigev_notify == SIGEV_THREAD) { + if (info->notify_owner != 0 && + info->notify.sigev_notify == SIGEV_THREAD) { set_cookie(info->notify_cookie, NOTIFY_REMOVED); netlink_sendskb(info->notify_sock, info->notify_cookie, 0); } - info->notify.sigev_notify = SIGEV_NONE; + info->notify_owner = 0; } /* @@ -641,6 +646,7 @@ asmlinkage long sys_mq_open(const char _ goto out_putfd; } + set_close_on_exec(fd, 1); fd_install(fd, filp); goto out_upsem; @@ -679,10 +685,6 @@ asmlinkage long sys_mq_unlink(const char goto out_err; } - if (permission(dentry->d_inode, MAY_WRITE, NULL)) { - err = -EACCES; - goto out_err; - } inode = dentry->d_inode; if (inode) atomic_inc(&inode->i_count); @@ -908,9 +910,9 @@ out: } /* - * Notes: the case when user wants us to deregister (with NULL as pointer - * or SIGEV_NONE) and he isn't currently owner of notification will be - * silently discarded. It isn't explicitly defined in the POSIX. + * Notes: the case when user wants us to deregister (with NULL as pointer) + * and he isn't currently owner of notification, will be silently discarded. + * It isn't explicitly defined in the POSIX. 
*/ asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification) @@ -925,9 +927,7 @@ asmlinkage long sys_mq_notify(mqd_t mqde nc = NULL; sock = NULL; - if (u_notification == NULL) { - notification.sigev_notify = SIGEV_NONE; - } else { + if (u_notification != NULL) { if (copy_from_user(&notification, u_notification, sizeof(struct sigevent))) return -EFAULT; @@ -993,35 +993,31 @@ retry: ret = 0; spin_lock(&info->lock); - switch (notification.sigev_notify) { - case SIGEV_NONE: - if (info->notify.sigev_notify != SIGEV_NONE && - info->notify_owner == current->tgid) { + if (u_notification == NULL) { + if (info->notify_owner == current->tgid) { remove_notification(info); inode->i_atime = inode->i_ctime = CURRENT_TIME; } - break; - case SIGEV_THREAD: - if (info->notify.sigev_notify != SIGEV_NONE) { - ret = -EBUSY; + } else if (info->notify_owner != 0) { + ret = -EBUSY; + } else { + switch (notification.sigev_notify) { + case SIGEV_NONE: + info->notify.sigev_notify = SIGEV_NONE; break; - } - info->notify_sock = sock; - info->notify_cookie = nc; - sock = NULL; - nc = NULL; - info->notify.sigev_notify = SIGEV_THREAD; - info->notify_owner = current->tgid; - inode->i_atime = inode->i_ctime = CURRENT_TIME; - break; - case SIGEV_SIGNAL: - if (info->notify.sigev_notify != SIGEV_NONE) { - ret = -EBUSY; + case SIGEV_THREAD: + info->notify_sock = sock; + info->notify_cookie = nc; + sock = NULL; + nc = NULL; + info->notify.sigev_notify = SIGEV_THREAD; + break; + case SIGEV_SIGNAL: + info->notify.sigev_signo = notification.sigev_signo; + info->notify.sigev_value = notification.sigev_value; + info->notify.sigev_notify = SIGEV_SIGNAL; break; } - info->notify.sigev_signo = notification.sigev_signo; - info->notify.sigev_value = notification.sigev_value; - info->notify.sigev_notify = SIGEV_SIGNAL; info->notify_owner = current->tgid; inode->i_atime = inode->i_ctime = CURRENT_TIME; } --- linux-2.6.6-rc1/ipc/shm.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/ipc/shm.c 
2004-04-18 22:25:50.348176392 -0700 @@ -163,6 +163,10 @@ static struct vm_operations_struct shm_v .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ .nopage = shmem_nopage, +#ifdef CONFIG_NUMA + .set_policy = shmem_set_policy, + .get_policy = shmem_get_policy, +#endif }; static int newseg (key_t key, int shmflg, size_t size) @@ -792,10 +796,12 @@ asmlinkage long sys_shmdt(char __user *s */ if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { - + int ret; size = vma->vm_file->f_dentry->d_inode->i_size; - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + ret = do_munmap(mm, vma->vm_start, + vma->vm_end - vma->vm_start); + WARN_ON(ret); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next @@ -819,9 +825,13 @@ asmlinkage long sys_shmdt(char __user *s /* finding a matching vma now does not alter retval */ if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && - (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) + (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { + int ret; - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + ret = do_munmap(mm, vma->vm_start, + vma->vm_end - vma->vm_start); + WARN_ON(ret); + } vma = next; } --- linux-2.6.6-rc1/kernel/exit.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/exit.c 2004-04-18 22:25:49.822256344 -0700 @@ -790,6 +790,7 @@ asmlinkage NORET_TYPE void do_exit(long __exit_fs(tsk); exit_namespace(tsk); exit_thread(); + mpol_free(tsk->mempolicy); if (tsk->signal->leader) disassociate_ctty(1); --- linux-2.6.6-rc1/kernel/fork.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/fork.c 2004-04-18 22:25:49.823256192 -0700 @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -215,11 +216,8 @@ void __init fork_init(unsigned long memp #endif /* create a slab on which task_structs can be allocated */ 
task_struct_cachep = - kmem_cache_create("task_struct", - sizeof(struct task_struct),ARCH_MIN_TASKALIGN, - 0, NULL, NULL); - if (!task_struct_cachep) - panic("fork_init(): cannot create task_struct SLAB cache"); + kmem_cache_create("task_struct", sizeof(struct task_struct), + ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL, NULL); #endif /* @@ -272,6 +270,7 @@ static inline int dup_mmap(struct mm_str struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge = 0; + struct mempolicy *pol; down_write(&oldmm->mmap_sem); flush_cache_mm(current->mm); @@ -313,6 +312,11 @@ static inline int dup_mmap(struct mm_str if (!tmp) goto fail_nomem; *tmp = *mpnt; + pol = mpol_copy(vma_policy(mpnt)); + retval = PTR_ERR(pol); + if (IS_ERR(pol)) + goto fail_nomem_policy; + vma_set_policy(tmp, pol); tmp->vm_flags &= ~VM_LOCKED; tmp->vm_mm = mm; tmp->vm_next = NULL; @@ -359,6 +363,8 @@ out: flush_tlb_mm(current->mm); up_write(&oldmm->mmap_sem); return retval; +fail_nomem_policy: + kmem_cache_free(vm_area_cachep, tmp); fail_nomem: retval = -ENOMEM; fail: @@ -421,9 +427,14 @@ struct mm_struct * mm_alloc(void) mm = allocate_mm(); if (mm) { memset(mm, 0, sizeof(*mm)); - return mm_init(mm); + mm = mm_init(mm); + if (mm && exec_rmap(mm)) { + mm_free_pgd(mm); + free_mm(mm); + mm = NULL; + } } - return NULL; + return mm; } /* @@ -450,6 +461,7 @@ void mmput(struct mm_struct *mm) spin_unlock(&mmlist_lock); exit_aio(mm); exit_mmap(mm); + exit_rmap(mm); mmdrop(mm); } } @@ -553,6 +565,12 @@ static int copy_mm(unsigned long clone_f if (!mm_init(mm)) goto fail_nomem; + if (dup_rmap(mm, oldmm)) { + mm_free_pgd(mm); + free_mm(mm); + goto fail_nomem; + } + if (init_new_context(tsk,mm)) goto fail_nocontext; @@ -953,10 +971,16 @@ struct task_struct *copy_process(unsigne p->security = NULL; p->io_context = NULL; p->audit_context = NULL; + p->mempolicy = mpol_copy(p->mempolicy); + if (IS_ERR(p->mempolicy)) { + retval = PTR_ERR(p->mempolicy); + p->mempolicy = NULL; + goto bad_fork_cleanup; + } retval = 
-ENOMEM; if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup; + goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) goto bad_fork_cleanup_security; /* copy all the process information */ @@ -1102,6 +1126,8 @@ bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_security: security_task_free(p); +bad_fork_cleanup_policy: + mpol_free(p->mempolicy); bad_fork_cleanup: if (p->pid > 0) free_pidmap(p->pid); @@ -1180,9 +1206,23 @@ long do_fork(unsigned long clone_flags, set_tsk_thread_flag(p, TIF_SIGPENDING); } - if (!(clone_flags & CLONE_STOPPED)) - wake_up_forked_process(p); /* do this last */ - else + if (!(clone_flags & CLONE_STOPPED)) { + /* + * Do the wakeup last. On SMP we treat fork() and + * CLONE_VM separately, because fork() has already + * created cache footprint on this CPU (due to + * copying the pagetables), hence migration would + * probably be costly. Threads on the other hand + * have less traction to the current CPU, and if + * there's an imbalance then the scheduler can + * migrate this fresh thread now, before it + * accumulates a larger cache footprint: + */ + if (clone_flags & CLONE_VM) + wake_up_forked_thread(p); + else + wake_up_forked_process(p); + } else p->state = TASK_STOPPED; ++total_forks; @@ -1227,37 +1267,21 @@ void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!sighand_cachep) - panic("Cannot create sighand SLAB cache"); - + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!signal_cachep) - panic("Cannot create signal SLAB cache"); - + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); files_cachep = kmem_cache_create("files_cache", - sizeof(struct files_struct), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!files_cachep) - panic("Cannot create files SLAB cache"); - + sizeof(struct files_struct), 0, 
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); fs_cachep = kmem_cache_create("fs_cache", - sizeof(struct fs_struct), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (!fs_cachep) - panic("Cannot create fs_struct SLAB cache"); - + sizeof(struct fs_struct), 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); vm_area_cachep = kmem_cache_create("vm_area_struct", sizeof(struct vm_area_struct), 0, - 0, NULL, NULL); - if(!vm_area_cachep) - panic("vma_init: Cannot alloc vm_area_struct SLAB cache"); - + SLAB_PANIC, NULL, NULL); mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if(!mm_cachep) - panic("vma_init: Cannot alloc mm_struct SLAB cache"); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + init_rmap(); } --- linux-2.6.6-rc1/kernel/kmod.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/kmod.c 2004-04-18 22:25:27.706618432 -0700 @@ -35,11 +35,13 @@ #include #include #include +#include #include extern int max_threads; #ifdef CONFIG_KMOD +static struct workqueue_struct *khelper_wq; /* modprobe_path is set via /proc/sys. @@ -109,6 +111,7 @@ int request_module(const char *fmt, ...) atomic_dec(&kmod_concurrent); return ret; } +EXPORT_SYMBOL(request_module); #endif /* CONFIG_KMOD */ #ifdef CONFIG_HOTPLUG @@ -197,9 +200,7 @@ static int wait_for_helper(void *data) return 0; } -/* - * This is run by keventd. - */ +/* This is run by khelper thread */ static void __call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; @@ -249,26 +250,22 @@ int call_usermodehelper(char *path, char }; DECLARE_WORK(work, __call_usermodehelper, &sub_info); - if (system_state != SYSTEM_RUNNING) + if (!khelper_wq) return -EBUSY; if (path[0] == '\0') - goto out; + return 0; - if (current_is_keventd()) { - /* We can't wait on keventd! 
*/ - __call_usermodehelper(&sub_info); - } else { - schedule_work(&work); - wait_for_completion(&done); - } -out: + queue_work(khelper_wq, &work); + wait_for_completion(&done); return sub_info.retval; } - EXPORT_SYMBOL(call_usermodehelper); -#ifdef CONFIG_KMOD -EXPORT_SYMBOL(request_module); -#endif - +static __init int usermodehelper_init(void) +{ + khelper_wq = create_singlethread_workqueue("khelper"); + BUG_ON(!khelper_wq); + return 0; +} +__initcall(usermodehelper_init); --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/kernel/lockmeter.c 2004-04-18 22:25:47.932543624 -0700 @@ -0,0 +1,1178 @@ +/* + * Copyright (C) 1999,2000 Silicon Graphics, Inc. + * + * Written by John Hawkes (hawkes@sgi.com) + * Based on klstat.c by Jack Steiner (steiner@sgi.com) + * + * Modified by Ray Bryant (raybry@us.ibm.com) + * Changes Copyright (C) 2000 IBM, Inc. + * Added save of index in spinlock_t to improve efficiency + * of "hold" time reporting for spinlocks + * Added support for hold time statistics for read and write + * locks. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define ASSERT(cond) +#define bzero(loc,size) memset(loc,0,size) + +/*<---------------------------------------------------*/ +/* lockmeter.c */ +/*>---------------------------------------------------*/ + +static lstat_control_t lstat_control __cacheline_aligned = + { LSTAT_OFF, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED, + 19 * 0, NR_CPUS * 0, 0, NR_CPUS * 0 }; + +static ushort lstat_make_dir_entry(void *, void *); + +/* + * lstat_lookup + * + * Given a RA, locate the directory entry for the lock. 
+ */ +static ushort +lstat_lookup(void *lock_ptr, void *caller_ra) +{ + ushort index; + lstat_directory_entry_t *dirp; + + dirp = lstat_control.dir; + + index = lstat_control.hashtab[DIRHASH(caller_ra)]; + while (dirp[index].caller_ra != caller_ra) { + if (index == 0) { + return lstat_make_dir_entry(lock_ptr, caller_ra); + } + index = dirp[index].next_stat_index; + } + + if (dirp[index].lock_ptr != NULL && dirp[index].lock_ptr != lock_ptr) { + dirp[index].lock_ptr = NULL; + } + + return index; +} + +/* + * lstat_make_dir_entry + * Called to add a new lock to the lock directory. + */ +static ushort +lstat_make_dir_entry(void *lock_ptr, void *caller_ra) +{ + lstat_directory_entry_t *dirp; + ushort index, hindex; + unsigned long flags; + + /* lock the table without recursively reentering this metering code */ + local_irq_save(flags); + _raw_spin_lock(&lstat_control.directory_lock); + + hindex = DIRHASH(caller_ra); + index = lstat_control.hashtab[hindex]; + dirp = lstat_control.dir; + while (index && dirp[index].caller_ra != caller_ra) + index = dirp[index].next_stat_index; + + if (index == 0) { + if (lstat_control.next_free_dir_index < LSTAT_MAX_STAT_INDEX) { + index = lstat_control.next_free_dir_index++; + lstat_control.dir[index].caller_ra = caller_ra; + lstat_control.dir[index].lock_ptr = lock_ptr; + lstat_control.dir[index].next_stat_index = + lstat_control.hashtab[hindex]; + lstat_control.hashtab[hindex] = index; + } else { + lstat_control.dir_overflow++; + } + } + _raw_spin_unlock(&lstat_control.directory_lock); + local_irq_restore(flags); + return index; +} + +int +lstat_update(void *lock_ptr, void *caller_ra, int action) +{ + int index; + int cpu; + + ASSERT(action < LSTAT_ACT_MAX_VALUES); + + if (lstat_control.state == LSTAT_OFF) + return 0; + + index = lstat_lookup(lock_ptr, caller_ra); + cpu = THIS_CPU_NUMBER; + (*lstat_control.counts[cpu])[index].count[action]++; + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); + + return index; +} + +int 
+lstat_update_time(void *lock_ptr, void *caller_ra, int action, uint32_t ticks) +{ + ushort index; + int cpu; + + ASSERT(action < LSTAT_ACT_MAX_VALUES); + + if (lstat_control.state == LSTAT_OFF) + return 0; + + index = lstat_lookup(lock_ptr, caller_ra); + cpu = THIS_CPU_NUMBER; + (*lstat_control.counts[cpu])[index].count[action]++; + (*lstat_control.counts[cpu])[index].cum_wait_ticks += (uint64_t) ticks; + if ((*lstat_control.counts[cpu])[index].max_wait_ticks < ticks) + (*lstat_control.counts[cpu])[index].max_wait_ticks = ticks; + + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles(); + + return index; +} + +void +_metered_spin_lock(spinlock_t * lock_ptr) +{ + if (lstat_control.state == LSTAT_OFF) { + _raw_spin_lock(lock_ptr); /* do the real lock */ + PUT_INDEX(lock_ptr, 0); /* clean index in case lockmetering */ + /* gets turned on before unlock */ + } else { + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); + int index; + + if (_raw_spin_trylock(lock_ptr)) { + index = lstat_update(lock_ptr, this_pc, + LSTAT_ACT_NO_WAIT); + } else { + uint32_t start_cycles = get_cycles(); + _raw_spin_lock(lock_ptr); /* do the real lock */ + index = lstat_update_time(lock_ptr, this_pc, + LSTAT_ACT_SPIN, get_cycles() - start_cycles); + } + /* save the index in the lock itself for use in spin unlock */ + PUT_INDEX(lock_ptr, index); + } +} + +int +_metered_spin_trylock(spinlock_t * lock_ptr) +{ + if (lstat_control.state == LSTAT_OFF) { + return _raw_spin_trylock(lock_ptr); + } else { + int retval; + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN); + + if ((retval = _raw_spin_trylock(lock_ptr))) { + int index = lstat_update(lock_ptr, this_pc, + LSTAT_ACT_NO_WAIT); + /* + * save the index in the lock itself for use in spin + * unlock + */ + PUT_INDEX(lock_ptr, index); + } else { + lstat_update(lock_ptr, this_pc, LSTAT_ACT_REJECT); + } + + return retval; + } +} + +void +_metered_spin_unlock(spinlock_t * lock_ptr) +{ + int index = -1; + + if (lstat_control.state != LSTAT_OFF) { + index = 
GET_INDEX(lock_ptr); + /* + * If statistics were turned off when we set the lock, + * then the index can be zero. If that is the case, + * then collect no stats on this call. + */ + if (index > 0) { + uint32_t hold_time; + int cpu = THIS_CPU_NUMBER; + hold_time = get_cycles() - + (*lstat_control.counts[cpu])[index].acquire_time; + (*lstat_control.counts[cpu])[index].cum_hold_ticks += + (uint64_t) hold_time; + if ((*lstat_control.counts[cpu])[index].max_hold_ticks < + hold_time) + (*lstat_control.counts[cpu])[index]. + max_hold_ticks = hold_time; + } + } + + /* make sure we don't have a stale index value saved */ + PUT_INDEX(lock_ptr, 0); + _raw_spin_unlock(lock_ptr); /* do the real unlock */ +} + +/* + * allocate the next global read lock structure and store its index + * in the rwlock at "lock_ptr". + */ +uint32_t +alloc_rwlock_struct(rwlock_t * rwlock_ptr) +{ + int index; + unsigned long flags; + int cpu = THIS_CPU_NUMBER; + + /* If we've already overflowed, then do a quick exit */ + if (lstat_control.next_free_read_lock_index > + LSTAT_MAX_READ_LOCK_INDEX) { + lstat_control.rwlock_overflow++; + return 0; + } + + local_irq_save(flags); + _raw_spin_lock(&lstat_control.directory_lock); + + /* It is possible this changed while we were waiting for the directory_lock */ + if (lstat_control.state == LSTAT_OFF) { + index = 0; + goto unlock; + } + + /* It is possible someone else got here first and set the index */ + if ((index = GET_RWINDEX(rwlock_ptr)) == 0) { + /* + * we can't turn on read stats for this lock while there are + * readers (this would mess up the running hold time sum at + * unlock time) + */ + if (RWLOCK_READERS(rwlock_ptr) != 0) { + index = 0; + goto unlock; + } + + /* + * if stats are turned on after being off, we may need to + * return an old index from when the statistics were on last + * time. + */ + for (index = 1; index < lstat_control.next_free_read_lock_index; + index++) + if ((*lstat_control.read_lock_counts[cpu])[index]. 
+ lock_ptr == rwlock_ptr) + goto put_index_and_unlock; + + /* allocate the next global read lock structure */ + if (lstat_control.next_free_read_lock_index >= + LSTAT_MAX_READ_LOCK_INDEX) { + lstat_control.rwlock_overflow++; + index = 0; + goto unlock; + } + index = lstat_control.next_free_read_lock_index++; + + /* + * initialize the global read stats data structure for each + * cpu + */ + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + (*lstat_control.read_lock_counts[cpu])[index].lock_ptr = + rwlock_ptr; + } +put_index_and_unlock: + /* store the index for the read lock structure into the lock */ + PUT_RWINDEX(rwlock_ptr, index); + } + +unlock: + _raw_spin_unlock(&lstat_control.directory_lock); + local_irq_restore(flags); + return index; +} + +void +_metered_read_lock(rwlock_t * rwlock_ptr) +{ + void *this_pc; + uint32_t start_cycles; + int index; + int cpu; + unsigned long flags; + int readers_before, readers_after; + uint64_t cycles64; + + if (lstat_control.state == LSTAT_OFF) { + _raw_read_lock(rwlock_ptr); + /* clean index in case lockmetering turns on before an unlock */ + PUT_RWINDEX(rwlock_ptr, 0); + return; + } + + this_pc = LSTAT_RA(LSTAT_RA_READ); + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* allocate the global stats entry for this lock, if needed */ + if (index == 0) + index = alloc_rwlock_struct(rwlock_ptr); + + readers_before = RWLOCK_READERS(rwlock_ptr); + if (_raw_read_trylock(rwlock_ptr)) { + /* + * We have decremented the lock to count a new reader, + * and have confirmed that no writer has it locked. + */ + /* update statistics if enabled */ + if (index > 0) { + local_irq_save(flags); + lstat_update((void *) rwlock_ptr, this_pc, + LSTAT_ACT_NO_WAIT); + /* preserve value of TSC so cum_hold_ticks and start_busy use same value */ + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index]. 
+ cum_hold_ticks -= cycles64; + + /* record time and cpu of start of busy period */ + /* this is not perfect (some race conditions are possible) */ + if (readers_before == 0) { + (*lstat_control.read_lock_counts[cpu])[index]. + start_busy = cycles64; + PUT_RW_CPU(rwlock_ptr, cpu); + } + readers_after = RWLOCK_READERS(rwlock_ptr); + if (readers_after > + (*lstat_control.read_lock_counts[cpu])[index]. + max_readers) + (*lstat_control.read_lock_counts[cpu])[index]. + max_readers = readers_after; + local_irq_restore(flags); + } + + return; + } + /* If we get here, then we could not quickly grab the read lock */ + + start_cycles = get_cycles(); /* start counting the wait time */ + + /* Now spin until read_lock is successful */ + _raw_read_lock(rwlock_ptr); + + lstat_update_time((void *) rwlock_ptr, this_pc, LSTAT_ACT_SPIN, + get_cycles() - start_cycles); + + /* update statistics if they are enabled for this lock */ + if (index > 0) { + local_irq_save(flags); + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks -= + cycles64; + + /* this is not perfect (some race conditions are possible) */ + if (readers_before == 0) { + (*lstat_control.read_lock_counts[cpu])[index]. + start_busy = cycles64; + PUT_RW_CPU(rwlock_ptr, cpu); + } + readers_after = RWLOCK_READERS(rwlock_ptr); + if (readers_after > + (*lstat_control.read_lock_counts[cpu])[index].max_readers) + (*lstat_control.read_lock_counts[cpu])[index]. + max_readers = readers_after; + local_irq_restore(flags); + } +} + +void +_metered_read_unlock(rwlock_t * rwlock_ptr) +{ + int index; + int cpu; + unsigned long flags; + uint64_t busy_length; + uint64_t cycles64; + + if (lstat_control.state == LSTAT_OFF) { + _raw_read_unlock(rwlock_ptr); + return; + } + + index = GET_RWINDEX(rwlock_ptr); + cpu = THIS_CPU_NUMBER; + + if (index > 0) { + local_irq_save(flags); + /* + * preserve value of TSC so cum_hold_ticks and busy_ticks are + * consistent. 
+ */ + cycles64 = get_cycles64(); + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks += + cycles64; + (*lstat_control.read_lock_counts[cpu])[index].read_lock_count++; + + /* + * once again, this is not perfect (some race conditions are + * possible) + */ + if (RWLOCK_READERS(rwlock_ptr) == 1) { + int cpu1 = GET_RW_CPU(rwlock_ptr); + uint64_t last_start_busy = + (*lstat_control.read_lock_counts[cpu1])[index]. + start_busy; + (*lstat_control.read_lock_counts[cpu])[index]. + busy_periods++; + if (cycles64 > last_start_busy) { + busy_length = cycles64 - last_start_busy; + (*lstat_control.read_lock_counts[cpu])[index]. + busy_ticks += busy_length; + if (busy_length > + (*lstat_control. + read_lock_counts[cpu])[index]. + max_busy) + (*lstat_control. + read_lock_counts[cpu])[index]. + max_busy = busy_length; + } + } + local_irq_restore(flags); + } + _raw_read_unlock(rwlock_ptr); +} + +void +_metered_write_lock(rwlock_t * rwlock_ptr) +{ + uint32_t start_cycles; + void *this_pc; + uint32_t spin_ticks = 0; /* in anticipation of a potential wait */ + int index; + int write_index = 0; + int cpu; + enum { + writer_writer_conflict, + writer_reader_conflict + } why_wait = writer_writer_conflict; + + if (lstat_control.state == LSTAT_OFF) { + _raw_write_lock(rwlock_ptr); + /* clean index in case lockmetering turns on before an unlock */ + PUT_RWINDEX(rwlock_ptr, 0); + return; + } + + this_pc = LSTAT_RA(LSTAT_RA_WRITE); + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* allocate the global stats entry for this lock, if needed */ + if (index == 0) { + index = alloc_rwlock_struct(rwlock_ptr); + } + + if (_raw_write_trylock(rwlock_ptr)) { + /* We acquired the lock on the first try */ + write_index = lstat_update((void *) rwlock_ptr, this_pc, + LSTAT_ACT_NO_WAIT); + /* save the write_index for use in unlock if stats enabled */ + if (index > 0) + (*lstat_control.read_lock_counts[cpu])[index]. 
+ write_index = write_index; + return; + } + + /* If we get here, then we could not quickly grab the write lock */ + start_cycles = get_cycles(); /* start counting the wait time */ + + why_wait = RWLOCK_READERS(rwlock_ptr) ? + writer_reader_conflict : writer_writer_conflict; + + /* Now set the lock and wait for conflicts to disappear */ + _raw_write_lock(rwlock_ptr); + + spin_ticks = get_cycles() - start_cycles; + + /* update stats -- if enabled */ + if (index > 0 && spin_ticks) { + if (why_wait == writer_reader_conflict) { + /* waited due to a reader holding the lock */ + write_index = lstat_update_time((void *)rwlock_ptr, + this_pc, LSTAT_ACT_SPIN, spin_ticks); + } else { + /* + * waited due to another writer holding the lock + */ + write_index = lstat_update_time((void *)rwlock_ptr, + this_pc, LSTAT_ACT_WW_SPIN, spin_ticks); + (*lstat_control.counts[cpu])[write_index]. + cum_wait_ww_ticks += spin_ticks; + if (spin_ticks > + (*lstat_control.counts[cpu])[write_index]. + max_wait_ww_ticks) { + (*lstat_control.counts[cpu])[write_index]. + max_wait_ww_ticks = spin_ticks; + } + } + + /* save the directory index for use on write_unlock */ + (*lstat_control.read_lock_counts[cpu])[index]. + write_index = write_index; + } +} + +void +_metered_write_unlock(rwlock_t * rwlock_ptr) +{ + int index; + int cpu; + int write_index; + uint32_t hold_time; + + if (lstat_control.state == LSTAT_OFF) { + _raw_write_unlock(rwlock_ptr); + return; + } + + cpu = THIS_CPU_NUMBER; + index = GET_RWINDEX(rwlock_ptr); + + /* update statistics if stats enabled for this lock */ + if (index > 0) { + write_index = + (*lstat_control.read_lock_counts[cpu])[index].write_index; + + hold_time = get_cycles() - + (*lstat_control.counts[cpu])[write_index].acquire_time; + (*lstat_control.counts[cpu])[write_index].cum_hold_ticks += + (uint64_t) hold_time; + if ((*lstat_control.counts[cpu])[write_index].max_hold_ticks < + hold_time) + (*lstat_control.counts[cpu])[write_index]. 
+ max_hold_ticks = hold_time; + } + _raw_write_unlock(rwlock_ptr); +} + +int +_metered_write_trylock(rwlock_t * rwlock_ptr) +{ + int retval; + void *this_pc = LSTAT_RA(LSTAT_RA_WRITE); + + if ((retval = _raw_write_trylock(rwlock_ptr))) { + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT); + } else { + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_REJECT); + } + + return retval; +} + +static void +init_control_space(void) +{ + /* Set all control space pointers to null and indices to "empty" */ + int cpu; + + /* + * Access CPU_CYCLE_FREQUENCY at the outset, which in some + * architectures may trigger a runtime calculation that uses a + * spinlock. Let's do this before lockmetering is turned on. + */ + if (CPU_CYCLE_FREQUENCY == 0) + BUG(); + + lstat_control.hashtab = NULL; + lstat_control.dir = NULL; + for (cpu = 0; cpu < NR_CPUS; cpu++) { + lstat_control.counts[cpu] = NULL; + lstat_control.read_lock_counts[cpu] = NULL; + } +} + +static int +reset_lstat_data(void) +{ + int cpu, flags; + + flags = 0; + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ + lstat_control.next_free_read_lock_index = 1; + lstat_control.dir_overflow = 0; + lstat_control.rwlock_overflow = 0; + + lstat_control.started_cycles64 = 0; + lstat_control.ending_cycles64 = 0; + lstat_control.enabled_cycles64 = 0; + lstat_control.first_started_time = 0; + lstat_control.started_time = 0; + lstat_control.ending_time = 0; + lstat_control.intervals = 0; + + /* + * paranoia -- in case someone does a "lockstat reset" before + * "lockstat on" + */ + if (lstat_control.hashtab) { + bzero(lstat_control.hashtab, + LSTAT_HASH_TABLE_SIZE * sizeof (short)); + bzero(lstat_control.dir, LSTAT_MAX_STAT_INDEX * + sizeof (lstat_directory_entry_t)); + + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + bzero(lstat_control.counts[cpu], + sizeof (lstat_cpu_counts_t)); + bzero(lstat_control.read_lock_counts[cpu], + sizeof (lstat_read_lock_cpu_counts_t)); + } + } +#ifdef NOTDEF + 
_raw_spin_unlock(&lstat_control.directory_lock); + local_irq_restore(flags); +#endif + return 1; +} + +static void +release_control_space(void) +{ + /* + * Called when either (1) allocation of kmem fails + * or (2) when user writes LSTAT_RELEASE to /proc/lockmeter. + * Assume that all pointers have been initialized to zero, + * i.e., nonzero pointers are valid addresses. + */ + int cpu; + + if (lstat_control.hashtab) { + kfree(lstat_control.hashtab); + lstat_control.hashtab = NULL; + } + + if (lstat_control.dir) { + vfree(lstat_control.dir); + lstat_control.dir = NULL; + } + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (lstat_control.counts[cpu]) { + vfree(lstat_control.counts[cpu]); + lstat_control.counts[cpu] = NULL; + } + if (lstat_control.read_lock_counts[cpu]) { + kfree(lstat_control.read_lock_counts[cpu]); + lstat_control.read_lock_counts[cpu] = NULL; + } + } +} + +int +get_lockmeter_info_size(void) +{ + return sizeof (lstat_user_request_t) + + num_online_cpus() * sizeof (lstat_cpu_counts_t) + + num_online_cpus() * sizeof (lstat_read_lock_cpu_counts_t) + + (LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t)); +} + +ssize_t +get_lockmeter_info(char *buffer, size_t max_len, loff_t * last_index) +{ + lstat_user_request_t req; + struct timeval tv; + ssize_t next_ret_bcount; + ssize_t actual_ret_bcount = 0; + int cpu; + + *last_index = 0; /* a one-shot read */ + + req.lstat_version = LSTAT_VERSION; + req.state = lstat_control.state; + req.maxcpus = num_online_cpus(); + req.cycleval = CPU_CYCLE_FREQUENCY; +#ifdef notyet + req.kernel_magic_addr = (void *) &_etext; + req.kernel_end_addr = (void *) &_etext; +#endif + req.uts = system_utsname; + req.intervals = lstat_control.intervals; + + req.first_started_time = lstat_control.first_started_time; + req.started_time = lstat_control.started_time; + req.started_cycles64 = lstat_control.started_cycles64; + + req.next_free_dir_index = lstat_control.next_free_dir_index; + req.next_free_read_lock_index =
lstat_control.next_free_read_lock_index; + req.dir_overflow = lstat_control.dir_overflow; + req.rwlock_overflow = lstat_control.rwlock_overflow; + + if (lstat_control.state == LSTAT_OFF) { + if (req.intervals == 0) { + /* measurement is off and no valid data present */ + next_ret_bcount = sizeof (lstat_user_request_t); + req.enabled_cycles64 = 0; + + if ((actual_ret_bcount + next_ret_bcount) > max_len) + return actual_ret_bcount; + + if (copy_to_user(buffer, (void *) &req, next_ret_bcount)) + return -EFAULT; /* copy to user space failed */ + return actual_ret_bcount + next_ret_bcount; + } else { + /* + * measurement is off but valid data present + * fetch time info from lstat_control + */ + req.ending_time = lstat_control.ending_time; + req.ending_cycles64 = lstat_control.ending_cycles64; + req.enabled_cycles64 = lstat_control.enabled_cycles64; + } + } else { + /* + * this must be a read while data active--use current time, + * etc + */ + do_gettimeofday(&tv); + req.ending_time = tv.tv_sec; + req.ending_cycles64 = get_cycles64(); + req.enabled_cycles64 = req.ending_cycles64 - + req.started_cycles64 + lstat_control.enabled_cycles64; + } + + next_ret_bcount = sizeof (lstat_user_request_t); + if ((actual_ret_bcount + next_ret_bcount) > max_len) + return actual_ret_bcount; + + if (copy_to_user(buffer, (void *) &req, next_ret_bcount)) + return -EFAULT; /* copy to user space failed */ + actual_ret_bcount += next_ret_bcount; + if (!lstat_control.counts[0]) /* not initialized?
*/ + return actual_ret_bcount; + + next_ret_bcount = sizeof (lstat_cpu_counts_t); + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + if ((actual_ret_bcount + next_ret_bcount) > max_len) + return actual_ret_bcount; /* leave early */ + if (copy_to_user(buffer + actual_ret_bcount, + lstat_control.counts[cpu], next_ret_bcount)) + return -EFAULT; /* copy to user space failed */ + actual_ret_bcount += next_ret_bcount; + } + next_ret_bcount = LSTAT_MAX_STAT_INDEX * + sizeof (lstat_directory_entry_t); + if (((actual_ret_bcount + next_ret_bcount) > max_len) + || !lstat_control.dir) + return actual_ret_bcount; /* leave early */ + if (copy_to_user(buffer + actual_ret_bcount, lstat_control.dir, + next_ret_bcount)) + return -EFAULT; /* copy to user space failed */ + actual_ret_bcount += next_ret_bcount; + next_ret_bcount = sizeof (lstat_read_lock_cpu_counts_t); + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + if (actual_ret_bcount + next_ret_bcount > max_len) + return actual_ret_bcount; + if (copy_to_user(buffer + actual_ret_bcount, + lstat_control.read_lock_counts[cpu], + next_ret_bcount)) + return -EFAULT; /* copy to user space failed */ + actual_ret_bcount += next_ret_bcount; + } + + return actual_ret_bcount; +} + +/* + * Writing to the /proc lockmeter node enables or disables metering, + * based upon the first byte of the "written" data. + * The following values are defined: + * LSTAT_ON: 1st call: allocates storage, initializes and turns on measurement + * subsequent calls just turn on measurement + * LSTAT_OFF: turns off measurement + * LSTAT_RESET: resets statistics + * LSTAT_RELEASE: releases statistics storage + * + * This allows one to accumulate statistics over several lockstat runs: + * + * lockstat on + * lockstat off + * ...repeat above as desired... + * lockstat get + * ...now start a new set of measurements... + * lockstat reset + * lockstat on + * ...
+ * + */ +ssize_t +put_lockmeter_info(const char *buffer, size_t len) +{ + int error = 0; + int dirsize, countsize, read_lock_countsize, hashsize; + int cpu; + char put_char; + int i, read_lock_blocks; + unsigned long flags; + rwlock_t *lock_ptr; + struct timeval tv; + + if (len <= 0) + return -EINVAL; + + if (get_user(put_char, buffer)) /* may fault: fetch before locking */ + return -EFAULT; + _raw_spin_lock(&lstat_control.control_lock); + switch (put_char) { + + case LSTAT_OFF: + if (lstat_control.state != LSTAT_OFF) { + /* + * To avoid seeing read lock hold times in an + * inconsistent state, we have to follow this protocol + * to turn off statistics + */ + local_irq_save(flags); + /* + * getting this lock will stop any read lock block + * allocations + */ + _raw_spin_lock(&lstat_control.directory_lock); + /* + * keep any more read lock blocks from being + * allocated + */ + lstat_control.state = LSTAT_OFF; + /* record how many read lock blocks there are */ + read_lock_blocks = + lstat_control.next_free_read_lock_index; + _raw_spin_unlock(&lstat_control.directory_lock); + /* now go through the list of read locks */ + cpu = THIS_CPU_NUMBER; + for (i = 1; i < read_lock_blocks; i++) { + lock_ptr = + (*lstat_control.read_lock_counts[cpu])[i]. + lock_ptr; + /* is this saved lock address still valid? */ + if (GET_RWINDEX(lock_ptr) == i) { + /* + * lock address appears to still be + * valid because we only hold one lock + * at a time, this can't cause a + * deadlock unless this is a lock held + * as part of the current system call + * path. At the moment there + * are no READ mode locks held to get + * here from user space, so we solve + * this by skipping locks held in + * write mode. + */ + if (RWLOCK_IS_WRITE_LOCKED(lock_ptr)) { + PUT_RWINDEX(lock_ptr, 0); + continue; + } + /* + * now we know there are no read + * holders of this lock!
stop + * statistics collection for this + * lock + */ + _raw_write_lock(lock_ptr); + PUT_RWINDEX(lock_ptr, 0); + _raw_write_unlock(lock_ptr); + } + /* + * it may still be possible for the hold time + * sum to be negative e.g. if a lock is + * reallocated while "busy" we will have to fix + * this up in the data reduction program. + */ + } + local_irq_restore(flags); + lstat_control.intervals++; + lstat_control.ending_cycles64 = get_cycles64(); + lstat_control.enabled_cycles64 += + lstat_control.ending_cycles64 - + lstat_control.started_cycles64; + do_gettimeofday(&tv); + lstat_control.ending_time = tv.tv_sec; + /* + * don't deallocate the structures -- we may do a + * lockstat on to add to the data that is already + * there. Use LSTAT_RELEASE to release storage + */ + } else { + error = -EBUSY; /* already OFF */ + } + break; + + case LSTAT_ON: + if (lstat_control.state == LSTAT_OFF) { +#ifdef DEBUG_LOCKMETER + printk("put_lockmeter_info(cpu=%d): LSTAT_ON\n", + THIS_CPU_NUMBER); +#endif + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ + + dirsize = LSTAT_MAX_STAT_INDEX * + sizeof (lstat_directory_entry_t); + hashsize = + (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort); + countsize = sizeof (lstat_cpu_counts_t); + read_lock_countsize = + sizeof (lstat_read_lock_cpu_counts_t); +#ifdef DEBUG_LOCKMETER + printk(" dirsize:%d", dirsize); + printk(" hashsize:%d", hashsize); + printk(" countsize:%d", countsize); + printk(" read_lock_countsize:%d\n", + read_lock_countsize); +#endif +#ifdef DEBUG_LOCKMETER + { + int secs; + unsigned long cycles; + uint64_t cycles64; + + do_gettimeofday(&tv); + secs = tv.tv_sec; + do { + do_gettimeofday(&tv); + } while (secs == tv.tv_sec); + cycles = get_cycles(); + cycles64 = get_cycles64(); + secs = tv.tv_sec; + do { + do_gettimeofday(&tv); + } while (secs == tv.tv_sec); + cycles = get_cycles() - cycles; + cycles64 = get_cycles64() - cycles64; /* elapsed 64-bit count */ + printk("lockmeter: cycleFrequency:%d " + "cycles:%d cycles64:%d\n", +
CPU_CYCLE_FREQUENCY, cycles, cycles64); + } +#endif + + /* + * if this is the first call, allocate storage and + * initialize + */ + if (!lstat_control.hashtab) { + + spin_lock_init(&lstat_control.directory_lock); + + /* guarantee all pointers at zero */ + init_control_space(); + + lstat_control.hashtab = + kmalloc(hashsize, GFP_KERNEL); + if (!lstat_control.hashtab) { + error = -ENOSPC; +#ifdef DEBUG_LOCKMETER + printk("!!error kmalloc of hashtab\n"); +#endif + } + lstat_control.dir = vmalloc(dirsize); + if (!lstat_control.dir) { + error = -ENOSPC; +#ifdef DEBUG_LOCKMETER + printk("!!error kmalloc of dir\n"); +#endif + } + + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + lstat_control.counts[cpu] = + vmalloc(countsize); + if (!lstat_control.counts[cpu]) { + error = -ENOSPC; +#ifdef DEBUG_LOCKMETER + printk("!!error vmalloc of " + "counts[%d]\n", cpu); +#endif + } + lstat_control.read_lock_counts[cpu] = + (lstat_read_lock_cpu_counts_t *) + kmalloc(read_lock_countsize, + GFP_KERNEL); + if (!lstat_control. 
+ read_lock_counts[cpu]) { + error = -ENOSPC; +#ifdef DEBUG_LOCKMETER + printk("!!error kmalloc of " + "read_lock_counts[%d]\n", + cpu); +#endif + } + } + } + + if (error) { + /* + * One or more kmalloc failures -- free + * everything + */ + release_control_space(); + } else { + + if (!reset_lstat_data()) { + error = -EINVAL; + break; + }; + + /* + * record starting and ending times and the + * like + */ + if (lstat_control.intervals == 0) { + do_gettimeofday(&tv); + lstat_control.first_started_time = + tv.tv_sec; + } + lstat_control.started_cycles64 = get_cycles64(); + do_gettimeofday(&tv); + lstat_control.started_time = tv.tv_sec; + + lstat_control.state = LSTAT_ON; + } + } else { + error = -EBUSY; /* already ON */ + } + break; + + case LSTAT_RESET: + if (lstat_control.state == LSTAT_OFF) { + if (!reset_lstat_data()) + error = -EINVAL; + } else { + error = -EBUSY; /* still on; can't reset */ + } + break; + + case LSTAT_RELEASE: + if (lstat_control.state == LSTAT_OFF) { + release_control_space(); + lstat_control.intervals = 0; + lstat_control.enabled_cycles64 = 0; + } else { + error = -EBUSY; + } + break; + + default: + error = -EINVAL; + } /* switch */ + + _raw_spin_unlock(&lstat_control.control_lock); + return error ? 
error : len; +} + +#ifdef USER_MODE_TESTING +/* following used for user mode testing */ +void +lockmeter_init() +{ + int dirsize, hashsize, countsize, read_lock_countsize, cpu; + + printf("lstat_control is at %p size=%lu\n", (void *) &lstat_control, + (unsigned long) sizeof (lstat_control)); + printf("sizeof(spinlock_t)=%lu\n", (unsigned long) sizeof (spinlock_t)); + lstat_control.state = LSTAT_ON; + + lstat_control.directory_lock = SPIN_LOCK_UNLOCKED; + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */ + lstat_control.next_free_read_lock_index = 1; + + dirsize = LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t); + hashsize = (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort); + countsize = sizeof (lstat_cpu_counts_t); + read_lock_countsize = sizeof (lstat_read_lock_cpu_counts_t); + + lstat_control.hashtab = (ushort *) malloc(hashsize); + + if (lstat_control.hashtab == 0) { + printf("malloc failure for at line %d in lockmeter.c\n", + __LINE__); + exit(0); + } + + lstat_control.dir = (lstat_directory_entry_t *) malloc(dirsize); + + if (lstat_control.dir == 0) { + printf("malloc failure for at line %d in lockmeter.c\n", + __LINE__); + exit(0); + } + + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + int j, k; /* nonzero when the respective malloc succeeded */ + j = (lstat_control.counts[cpu] = + (lstat_cpu_counts_t *) malloc(countsize)) != 0; + k = (lstat_control.read_lock_counts[cpu] = + (lstat_read_lock_cpu_counts_t *) + malloc(read_lock_countsize)) != 0; + if (!j || !k) { + printf("malloc failure for cpu=%d at line %d in " + "lockmeter.c\n", cpu, __LINE__); + exit(0); + } + } + + memset(lstat_control.hashtab, 0, hashsize); + memset(lstat_control.dir, 0, dirsize); + + for (cpu = 0; cpu < num_online_cpus(); cpu++) { + memset(lstat_control.counts[cpu], 0, countsize); + memset(lstat_control.read_lock_counts[cpu], 0, + read_lock_countsize); + } +} + +asm(" \ +.align 4 \ +.globl __write_lock_failed \ +__write_lock_failed: \ + " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax) \ +1: cmpl $" RW_LOCK_BIAS_STR ",(%eax) \ + jne 1b \ +\ + " LOCK "subl
$" RW_LOCK_BIAS_STR ",(%eax) \ + jnz __write_lock_failed \ + ret \ +\ +\ +.align 4 \ +.globl __read_lock_failed \ +__read_lock_failed: \ + lock ; incl (%eax) \ +1: cmpl $1,(%eax) \ + js 1b \ +\ + lock ; decl (%eax) \ + js __read_lock_failed \ + ret \ +"); +#endif + +EXPORT_SYMBOL(_metered_spin_lock); +EXPORT_SYMBOL(_metered_spin_unlock); +EXPORT_SYMBOL(_metered_spin_trylock); +EXPORT_SYMBOL(_metered_read_lock); +EXPORT_SYMBOL(_metered_read_unlock); +EXPORT_SYMBOL(_metered_write_lock); +EXPORT_SYMBOL(_metered_write_unlock); --- linux-2.6.6-rc1/kernel/Makefile 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/Makefile 2004-04-18 22:25:47.932543624 -0700 @@ -12,6 +12,7 @@ obj-y = sched.o fork.o exec_domain.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o +obj-$(CONFIG_LOCKMETER) += lockmeter.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o --- linux-2.6.6-rc1/kernel/module.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/module.c 2004-04-18 22:26:02.148382488 -0700 @@ -36,7 +36,6 @@ #include #include #include -#include #include #if 0 @@ -1003,6 +1002,8 @@ static int simplify_symbols(Elf_Shdr *se /* We compiled with -fno-common. These are not supposed to happen. 
*/ DEBUGP("Common symbol: %s\n", strtab + sym[i].st_name); + printk("%s: please compile with -fno-common\n", + mod->name); ret = -ENOEXEC; break; @@ -1541,6 +1542,10 @@ static struct module *load_module(void _ / sizeof(struct obsolete_modparm), sechdrs, symindex, (char *)sechdrs[strindex].sh_addr); + if (setupindex) + printk(KERN_WARNING "%s: Ignoring new-style " + "parameters in presence of obsolete ones\n", + mod->name); } else { /* Size of section 0 is 0, so this works well if no params */ err = parse_args(mod->name, mod->args, --- linux-2.6.6-rc1/kernel/pid.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/pid.c 2004-04-18 22:25:42.468374304 -0700 @@ -122,6 +122,8 @@ return_pid: } if (!offset || !atomic_read(&map->nr_free)) { + if (!offset) + map--; next_map: map = next_free_map(map, &max_steps); if (!map) @@ -268,6 +270,9 @@ void switch_exec_pids(task_t *leader, ta * machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or * more. */ +#ifdef CONFIG_KGDB +int kgdb_pid_init_done; /* so we don't call prior to... */ +#endif void __init pidhash_init(void) { int i, j, pidhash_size; @@ -289,6 +294,9 @@ void __init pidhash_init(void) for (j = 0; j < pidhash_size; j++) INIT_LIST_HEAD(&pid_hash[i][j]); } +#ifdef CONFIG_KGDB + kgdb_pid_init_done++; +#endif } void __init pidmap_init(void) --- linux-2.6.6-rc1/kernel/sched.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/sched.c 2004-04-18 22:25:54.264581008 -0700 @@ -15,6 +15,7 @@ * and per-CPU runqueues. Cleanups and useful suggestions * by Davide Libenzi, preemptible kernel bits by Robert Love. * 2003-09-03 Interactivity tuning by Con Kolivas. 
+ * 2004-04-02 Scheduler domains code by Nick Piggin */ #include @@ -25,6 +26,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,6 +42,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NUMA #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu)) @@ -72,6 +77,13 @@ #define NS_TO_JIFFIES(TIME) ((TIME) / (1000000000 / HZ)) #define JIFFIES_TO_NS(TIME) ((TIME) * (1000000000 / HZ)) +#ifndef JIFFIES_TO_MSEC +# define JIFFIES_TO_MSEC(x) ((x) * 1000 / HZ) +#endif +#ifndef MSEC_TO_JIFFIES +# define MSEC_TO_JIFFIES(x) ((x) * HZ / 1000) +#endif + /* * These are the 'tuning knobs' of the scheduler: * @@ -91,7 +103,6 @@ #define MAX_SLEEP_AVG (AVG_TIMESLICE * MAX_BONUS) #define STARVATION_LIMIT (MAX_SLEEP_AVG) #define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG)) -#define NODE_THRESHOLD 125 #define CREDIT_LIMIT 100 /* @@ -173,11 +184,13 @@ ((MAX_TIMESLICE - MIN_TIMESLICE) * \ (MAX_PRIO-1 - (p)->static_prio) / (MAX_USER_PRIO-1))) -static inline unsigned int task_timeslice(task_t *p) +static unsigned int task_timeslice(task_t *p) { return BASE_TIMESLICE(p); } +#define task_hot(p, now, sd) ((now) - (p)->timestamp < (sd)->cache_hot_time) + /* * These are the runqueue data structures: */ @@ -187,7 +200,7 @@ static inline unsigned int task_timeslic typedef struct runqueue runqueue_t; struct prio_array { - int nr_active; + unsigned int nr_active; unsigned long bitmap[BITMAP_SIZE]; struct list_head queue[MAX_PRIO]; }; @@ -201,37 +214,63 @@ struct prio_array { */ struct runqueue { spinlock_t lock; + + /* + * nr_running and cpu_load should be in the same cacheline because + * remote CPUs use both these fields when doing load calculation. 
+ */ + unsigned long nr_running; +#ifdef CONFIG_SMP + unsigned long cpu_load; +#endif unsigned long long nr_switches; - unsigned long nr_running, expired_timestamp, nr_uninterruptible, - timestamp_last_tick; + unsigned long expired_timestamp, nr_uninterruptible; + unsigned long long timestamp_last_tick; task_t *curr, *idle; + struct mm_struct *prev_mm; prio_array_t *active, *expired, arrays[2]; - int best_expired_prio, prev_cpu_load[NR_CPUS]; -#ifdef CONFIG_NUMA - atomic_t *node_nr_running; - int prev_node_load[MAX_NUMNODES]; -#endif + int best_expired_prio; + atomic_t nr_iowait; + +#ifdef CONFIG_SMP + struct sched_domain *sd; + + /* For active balancing */ + int active_balance; + int push_cpu; + task_t *migration_thread; struct list_head migration_queue; - - atomic_t nr_iowait; +#endif +#ifdef CONFIG_SCHEDSTATS + /* sys_sched_yield stats */ + unsigned long yld_exp_empty; + unsigned long yld_act_empty; + unsigned long yld_both_empty; + unsigned long yld_cnt; + + /* schedule stats */ + unsigned long sched_cnt; + unsigned long sched_switch; + unsigned long sched_idle; + + /* wake stats */ + unsigned long sched_wake; + unsigned long sched_wake_local; +#endif }; static DEFINE_PER_CPU(struct runqueue, runqueues); +#define for_each_domain(cpu, domain) \ + for (domain = cpu_rq(cpu)->sd; domain; domain = domain->parent) + #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) #define this_rq() (&__get_cpu_var(runqueues)) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -extern unsigned long __scheduling_functions_start_here; -extern unsigned long __scheduling_functions_end_here; -const unsigned long scheduling_functions_start_here = - (unsigned long)&__scheduling_functions_start_here; -const unsigned long scheduling_functions_end_here = - (unsigned long)&__scheduling_functions_end_here; - /* * Default context-switch locking: */ @@ -241,57 +280,12 @@ const unsigned long scheduling_functions # define task_running(rq, p) ((rq)->curr == (p)) #endif 
-#ifdef CONFIG_NUMA - -/* - * Keep track of running tasks. - */ - -static atomic_t node_nr_running[MAX_NUMNODES] ____cacheline_maxaligned_in_smp = - {[0 ...MAX_NUMNODES-1] = ATOMIC_INIT(0)}; - -static inline void nr_running_init(struct runqueue *rq) -{ - rq->node_nr_running = &node_nr_running[0]; -} - -static inline void nr_running_inc(runqueue_t *rq) -{ - atomic_inc(rq->node_nr_running); - rq->nr_running++; -} - -static inline void nr_running_dec(runqueue_t *rq) -{ - atomic_dec(rq->node_nr_running); - rq->nr_running--; -} - -__init void node_nr_running_init(void) -{ - int i; - - for (i = 0; i < NR_CPUS; i++) { - if (cpu_possible(i)) - cpu_rq(i)->node_nr_running = - &node_nr_running[cpu_to_node(i)]; - } -} - -#else /* !CONFIG_NUMA */ - -# define nr_running_init(rq) do { } while (0) -# define nr_running_inc(rq) do { (rq)->nr_running++; } while (0) -# define nr_running_dec(rq) do { (rq)->nr_running--; } while (0) - -#endif /* CONFIG_NUMA */ - /* * task_rq_lock - lock the runqueue a given task resides on and disable * interrupts. Note the ordering: we can safely lookup the task_rq without * explicitly disabling preemption. 
*/ -static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) +static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags) { struct runqueue *rq; @@ -311,10 +305,95 @@ static inline void task_rq_unlock(runque spin_unlock_irqrestore(&rq->lock, *flags); } + +#ifdef CONFIG_SCHEDSTATS + +/* + * bump this up when changing the output format or the meaning of an existing + * format, so that tools can adapt (or abort) + */ +#define SCHEDSTAT_VERSION 7 + +static int show_schedstat(struct seq_file *seq, void *v) +{ + int i; + + seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); + seq_printf(seq, "timestamp %lu\n", jiffies); + for_each_cpu(i) { + /* Include offline CPUs */ + runqueue_t *rq = cpu_rq(i); +#ifdef CONFIG_SMP + struct sched_domain *sd; + int j = 0; +#endif + + seq_printf(seq, + "cpu%d %lu %lu %lu %lu %lu %lu %lu %lu %lu", + i, + rq->yld_both_empty, rq->yld_act_empty, + rq->yld_exp_empty, rq->yld_cnt, + rq->sched_switch, rq->sched_cnt, + rq->sched_idle, rq->sched_wake, rq->sched_wake_local); +#ifdef CONFIG_SMP + for_each_domain(i, sd) { + char str[NR_CPUS]; + int k; + cpumask_scnprintf(str, NR_CPUS, sd->span); + seq_printf(seq, " domain%d %s", j++, str); + + for (k = 0; k < 3; k++) { + seq_printf(seq, " %lu %lu %lu %lu %lu %lu", + sd->lb_cnt[k], sd->lb_balanced[k], + sd->lb_failed[k], sd->lb_pulled[k], + sd->lb_hot_pulled[k], sd->lb_imbalance[k]); + } + + seq_printf(seq, " %lu %lu %lu %lu %lu %lu %lu %lu", + sd->alb_cnt, sd->alb_failed, + sd->alb_pushed, sd->sched_wake_remote, + sd->plb_pulled, sd->afw_pulled, + sd->sbe_pushed, sd->sbc_pushed); + } +#endif + + seq_printf(seq, "\n"); + } + + return 0; +} + +static int schedstat_open(struct inode *inode, struct file *file) +{ + unsigned size = 4096 * (1 + num_online_cpus() / 32); + char *buf = kmalloc(size, GFP_KERNEL); + struct seq_file *m; + int res; + + if (!buf) + return -ENOMEM; + res = single_open(file, show_schedstat, NULL); + if (!res) { + m = file->private_data; + m->buf = buf; + m->size 
= size; + } else + kfree(buf); + return res; +} + +struct file_operations proc_schedstat_operations = { + .open = schedstat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + /* * rq_lock - lock a given runqueue and disable interrupts. */ -static inline runqueue_t *this_rq_lock(void) +static runqueue_t *this_rq_lock(void) { runqueue_t *rq; @@ -333,7 +412,7 @@ static inline void rq_unlock(runqueue_t /* * Adding/removing a task to/from a priority array: */ -static inline void dequeue_task(struct task_struct *p, prio_array_t *array) +static void dequeue_task(struct task_struct *p, prio_array_t *array) { array->nr_active--; list_del(&p->run_list); @@ -341,7 +420,7 @@ static inline void dequeue_task(struct t __clear_bit(p->prio, array->bitmap); } -static inline void enqueue_task(struct task_struct *p, prio_array_t *array) +static void enqueue_task(struct task_struct *p, prio_array_t *array) { list_add_tail(&p->run_list, array->queue + p->prio); __set_bit(p->prio, array->bitmap); @@ -349,6 +428,21 @@ static inline void enqueue_task(struct t p->array = array; } +#ifdef CONFIG_SMP +/* + * Used by the migration code - we pull tasks from the head of the + * remote queue so we want these tasks to show up at the head of the + * local queue: + */ +static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array) +{ + list_add(&p->run_list, array->queue + p->prio); + __set_bit(p->prio, array->bitmap); + array->nr_active++; + p->array = array; +} +#endif + /* * effective_prio - return the priority that is based on the static * priority but is modified by bonuses/penalties. 
@@ -386,7 +480,7 @@ static int effective_prio(task_t *p) static inline void __activate_task(task_t *p, runqueue_t *rq) { enqueue_task(p, rq->active); - nr_running_inc(rq); + rq->nr_running++; } static void recalc_task_prio(task_t *p, unsigned long long now) @@ -469,7 +563,7 @@ static void recalc_task_prio(task_t *p, * Update all the scheduling statistics stuff. (sleep average * calculation, priority modifiers, etc.) */ -static inline void activate_task(task_t *p, runqueue_t *rq) +static void activate_task(task_t *p, runqueue_t *rq) { unsigned long long now = sched_clock(); @@ -505,9 +599,9 @@ static inline void activate_task(task_t /* * deactivate_task - remove a task from the runqueue. */ -static inline void deactivate_task(struct task_struct *p, runqueue_t *rq) +static void deactivate_task(struct task_struct *p, runqueue_t *rq) { - nr_running_dec(rq); + rq->nr_running--; if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; dequeue_task(p, p->array); @@ -521,9 +615,9 @@ static inline void deactivate_task(struc * might also involve a cross-CPU call to trigger the scheduler on * the target CPU. */ -static inline void resched_task(task_t *p) -{ #ifdef CONFIG_SMP +static void resched_task(task_t *p) +{ int need_resched, nrpolling; preempt_disable(); @@ -535,10 +629,13 @@ static inline void resched_task(task_t * if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id())) smp_send_reschedule(task_cpu(p)); preempt_enable(); +} #else +static inline void resched_task(task_t *p) +{ set_tsk_need_resched(p); -#endif } +#endif /** * task_curr - is this task currently executing on a CPU? 
@@ -550,40 +647,46 @@ inline int task_curr(task_t *p) } #ifdef CONFIG_SMP +enum request_type { + REQ_MOVE_TASK, + REQ_SET_DOMAIN, +}; + typedef struct { struct list_head list; + enum request_type type; + + /* For REQ_MOVE_TASK */ task_t *task; + int dest_cpu; + + /* For REQ_SET_DOMAIN */ + struct sched_domain *sd; + struct completion done; } migration_req_t; /* - * The task's runqueue lock must be held, and the new mask must be valid. + * The task's runqueue lock must be held. * Returns true if you have to wait for migration thread. */ -static int __set_cpus_allowed(task_t *p, cpumask_t new_mask, - migration_req_t *req) +static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req) { runqueue_t *rq = task_rq(p); - p->cpus_allowed = new_mask; - /* - * Can the task run on the task's current CPU? If not then - * migrate the thread off to a proper CPU. - */ - if (cpu_isset(task_cpu(p), new_mask)) - return 0; - /* * If the task is not on a runqueue (and not running), then * it is sufficient to simply update the task's cpu field. */ if (!p->array && !task_running(rq, p)) { - set_task_cpu(p, any_online_cpu(p->cpus_allowed)); + set_task_cpu(p, dest_cpu); return 0; } init_completion(&req->done); + req->type = REQ_MOVE_TASK; req->task = p; + req->dest_cpu = dest_cpu; list_add(&req->list, &rq->migration_queue); return 1; } @@ -638,6 +741,71 @@ void kick_process(task_t *p) EXPORT_SYMBOL_GPL(kick_process); +/* + * Return a low guess at the load of a migration-source cpu. + * + * We want to under-estimate the load of migration sources, to + * balance conservatively. 
+ */ +static inline unsigned long source_load(int cpu) +{ + runqueue_t *rq = cpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + + return min(rq->cpu_load, load_now); +} + +/* + * Return a high guess at the load of a migration-target cpu + */ +static inline unsigned long target_load(int cpu) +{ + runqueue_t *rq = cpu_rq(cpu); + unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; + + return max(rq->cpu_load, load_now); +} + +#endif + +/* + * wake_idle() is useful especially on SMT architectures to wake a + * task onto an idle sibling if we would otherwise wake it onto a + * busy sibling. + * + * Returns the CPU we should wake onto. + */ +#if defined(ARCH_HAS_SCHED_WAKE_IDLE) +static int wake_idle(int cpu, task_t *p) +{ + cpumask_t tmp; + runqueue_t *rq = cpu_rq(cpu); + struct sched_domain *sd; + int i; + + if (idle_cpu(cpu)) + return cpu; + + sd = rq->sd; + if (!(sd->flags & SD_WAKE_IDLE)) + return cpu; + + cpus_and(tmp, sd->span, cpu_online_map); + for_each_cpu_mask(i, tmp) { + if (!cpu_isset(i, p->cpus_allowed)) + continue; + + if (idle_cpu(i)) + return i; + } + + return cpu; +} +#else +static inline int wake_idle(int cpu, task_t *p) +{ + return cpu; +} #endif /*** @@ -656,52 +824,151 @@ EXPORT_SYMBOL_GPL(kick_process); */ static int try_to_wake_up(task_t * p, unsigned int state, int sync) { + int cpu, this_cpu, success = 0; unsigned long flags; - int success = 0; long old_state; runqueue_t *rq; +#ifdef CONFIG_SMP + unsigned long load, this_load; + struct sched_domain *sd; + int new_cpu; +#endif -repeat_lock_task: rq = task_rq_lock(p, &flags); old_state = p->state; - if (old_state & state) { - if (!p->array) { + if (!(old_state & state)) + goto out; + + if (p->array) + goto out_running; + + cpu = task_cpu(p); + this_cpu = smp_processor_id(); + + schedstat_inc(rq, sched_wake); +#ifndef CONFIG_SMP + schedstat_inc(rq, sched_wake_local); +#endif + +#ifdef CONFIG_SMP +#ifdef CONFIG_SCHEDSTATS + if (cpu == this_cpu) + schedstat_inc(rq, 
sched_wake_local); + else { + for_each_domain(this_cpu, sd) + if (cpu_isset(cpu, sd->span)) + break; + if (sd) + schedstat_inc(sd, sched_wake_remote); + } +#endif + + if (unlikely(task_running(rq, p) || cpu_is_offline(this_cpu))) + goto out_activate; + + new_cpu = cpu; + + if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) + goto out_set_cpu; + + load = source_load(cpu); + this_load = target_load(this_cpu); + + /* Don't pull the task off an idle CPU to a busy one */ + if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2) + goto out_set_cpu; + + new_cpu = this_cpu; /* Wake to this CPU if we can */ + + /* + * Scan domains for affine wakeup and passive balancing + * possibilities. + */ + for_each_domain(this_cpu, sd) { + unsigned int imbalance; + /* + * Start passive balancing when half the imbalance_pct + * limit is reached. + */ + imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2; + + if ( ((sd->flags & SD_WAKE_AFFINE) && + !task_hot(p, rq->timestamp_last_tick, sd)) + || ((sd->flags & SD_WAKE_BALANCE) && + imbalance*this_load <= 100*load) ) { + /* - * Fast-migrate the task if it's not running or runnable - * currently. Do not violate hard affinity. + * Now sd has SD_WAKE_AFFINE and p is cache cold in sd + * or sd has SD_WAKE_BALANCE and there is an imbalance */ - if (unlikely(sync && !task_running(rq, p) && - (task_cpu(p) != smp_processor_id()) && - cpu_isset(smp_processor_id(), - p->cpus_allowed) && - !cpu_is_offline(smp_processor_id()))) { - set_task_cpu(p, smp_processor_id()); - task_rq_unlock(rq, &flags); - goto repeat_lock_task; - } - if (old_state == TASK_UNINTERRUPTIBLE) { - rq->nr_uninterruptible--; - /* - * Tasks on involuntary sleep don't earn - * sleep_avg beyond just interactive state. 
- */ - p->activated = -1; - } - if (sync && (task_cpu(p) == smp_processor_id())) - __activate_task(p, rq); - else { - activate_task(p, rq); - if (TASK_PREEMPTS_CURR(p, rq)) - resched_task(rq->curr); + if (cpu_isset(cpu, sd->span)) { +#ifdef CONFIG_SCHEDSTATS + if ((sd->flags & SD_WAKE_AFFINE) && + !task_hot(p, rq->timestamp_last_tick, sd)) + schedstat_inc(sd, afw_pulled); + else if ((sd->flags & SD_WAKE_BALANCE) && + imbalance*this_load <= 100*load) + schedstat_inc(sd, plb_pulled); +#endif + goto out_set_cpu; } - success = 1; } - p->state = TASK_RUNNING; } + + new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ +out_set_cpu: + new_cpu = wake_idle(new_cpu, p); + if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) { + set_task_cpu(p, new_cpu); + task_rq_unlock(rq, &flags); + /* might preempt at this point */ + rq = task_rq_lock(p, &flags); + old_state = p->state; + if (!(old_state & state)) + goto out; + if (p->array) + goto out_running; + + this_cpu = smp_processor_id(); + cpu = task_cpu(p); + } + +out_activate: +#endif /* CONFIG_SMP */ + if (old_state == TASK_UNINTERRUPTIBLE) { + rq->nr_uninterruptible--; + /* + * Tasks on involuntary sleep don't earn + * sleep_avg beyond just interactive state. + */ + p->activated = -1; + } + + /* + * Sync wakeups (i.e. those types of wakeups where the waker + * has indicated that it will leave the CPU in short order) + * don't trigger a preemption, if the woken up task will run on + * this cpu. (in this case the 'I will reschedule' promise of + * the waker guarantees that the freshly woken up task is going + * to be considered on this CPU.) 
+ */ + if (sync && cpu == this_cpu) { + __activate_task(p, rq); + } else { + activate_task(p, rq); + if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); + } + success = 1; + +out_running: + p->state = TASK_RUNNING; +out: task_rq_unlock(rq, &flags); return success; } + int fastcall wake_up_process(task_t * p) { return try_to_wake_up(p, TASK_STOPPED | @@ -756,8 +1023,8 @@ void fastcall sched_fork(task_t *p) p->timestamp = sched_clock(); if (!current->time_slice) { /* - * This case is rare, it happens when the parent has only - * a single jiffy left from its timeslice. Taking the + * This case is rare, it happens when the parent has only + * a single jiffy left from its timeslice. Taking the * runqueue lock is not a problem. */ current->time_slice = 1; @@ -798,15 +1065,10 @@ void fastcall wake_up_forked_process(tas p->prio = effective_prio(p); set_task_cpu(p, smp_processor_id()); - if (unlikely(!current->array)) - __activate_task(p, rq); - else { - p->prio = current->prio; - list_add_tail(&p->run_list, ¤t->run_list); - p->array = current->array; - p->array->nr_active++; - nr_running_inc(rq); - } + __activate_task(p, rq); + if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); + task_rq_unlock(rq, &flags); } @@ -856,7 +1118,7 @@ void fastcall sched_exit(task_t * p) * with the lock held can cause deadlocks; see schedule() for * details.) */ -static inline void finish_task_switch(task_t *prev) +static void finish_task_switch(task_t *prev) { runqueue_t *rq = this_rq(); struct mm_struct *mm = rq->prev_mm; @@ -873,7 +1135,7 @@ static inline void finish_task_switch(ta * still held, otherwise prev could be scheduled on another cpu, die * there before we look at prev->state, and then the reference would * be dropped twice. 
- * Manfred Spraul + * Manfred Spraul */ prev_task_flags = prev->flags; finish_arch_switch(rq, prev); @@ -935,7 +1197,7 @@ unsigned long nr_running(void) { unsigned long i, sum = 0; - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) sum += cpu_rq(i)->nr_running; return sum; @@ -977,7 +1239,7 @@ unsigned long nr_iowait(void) * Note this does not disable interrupts like task_rq_lock, * you need to do so manually before calling. */ -static inline void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) { if (rq1 == rq2) spin_lock(&rq1->lock); @@ -998,14 +1260,139 @@ static inline void double_rq_lock(runque * Note this does not restore interrupts like task_rq_unlock, * you need to do so manually after calling. */ -static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) +static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) { spin_unlock(&rq1->lock); if (rq1 != rq2) spin_unlock(&rq2->lock); } -#ifdef CONFIG_NUMA +enum idle_type +{ + IDLE, + NOT_IDLE, + NEWLY_IDLE, +}; + +#ifdef CONFIG_SMP + +/* + * find_idlest_cpu - find the least busy runqueue. + */ +static int find_idlest_cpu(struct task_struct *p, int this_cpu, + struct sched_domain *sd) +{ + unsigned long load, min_load, this_load; + int i, min_cpu; + cpumask_t mask; + + min_cpu = UINT_MAX; + min_load = ULONG_MAX; + + cpus_and(mask, sd->span, cpu_online_map); + cpus_and(mask, mask, p->cpus_allowed); + + for_each_cpu_mask(i, mask) { + load = target_load(i); + + if (load < min_load) { + min_cpu = i; + min_load = load; + + /* break out early on an idle CPU: */ + if (!min_load) + break; + } + } + + /* add +1 to account for the new task */ + this_load = source_load(this_cpu) + SCHED_LOAD_SCALE; + + /* + * Would with the addition of the new task to the + * current CPU there be an imbalance between this + * CPU and the idlest CPU? + * + * Use half of the balancing threshold - new-context is + * a good opportunity to balance. 
+ */ + if (min_load*(100 + (sd->imbalance_pct-100)/2) < this_load*100) + return min_cpu; + + return this_cpu; +} + +/* + * wake_up_forked_thread - wake up a freshly forked thread. + * + * This function will do some initial scheduler statistics housekeeping + * that must be done for every newly created context, and it also does + * runqueue balancing. + */ +void fastcall wake_up_forked_thread(task_t * p) +{ + unsigned long flags; + int this_cpu = get_cpu(), cpu; + struct sched_domain *tmp, *sd = NULL; + runqueue_t *this_rq = cpu_rq(this_cpu), *rq; + + /* + * Find the largest domain that this CPU is part of that + * is willing to balance on clone: + */ + for_each_domain(this_cpu, tmp) + if (tmp->flags & SD_BALANCE_CLONE) + sd = tmp; + if (sd) + cpu = find_idlest_cpu(p, this_cpu, sd); + else + cpu = this_cpu; + + local_irq_save(flags); +lock_again: + rq = cpu_rq(cpu); + double_rq_lock(this_rq, rq); + + BUG_ON(p->state != TASK_RUNNING); + + /* + * We did find_idlest_cpu() unlocked, so in theory + * the mask could have changed - just dont migrate + * in this case: + */ + if (unlikely(!cpu_isset(cpu, p->cpus_allowed))) { + cpu = this_cpu; + double_rq_unlock(this_rq, rq); + goto lock_again; + } + /* + * We decrease the sleep average of forking parents + * and children as well, to keep max-interactive tasks + * from forking tasks that are max-interactive. + */ + current->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(current) * + PARENT_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); + + p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) * + CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS); + + p->interactive_credit = 0; + + p->prio = effective_prio(p); + set_task_cpu(p, cpu); + + if (cpu != this_cpu) + schedstat_inc(sd, sbc_pushed); + + __activate_task(p, rq); + if (TASK_PREEMPTS_CURR(p, rq)) + resched_task(rq->curr); + + double_rq_unlock(this_rq, rq); + local_irq_restore(flags); + put_cpu(); +} + /* * If dest_cpu is allowed for this process, migrate the task to it. 
* This is accomplished by forcing the cpu_allowed mask to only @@ -1014,32 +1401,31 @@ static inline void double_rq_unlock(runq */ static void sched_migrate_task(task_t *p, int dest_cpu) { - runqueue_t *rq; migration_req_t req; + runqueue_t *rq; unsigned long flags; - cpumask_t old_mask, new_mask = cpumask_of_cpu(dest_cpu); lock_cpu_hotplug(); rq = task_rq_lock(p, &flags); - old_mask = p->cpus_allowed; - if (!cpu_isset(dest_cpu, old_mask) || !cpu_online(dest_cpu)) + if (!cpu_isset(dest_cpu, p->cpus_allowed)) goto out; /* force the process onto the specified CPU */ - if (__set_cpus_allowed(p, new_mask, &req)) { + if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); wait_for_completion(&req.done); - /* If we raced with sys_sched_setaffinity, don't - * restore mask. */ - rq = task_rq_lock(p, &flags); - if (likely(cpus_equal(p->cpus_allowed, new_mask))) { - /* Restore old mask: won't need migration - * thread, since current cpu is allowed. */ - BUG_ON(__set_cpus_allowed(p, old_mask, NULL)); - } + /* + * we want a new context here. This eliminates TLB + * flushes on the cpus where the process executed prior to + * the migration. + */ + tlb_migrate_prepare(current->mm); + unlock_cpu_hotplug(); + + return; } out: task_rq_unlock(rq, &flags); @@ -1047,203 +1433,51 @@ out: } /* - * Find the least loaded CPU. Slightly favor the current CPU by - * setting its runqueue length as the minimum to start. + * sched_balance_exec(): find the highest-level, exec-balance-capable + * domain and try to migrate the task to the least loaded CPU. + * + * execve() is a valuable balancing opportunity, because at this point + * the task has the smallest effective memory and cache footprint. 
*/ -static int sched_best_cpu(struct task_struct *p) -{ - int i, minload, load, best_cpu, node = 0; - cpumask_t cpumask; - - best_cpu = task_cpu(p); - if (cpu_rq(best_cpu)->nr_running <= 2) - return best_cpu; - - minload = 10000000; - for_each_node_with_cpus(i) { - /* - * Node load is always divided by nr_cpus_node to normalise - * load values in case cpu count differs from node to node. - * We first multiply node_nr_running by 10 to get a little - * better resolution. - */ - load = 10 * atomic_read(&node_nr_running[i]) / nr_cpus_node(i); - if (load < minload) { - minload = load; - node = i; - } - } - - minload = 10000000; - cpumask = node_to_cpumask(node); - for (i = 0; i < NR_CPUS; ++i) { - if (!cpu_isset(i, cpumask)) - continue; - if (cpu_rq(i)->nr_running < minload) { - best_cpu = i; - minload = cpu_rq(i)->nr_running; - } - } - return best_cpu; -} - void sched_balance_exec(void) { - int new_cpu; + struct sched_domain *tmp, *sd = NULL; + int new_cpu, this_cpu = get_cpu(); - if (numnodes > 1) { - new_cpu = sched_best_cpu(current); - if (new_cpu != smp_processor_id()) - sched_migrate_task(current, new_cpu); - } -} + /* Prefer the current CPU if there's only this task running */ + if (this_rq()->nr_running <= 1) + goto out; -/* - * Find the busiest node. All previous node loads contribute with a - * geometrically deccaying weight to the load measure: - * load_{t} = load_{t-1}/2 + nr_node_running_{t} - * This way sudden load peaks are flattened out a bit. - * Node load is divided by nr_cpus_node() in order to compare nodes - * of different cpu count but also [first] multiplied by 10 to - * provide better resolution. 
- */ -static int find_busiest_node(int this_node) -{ - int i, node = -1, load, this_load, maxload; - - if (!nr_cpus_node(this_node)) - return node; - this_load = maxload = (this_rq()->prev_node_load[this_node] >> 1) - + (10 * atomic_read(&node_nr_running[this_node]) - / nr_cpus_node(this_node)); - this_rq()->prev_node_load[this_node] = this_load; - for_each_node_with_cpus(i) { - if (i == this_node) - continue; - load = (this_rq()->prev_node_load[i] >> 1) - + (10 * atomic_read(&node_nr_running[i]) - / nr_cpus_node(i)); - this_rq()->prev_node_load[i] = load; - if (load > maxload && (100*load > NODE_THRESHOLD*this_load)) { - maxload = load; - node = i; + for_each_domain(this_cpu, tmp) + if (tmp->flags & SD_BALANCE_EXEC) + sd = tmp; + + if (sd) { + new_cpu = find_idlest_cpu(current, this_cpu, sd); + if (new_cpu != this_cpu) { + schedstat_inc(sd, sbe_pushed); + put_cpu(); + sched_migrate_task(current, new_cpu); + return; } } - return node; +out: + put_cpu(); } -#endif /* CONFIG_NUMA */ - -#ifdef CONFIG_SMP - /* - * double_lock_balance - lock the busiest runqueue - * - * this_rq is locked already. Recalculate nr_running if we have to - * drop the runqueue lock. + * double_lock_balance - lock the busiest runqueue, this_rq is locked already. */ -static inline -unsigned int double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest, - int this_cpu, int idle, - unsigned int nr_running) +static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest) { if (unlikely(!spin_trylock(&busiest->lock))) { if (busiest < this_rq) { spin_unlock(&this_rq->lock); spin_lock(&busiest->lock); spin_lock(&this_rq->lock); - /* Need to recalculate nr_running */ - if (idle || (this_rq->nr_running > - this_rq->prev_cpu_load[this_cpu])) - nr_running = this_rq->nr_running; - else - nr_running = this_rq->prev_cpu_load[this_cpu]; } else spin_lock(&busiest->lock); } - return nr_running; -} - -/* - * find_busiest_queue - find the busiest runqueue among the cpus in cpumask. 
- */ -static inline -runqueue_t *find_busiest_queue(runqueue_t *this_rq, int this_cpu, int idle, - int *imbalance, cpumask_t cpumask) -{ - int nr_running, load, max_load, i; - runqueue_t *busiest, *rq_src; - - /* - * We search all runqueues to find the most busy one. - * We do this lockless to reduce cache-bouncing overhead, - * we re-check the 'best' source CPU later on again, with - * the lock held. - * - * We fend off statistical fluctuations in runqueue lengths by - * saving the runqueue length (as seen by the balancing CPU) during - * the previous load-balancing operation and using the smaller one - * of the current and saved lengths. If a runqueue is long enough - * for a longer amount of time then we recognize it and pull tasks - * from it. - * - * The 'current runqueue length' is a statistical maximum variable, - * for that one we take the longer one - to avoid fluctuations in - * the other direction. So for a load-balance to happen it needs - * stable long runqueue on the target CPU and stable short runqueue - * on the local runqueue. - * - * We make an exception if this CPU is about to become idle - in - * that case we are less picky about moving a task across CPUs and - * take what can be taken. - */ - if (idle || (this_rq->nr_running > this_rq->prev_cpu_load[this_cpu])) - nr_running = this_rq->nr_running; - else - nr_running = this_rq->prev_cpu_load[this_cpu]; - - busiest = NULL; - max_load = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpumask)) - continue; - - rq_src = cpu_rq(i); - if (idle || (rq_src->nr_running < this_rq->prev_cpu_load[i])) - load = rq_src->nr_running; - else - load = this_rq->prev_cpu_load[i]; - this_rq->prev_cpu_load[i] = rq_src->nr_running; - - if ((load > max_load) && (rq_src != this_rq)) { - busiest = rq_src; - max_load = load; - } - } - - if (likely(!busiest)) - goto out; - - *imbalance = max_load - nr_running; - - /* It needs an at least ~25% imbalance to trigger balancing. 
*/ - if (!idle && ((*imbalance)*4 < max_load)) { - busiest = NULL; - goto out; - } - - nr_running = double_lock_balance(this_rq, busiest, this_cpu, - idle, nr_running); - /* - * Make sure nothing changed since we checked the - * runqueue length. - */ - if (busiest->nr_running <= nr_running) { - spin_unlock(&busiest->lock); - busiest = NULL; - } -out: - return busiest; } /* @@ -1252,13 +1486,13 @@ out: */ static inline void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, - runqueue_t *this_rq, int this_cpu) + runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) { dequeue_task(p, src_array); - nr_running_dec(src_rq); + src_rq->nr_running--; set_task_cpu(p, this_cpu); - nr_running_inc(this_rq); - enqueue_task(p, this_rq->active); + this_rq->nr_running++; + enqueue_task_head(p, this_array); p->timestamp = sched_clock() - (src_rq->timestamp_last_tick - p->timestamp); /* @@ -1266,193 +1500,570 @@ void pull_task(runqueue_t *src_rq, prio_ * to be always true for them. */ if (TASK_PREEMPTS_CURR(p, this_rq)) - set_need_resched(); + resched_task(this_rq->curr); } /* * can_migrate_task - may task p from runqueue rq be migrated to this_cpu? */ static inline -int can_migrate_task(task_t *tsk, runqueue_t *rq, int this_cpu, int idle) +int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu, + struct sched_domain *sd, enum idle_type idle) { - unsigned long delta = rq->timestamp_last_tick - tsk->timestamp; - /* * We do not migrate tasks that are: * 1) running (obviously), or * 2) cannot be migrated to this CPU due to cpus_allowed, or * 3) are cache-hot on their current CPU. 
*/ - if (task_running(rq, tsk)) - return 0; - if (!cpu_isset(this_cpu, tsk->cpus_allowed)) + if (task_running(rq, p)) return 0; - if (!idle && (delta <= JIFFIES_TO_NS(cache_decay_ticks))) + if (!cpu_isset(this_cpu, p->cpus_allowed)) return 0; + + /* Aggressive migration if we've failed balancing */ + if (idle == NEWLY_IDLE || + sd->nr_balance_failed < sd->cache_nice_tries) { + if (task_hot(p, rq->timestamp_last_tick, sd)) + return -1; + } + +#ifdef CONFIG_SCHEDSTATS + if (!task_hot(p, rq->timestamp_last_tick, sd)) + schedstat_inc(sd, lb_pulled[idle]); + else + schedstat_inc(sd, lb_hot_pulled[idle]); +#endif + return 1; } /* - * Current runqueue is empty, or rebalance tick: if there is an - * inbalance (current runqueue is too short) then pull from - * busiest runqueue(s). + * move_tasks tries to move up to max_nr_move tasks from busiest to this_rq, + * as part of a balancing operation within "domain". Returns the number of + * tasks moved. + * + * Called with both runqueues locked. + */ +static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest, + unsigned long max_nr_move, struct sched_domain *sd, + enum idle_type idle) +{ + prio_array_t *array, *dst_array; + struct list_head *head, *curr; + int ret, idx, pulled = 0; + task_t *tmp; + + if (max_nr_move <= 0 || busiest->nr_running <= 1) + goto out; + + /* We first consider active tasks. 
*/ + if (busiest->active->nr_active) { + array = busiest->active; + dst_array = this_rq->active; + } else { + array = busiest->expired; + dst_array = this_rq->expired; + } + +new_array: + /* Start searching at priority 0: */ + idx = 0; +skip_bitmap: + if (!idx) + idx = sched_find_first_bit(array->bitmap); + else + idx = find_next_bit(array->bitmap, MAX_PRIO, idx); + if (idx >= MAX_PRIO) { + if (array == busiest->active && busiest->expired->nr_active) { + array = busiest->expired; + dst_array = this_rq->expired; + goto new_array; + } + goto out; + } + + head = array->queue + idx; + curr = head->next; +skip_queue: + tmp = list_entry(curr, task_t, run_list); + + curr = curr->next; + + ret = can_migrate_task(tmp, busiest, this_cpu, sd, idle); + if (ret == -1) { + idx++; + goto skip_bitmap; + } + if (!ret) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; + } + pull_task(busiest, array, tmp, this_rq, dst_array, this_cpu); + pulled++; + + /* We only want to steal up to the prescribed number of tasks. */ + if (pulled < max_nr_move) { + if (curr != head) + goto skip_queue; + idx++; + goto skip_bitmap; + } +out: + return pulled; +} + +/* + * find_busiest_group finds and returns the busiest CPU group within the + * domain. It calculates and returns the number of tasks which should be + * moved to restore balance via the imbalance parameter. 
+ */ +static struct sched_group * +find_busiest_group(struct sched_domain *sd, int this_cpu, + unsigned long *imbalance, enum idle_type idle) +{ + struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; + unsigned long max_load, avg_load, total_load, this_load, total_pwr; + + max_load = this_load = total_load = total_pwr = 0; + + do { + cpumask_t tmp; + unsigned long load; + int local_group; + int i, nr_cpus = 0; + + local_group = cpu_isset(this_cpu, group->cpumask); + + /* Tally up the load of all CPUs in the group */ + avg_load = 0; + cpus_and(tmp, group->cpumask, cpu_online_map); + if (unlikely(cpus_empty(tmp))) + goto nextgroup; + + for_each_cpu_mask(i, tmp) { + /* Bias balancing toward cpus of our domain */ + if (local_group) + load = target_load(i); + else + load = source_load(i); + + nr_cpus++; + avg_load += load; + } + + if (!nr_cpus) + goto nextgroup; + + total_load += avg_load; + total_pwr += group->cpu_power; + + /* Adjust by relative CPU power of the group */ + avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; + + if (local_group) { + this_load = avg_load; + this = group; + goto nextgroup; + } else if (avg_load > max_load) { + max_load = avg_load; + busiest = group; + } +nextgroup: + group = group->next; + } while (group != sd->groups); + + if (!busiest || this_load >= max_load) + goto out_balanced; + + avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; + + if (this_load >= avg_load || + 100*max_load <= sd->imbalance_pct*this_load) + goto out_balanced; + + /* + * We're trying to get all the cpus to the average_load, so we don't + * want to push ourselves above the average load, nor do we wish to + * reduce the max loaded cpu below the average load, as either of these + * actions would just result in more rebalancing later, and ping-pong + * tasks around. Thus we look for the minimum possible imbalance. 
+ * Negative imbalances (*we* are more loaded than anyone else) will + * be counted as no imbalance for these purposes -- we can't fix that + * by pulling tasks to us. Be careful of negative numbers as they'll + * appear as very large values with unsigned longs. + */ + *imbalance = min(max_load - avg_load, avg_load - this_load); + + /* How much load to actually move to equalise the imbalance */ + *imbalance = (*imbalance * min(busiest->cpu_power, this->cpu_power)) + / SCHED_LOAD_SCALE; + + if (*imbalance < SCHED_LOAD_SCALE - 1) { + unsigned long pwr_now = 0, pwr_move = 0; + unsigned long tmp; + + if (max_load - this_load >= SCHED_LOAD_SCALE*2) { + *imbalance = 1; + return busiest; + } + + /* + * OK, we don't have enough imbalance to justify moving tasks, + * however we may be able to increase total CPU power used by + * moving them. + */ + + pwr_now += busiest->cpu_power*min(SCHED_LOAD_SCALE, max_load); + pwr_now += this->cpu_power*min(SCHED_LOAD_SCALE, this_load); + pwr_now /= SCHED_LOAD_SCALE; + + /* Amount of load we'd subtract */ + tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/busiest->cpu_power; + if (max_load > tmp) + pwr_move += busiest->cpu_power*min(SCHED_LOAD_SCALE, + max_load - tmp); + + /* Amount of load we'd add */ + tmp = SCHED_LOAD_SCALE*SCHED_LOAD_SCALE/this->cpu_power; + if (max_load < tmp) + tmp = max_load; + pwr_move += this->cpu_power*min(SCHED_LOAD_SCALE, this_load + tmp); + pwr_move /= SCHED_LOAD_SCALE; + + /* Move if we gain another 8th of a CPU worth of throughput */ + if (pwr_move < pwr_now + SCHED_LOAD_SCALE / 8) + goto out_balanced; + + *imbalance = 1; + return busiest; + } + + /* Get rid of the scaling factor, rounding down as we divide */ + *imbalance = (*imbalance + 1) / SCHED_LOAD_SCALE; + + return busiest; + +out_balanced: + if (busiest && (idle == NEWLY_IDLE || + (idle == IDLE && max_load > SCHED_LOAD_SCALE)) ) { + *imbalance = 1; + return busiest; + } + + *imbalance = 0; + return NULL; +} + +/* + * find_busiest_queue - find the busiest 
runqueue among the cpus in group. + */ +static runqueue_t *find_busiest_queue(struct sched_group *group) +{ + cpumask_t tmp; + unsigned long load, max_load = 0; + runqueue_t *busiest = NULL; + int i; + + cpus_and(tmp, group->cpumask, cpu_online_map); + for_each_cpu_mask(i, tmp) { + load = source_load(i); + + if (load > max_load) { + max_load = load; + busiest = cpu_rq(i); + } + } + + return busiest; +} + +/* + * Check this_cpu to ensure it is balanced within domain. Attempt to move + * tasks if there is an imbalance. + * + * Called with this_rq unlocked. + */ +static int load_balance(int this_cpu, runqueue_t *this_rq, + struct sched_domain *sd, enum idle_type idle) +{ + struct sched_group *group; + runqueue_t *busiest; + unsigned long imbalance; + int nr_moved; + + spin_lock(&this_rq->lock); + schedstat_inc(sd, lb_cnt[idle]); + + group = find_busiest_group(sd, this_cpu, &imbalance, idle); + if (!group) { + schedstat_inc(sd, lb_balanced[idle]); + goto out_balanced; + } + + busiest = find_busiest_queue(group); + if (!busiest) { + schedstat_inc(sd, lb_balanced[idle]); + goto out_balanced; + } + + if (unlikely(busiest == this_rq)) { + WARN_ON(1); + goto out_balanced; + } + schedstat_add(sd, lb_imbalance[idle], imbalance); + + /* Attempt to move tasks */ + double_lock_balance(this_rq, busiest); + + nr_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle); + spin_unlock(&this_rq->lock); + spin_unlock(&busiest->lock); + + if (!nr_moved) { + schedstat_inc(sd, lb_failed[idle]); + sd->nr_balance_failed++; + + if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { + int wake = 0; + + spin_lock(&busiest->lock); + if (!busiest->active_balance) { + busiest->active_balance = 1; + busiest->push_cpu = this_cpu; + wake = 1; + } + spin_unlock(&busiest->lock); + if (wake) + wake_up_process(busiest->migration_thread); + + /* + * We've kicked active balancing, reset the failure + * counter. 
+ */ + sd->nr_balance_failed = sd->cache_nice_tries; + } + } else + sd->nr_balance_failed = 0; + + /* We were unbalanced, so reset the balancing interval */ + sd->balance_interval = sd->min_interval; + + return nr_moved; + +out_balanced: + spin_unlock(&this_rq->lock); + + /* tune up the balancing interval */ + if (sd->balance_interval < sd->max_interval) + sd->balance_interval *= 2; + + return 0; +} + +/* + * Check this_cpu to ensure it is balanced within domain. Attempt to move + * tasks if there is an imbalance. * - * We call this with the current runqueue locked, - * irqs disabled. + * Called from schedule when this_rq is about to become idle (NEWLY_IDLE). + * this_rq is locked. */ -static void load_balance(runqueue_t *this_rq, int idle, cpumask_t cpumask) +static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, + struct sched_domain *sd) { - int imbalance, idx, this_cpu = smp_processor_id(); - runqueue_t *busiest; - prio_array_t *array; - struct list_head *head, *curr; - task_t *tmp; - - if (cpu_is_offline(this_cpu)) + struct sched_group *group; + runqueue_t *busiest = NULL; + unsigned long imbalance; + int nr_moved = 0; + + schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); + group = find_busiest_group(sd, this_cpu, &imbalance, NEWLY_IDLE); + if (!group) { + schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); goto out; + } - busiest = find_busiest_queue(this_rq, this_cpu, idle, - &imbalance, cpumask); - if (!busiest) + busiest = find_busiest_queue(group); + if (!busiest || busiest == this_rq) { + schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); goto out; + } - /* - * We only want to steal a number of tasks equal to 1/2 the imbalance, - * otherwise we'll just shift the imbalance to the new queue: - */ - imbalance /= 2; + schedstat_add(sd, lb_imbalance[NEWLY_IDLE], imbalance); - /* - * We first consider expired tasks. Those will likely not be - * executed in the near future, and they are most likely to - * be cache-cold, thus switching CPUs has the least effect - * on them. 
- */ - if (busiest->expired->nr_active) - array = busiest->expired; - else - array = busiest->active; + /* Attempt to move tasks */ + double_lock_balance(this_rq, busiest); -new_array: - /* Start searching at priority 0: */ - idx = 0; -skip_bitmap: - if (!idx) - idx = sched_find_first_bit(array->bitmap); - else - idx = find_next_bit(array->bitmap, MAX_PRIO, idx); - if (idx >= MAX_PRIO) { - if (array == busiest->expired) { - array = busiest->active; - goto new_array; - } - goto out_unlock; - } + nr_moved = move_tasks(this_rq, this_cpu, busiest, + imbalance, sd, NEWLY_IDLE); - head = array->queue + idx; - curr = head->prev; -skip_queue: - tmp = list_entry(curr, task_t, run_list); + spin_unlock(&busiest->lock); - curr = curr->prev; +out: + return nr_moved; +} - if (!can_migrate_task(tmp, busiest, this_cpu, idle)) { - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; - } - pull_task(busiest, array, tmp, this_rq, this_cpu); +/* + * idle_balance is called by schedule() if this_cpu is about to become + * idle. Attempts to pull tasks from other CPUs. + */ +static inline void idle_balance(int this_cpu, runqueue_t *this_rq) +{ + struct sched_domain *sd; - /* Only migrate one task if we are idle */ - if (!idle && --imbalance) { - if (curr != head) - goto skip_queue; - idx++; - goto skip_bitmap; + if (unlikely(cpu_is_offline(this_cpu))) + return; + + for_each_domain(this_cpu, sd) { + if (sd->flags & SD_BALANCE_NEWIDLE) { + if (load_balance_newidle(this_cpu, this_rq, sd)) { + /* We've pulled tasks over so stop searching */ + break; + } + } } -out_unlock: - spin_unlock(&busiest->lock); -out: - ; } /* - * One of the idle_cpu_tick() and busy_cpu_tick() functions will - * get called every timer tick, on every CPU. Our balancing action - * frequency and balancing agressivity depends on whether the CPU is - * idle or not. + * active_load_balance is run by migration threads. It pushes a running + * task off the cpu. 
It can be required to correctly have at least 1 task + * running on each physical CPU where possible, and not have a physical / + * logical imbalance. * - * busy-rebalance every 200 msecs. idle-rebalance every 1 msec. (or on - * systems with HZ=100, every 10 msecs.) - * - * On NUMA, do a node-rebalance every 400 msecs. + * Called with busiest locked. */ -#define IDLE_REBALANCE_TICK (HZ/1000 ?: 1) -#define BUSY_REBALANCE_TICK (HZ/5 ?: 1) -#define IDLE_NODE_REBALANCE_TICK (IDLE_REBALANCE_TICK * 5) -#define BUSY_NODE_REBALANCE_TICK (BUSY_REBALANCE_TICK * 2) - -#ifdef CONFIG_NUMA -static void balance_node(runqueue_t *this_rq, int idle, int this_cpu) +static void active_load_balance(runqueue_t *busiest, int busiest_cpu) { - int node = find_busiest_node(cpu_to_node(this_cpu)); + struct sched_domain *sd; + struct sched_group *group, *busy_group; + int i; + int moved = 0; + + if (busiest->nr_running <= 1) + return; - if (node >= 0) { - cpumask_t cpumask = node_to_cpumask(node); - cpu_set(this_cpu, cpumask); - spin_lock(&this_rq->lock); - load_balance(this_rq, idle, cpumask); - spin_unlock(&this_rq->lock); + for_each_domain(busiest_cpu, sd) + if (cpu_isset(busiest->push_cpu, sd->span)) + break; + if (!sd) { + WARN_ON(1); + return; } + schedstat_inc(sd, alb_cnt); + + group = sd->groups; + while (!cpu_isset(busiest_cpu, group->cpumask)) + group = group->next; + busy_group = group; + + group = sd->groups; + do { + cpumask_t tmp; + runqueue_t *rq; + int push_cpu = 0; + + if (group == busy_group) + goto next_group; + + cpus_and(tmp, group->cpumask, cpu_online_map); + if (!cpus_weight(tmp)) + goto next_group; + + for_each_cpu_mask(i, tmp) { + if (!idle_cpu(i)) + goto next_group; + push_cpu = i; + } + + rq = cpu_rq(push_cpu); + double_lock_balance(busiest, rq); + moved += move_tasks(rq, push_cpu, busiest, 1, sd, IDLE); + spin_unlock(&rq->lock); +next_group: + group = group->next; + } while (group != sd->groups); + + if (moved) + schedstat_add(sd, alb_pushed, moved); + else + 
schedstat_inc(sd, alb_failed); } -#endif -static void rebalance_tick(runqueue_t *this_rq, int idle) +/* + * rebalance_tick will get called every timer tick, on every CPU. + * + * It checks each scheduling domain to see if it is due to be balanced, + * and initiates a balancing operation if so. + * + * Balancing parameters are set up in arch_init_sched_domains. + */ + +/* Don't have all balancing operations going off at once */ +#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS) + +static void rebalance_tick(int this_cpu, runqueue_t *this_rq, + enum idle_type idle) { -#ifdef CONFIG_NUMA - int this_cpu = smp_processor_id(); -#endif - unsigned long j = jiffies; + unsigned long old_load, this_load; + unsigned long j = jiffies + CPU_OFFSET(this_cpu); + struct sched_domain *sd; - /* - * First do inter-node rebalancing, then intra-node rebalancing, - * if both events happen in the same tick. The inter-node - * rebalancing does not necessarily have to create a perfect - * balance within the node, since we load-balance the most loaded - * node with the current CPU. (ie. other CPUs in the local node - * are not balanced.) 
- */ - if (idle) { -#ifdef CONFIG_NUMA - if (!(j % IDLE_NODE_REBALANCE_TICK)) - balance_node(this_rq, idle, this_cpu); -#endif - if (!(j % IDLE_REBALANCE_TICK)) { - spin_lock(&this_rq->lock); - load_balance(this_rq, idle, cpu_to_node_mask(this_cpu)); - spin_unlock(&this_rq->lock); - } + if (unlikely(cpu_is_offline(this_cpu))) return; - } -#ifdef CONFIG_NUMA - if (!(j % BUSY_NODE_REBALANCE_TICK)) - balance_node(this_rq, idle, this_cpu); -#endif - if (!(j % BUSY_REBALANCE_TICK)) { - spin_lock(&this_rq->lock); - load_balance(this_rq, idle, cpu_to_node_mask(this_cpu)); - spin_unlock(&this_rq->lock); + + /* Update our load */ + old_load = this_rq->cpu_load; + this_load = this_rq->nr_running * SCHED_LOAD_SCALE; + this_rq->cpu_load = (old_load + this_load) / 2; + + for_each_domain(this_cpu, sd) { + unsigned long interval = sd->balance_interval; + + if (idle != IDLE) + interval *= sd->busy_factor; + + /* scale ms to jiffies */ + interval = MSEC_TO_JIFFIES(interval); + if (unlikely(!interval)) + interval = 1; + + if (j - sd->last_balance >= interval) { + if (load_balance(this_cpu, this_rq, sd, idle)) { + /* We've pulled tasks over so no longer idle */ + idle = NOT_IDLE; + } + sd->last_balance += interval; + } } } #else /* * on UP we do not need to balance between CPUs: */ -static inline void rebalance_tick(runqueue_t *this_rq, int idle) +static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle) { } +static inline void idle_balance(int cpu, runqueue_t *rq) +{ +} +#endif + +static inline int wake_priority_sleeper(runqueue_t *rq) +{ +#ifdef CONFIG_SCHED_SMT + /* + * If an SMT sibling task has been put to sleep for priority + * reasons reschedule the idle task to see if it can now run. 
+ */ + if (rq->nr_running) { + resched_task(rq->idle); + return 1; + } #endif + return 0; +} DEFINE_PER_CPU(struct kernel_stat, kstat); @@ -1507,7 +2118,9 @@ void scheduler_tick(int user_ticks, int cpustat->iowait += sys_ticks; else cpustat->idle += sys_ticks; - rebalance_tick(rq, 1); + if (wake_priority_sleeper(rq)) + goto out; + rebalance_tick(cpu, rq, IDLE); return; } if (TASK_NICE(p) > 0) @@ -1591,9 +2204,94 @@ void scheduler_tick(int user_ticks, int out_unlock: spin_unlock(&rq->lock); out: - rebalance_tick(rq, 0); + rebalance_tick(cpu, rq, NOT_IDLE); +} + +#ifdef CONFIG_SCHED_SMT +static inline void wake_sleeping_dependent(int cpu, runqueue_t *rq) +{ + int i; + struct sched_domain *sd = rq->sd; + cpumask_t sibling_map; + + if (!(sd->flags & SD_SHARE_CPUPOWER)) + return; + + cpus_and(sibling_map, sd->span, cpu_online_map); + for_each_cpu_mask(i, sibling_map) { + runqueue_t *smt_rq; + + if (i == cpu) + continue; + + smt_rq = cpu_rq(i); + + /* + * If an SMT sibling task is sleeping due to priority + * reasons wake it up now. + */ + if (smt_rq->curr == smt_rq->idle && smt_rq->nr_running) + resched_task(smt_rq->idle); + } +} + +static inline int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p) +{ + struct sched_domain *sd = rq->sd; + cpumask_t sibling_map; + int ret = 0, i; + + if (!(sd->flags & SD_SHARE_CPUPOWER)) + return 0; + + cpus_and(sibling_map, sd->span, cpu_online_map); + for_each_cpu_mask(i, sibling_map) { + runqueue_t *smt_rq; + task_t *smt_curr; + + if (i == cpu) + continue; + + smt_rq = cpu_rq(i); + smt_curr = smt_rq->curr; + + /* + * If a user task with lower static priority than the + * running task on the SMT sibling is trying to schedule, + * delay it till there is proportionately less timeslice + * left of the sibling task to prevent a lower priority + * task from using an unfair proportion of the + * physical cpu's resources. 
-ck + */ + if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) / 100) > + task_timeslice(p) || rt_task(smt_curr)) && + p->mm && smt_curr->mm && !rt_task(p)) + ret = 1; + + /* + * Reschedule a lower priority task on the SMT sibling, + * or wake it up if it has been put to sleep for priority + * reasons. + */ + if ((((p->time_slice * (100 - sd->per_cpu_gain) / 100) > + task_timeslice(smt_curr) || rt_task(p)) && + smt_curr->mm && p->mm && !rt_task(smt_curr)) || + (smt_curr == smt_rq->idle && smt_rq->nr_running)) + resched_task(smt_curr); + } + return ret; +} +#else +static inline void wake_sleeping_dependent(int cpu, runqueue_t *rq) +{ } +static inline int dependent_sleeper(int cpu, runqueue_t *rq, task_t *p) +{ + return 0; +} +#endif + /* * schedule() is the main scheduler function. */ @@ -1606,7 +2304,7 @@ asmlinkage void __sched schedule(void) struct list_head *queue; unsigned long long now; unsigned long run_time; - int idx; + int cpu, idx; /* * Test if we are atomic. Since do_exit() needs to call into @@ -1626,6 +2324,7 @@ need_resched: rq = this_rq(); release_kernel_lock(prev); + schedstat_inc(rq, sched_cnt); now = sched_clock(); if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG)) run_time = now - prev->timestamp; @@ -1656,13 +2355,14 @@ need_resched: deactivate_task(prev, rq); } + cpu = smp_processor_id(); if (unlikely(!rq->nr_running)) { -#ifdef CONFIG_SMP - load_balance(rq, 1, cpu_to_node_mask(smp_processor_id())); -#endif + idle_balance(cpu, rq); if (!rq->nr_running) { next = rq->idle; rq->expired_timestamp = 0; + wake_sleeping_dependent(cpu, rq); + schedstat_inc(rq, sched_idle); goto switch_tasks; } } @@ -1672,6 +2372,7 @@ need_resched: /* * Switch the active and expired arrays. 
*/ + schedstat_inc(rq, sched_switch); rq->active = rq->expired; rq->expired = array; array = rq->active; @@ -1683,6 +2384,11 @@ need_resched: queue = array->queue + idx; next = list_entry(queue->next, task_t, run_list); + if (dependent_sleeper(cpu, rq, next)) { + next = rq->idle; + goto switch_tasks; + } + if (!rt_task(next) && next->activated > 0) { unsigned long long delta = now - next->timestamp; @@ -1836,15 +2542,16 @@ void fastcall __wake_up_locked(wait_queu void fastcall __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) { unsigned long flags; + int sync = 1; if (unlikely(!q)) return; + if (unlikely(!nr_exclusive)) + sync = 0; + spin_lock_irqsave(&q->lock, flags); - if (likely(nr_exclusive)) - __wake_up_common(q, mode, nr_exclusive, 1); - else - __wake_up_common(q, mode, nr_exclusive, 0); + __wake_up_common(q, mode, nr_exclusive, sync); spin_unlock_irqrestore(&q->lock, flags); } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ @@ -2015,6 +2722,13 @@ out_unlock: EXPORT_SYMBOL(set_user_nice); +#if defined( CONFIG_KGDB) +struct task_struct * kgdb_get_idle(int this_cpu) +{ + return cpu_rq(this_cpu)->idle; +} +#endif + #ifndef __alpha__ /* @@ -2199,7 +2913,7 @@ static int setscheduler(pid_t pid, int p if (task_running(rq, p)) { if (p->prio > oldprio) resched_task(rq->curr); - } else if (p->prio < rq->curr->prio) + } else if (TASK_PREEMPTS_CURR(p, rq)) resched_task(rq->curr); } @@ -2398,7 +3112,9 @@ asmlinkage long sys_sched_yield(void) { runqueue_t *rq = this_rq_lock(); prio_array_t *array = current->array; + prio_array_t *target = rq->expired; + schedstat_inc(rq, yld_cnt); /* * We implement yielding by moving the task into the expired * queue. @@ -2406,13 +3122,12 @@ asmlinkage long sys_sched_yield(void) * (special rule: RT tasks will just roundrobin in the active * array.) 
*/ - if (likely(!rt_task(current))) { - dequeue_task(current, array); - enqueue_task(current, rq->expired); - } else { - list_del(&current->run_list); - list_add_tail(&current->run_list, array->queue + current->prio); - } + if (unlikely(rt_task(current))) + target = rq->active; + + dequeue_task(current, array); + enqueue_task(current, target); + /* * Since we are going to call schedule() anyway, there's * no need to preempt: @@ -2723,7 +3438,12 @@ int set_cpus_allowed(task_t *p, cpumask_ goto out; } - if (__set_cpus_allowed(p, new_mask, &req)) { + p->cpus_allowed = new_mask; + /* Can the task run on the task's current CPU? If so, we're done */ + if (cpu_isset(task_cpu(p), new_mask)) + goto out; + + if (migrate_task(p, any_online_cpu(new_mask), &req)) { /* Need help from migration thread: drop lock and wait. */ task_rq_unlock(rq, &flags); wake_up_process(rq->migration_thread); @@ -2737,22 +3457,34 @@ out: EXPORT_SYMBOL_GPL(set_cpus_allowed); -/* Move (not current) task off this cpu, onto dest cpu. */ -static void move_task_away(struct task_struct *p, int dest_cpu) +/* + * Move (not current) task off this cpu, onto dest cpu. We're doing + * this because either it can't run here any more (set_cpus_allowed() + * away from this CPU, or CPU going down), or because we're + * attempting to rebalance this task on exec (sched_balance_exec). + * + * So we race with normal scheduler movements, but that's OK, as long + * as the task is no longer on this CPU. + */ +static void __migrate_task(struct task_struct *p, int dest_cpu) { runqueue_t *rq_dest; rq_dest = cpu_rq(dest_cpu); double_rq_lock(this_rq(), rq_dest); + /* Already moved. */ if (task_cpu(p) != smp_processor_id()) - goto out; /* Already moved */ + goto out; + /* Affinity changed (again).
*/ + if (!cpu_isset(dest_cpu, p->cpus_allowed)) + goto out; set_task_cpu(p, dest_cpu); if (p->array) { deactivate_task(p, this_rq()); activate_task(p, rq_dest); - if (p->prio < rq_dest->curr->prio) + if (TASK_PREEMPTS_CURR(p, rq_dest)) resched_task(rq_dest->curr); } p->timestamp = rq_dest->timestamp_last_tick; @@ -2782,7 +3514,13 @@ static int migration_thread(void * data) refrigerator(PF_IOTHREAD); spin_lock_irq(&rq->lock); + if (rq->active_balance) { + active_load_balance(rq, cpu); + rq->active_balance = 0; + } + head = &rq->migration_queue; + current->state = TASK_INTERRUPTIBLE; if (list_empty(head)) { spin_unlock_irq(&rq->lock); @@ -2791,11 +3529,19 @@ static int migration_thread(void * data) } req = list_entry(head->next, migration_req_t, list); list_del_init(head->next); + spin_unlock(&rq->lock); - move_task_away(req->task, - any_online_cpu(req->task->cpus_allowed)); + if (req->type == REQ_MOVE_TASK) { + __migrate_task(req->task, req->dest_cpu); + } else if (req->type == REQ_SET_DOMAIN) { + rq->sd = req->sd; + } else { + WARN_ON(1); + } + local_irq_enable(); + complete(&req->done); } return 0; @@ -2851,7 +3597,7 @@ void migrate_all_tasks(void) tsk->pid, tsk->comm, src_cpu); } - move_task_away(tsk, dest_cpu); + __migrate_task(tsk, dest_cpu); } while_each_thread(t, tsk); write_unlock(&tasklist_lock); @@ -2930,23 +3676,299 @@ int __init migration_init(void) spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; EXPORT_SYMBOL(kernel_flag); +#ifdef CONFIG_SMP +/* Attach the domain 'sd' to 'cpu' as its base domain */ +void cpu_attach_domain(struct sched_domain *sd, int cpu) +{ + migration_req_t req; + unsigned long flags; + runqueue_t *rq = cpu_rq(cpu); + int local = 1; + + lock_cpu_hotplug(); + + spin_lock_irqsave(&rq->lock, flags); + + if (cpu == smp_processor_id() || cpu_is_offline(cpu)) { + rq->sd = sd; + } else { + init_completion(&req.done); + req.type = REQ_SET_DOMAIN; + req.sd = sd; + list_add(&req.list, &rq->migration_queue); + local = 0; 
+ } + + spin_unlock_irqrestore(&rq->lock, flags); + + if (!local) { + wake_up_process(rq->migration_thread); + wait_for_completion(&req.done); + } + + unlock_cpu_hotplug(); +} + +#ifdef ARCH_HAS_SCHED_DOMAIN +extern void __init arch_init_sched_domains(void); +#else +static struct sched_group sched_group_cpus[NR_CPUS]; +static DEFINE_PER_CPU(struct sched_domain, cpu_domains); +#ifdef CONFIG_NUMA +static struct sched_group sched_group_nodes[MAX_NUMNODES]; +static DEFINE_PER_CPU(struct sched_domain, node_domains); +static void __init arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_node = NULL, *last_node = NULL; + + /* Set up domains */ + for_each_cpu(i) { + int node = cpu_to_node(i); + cpumask_t nodemask = node_to_cpumask(node); + struct sched_domain *node_sd = &per_cpu(node_domains, i); + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + + *node_sd = SD_NODE_INIT; + node_sd->span = cpu_possible_map; + node_sd->groups = &sched_group_nodes[cpu_to_node(i)]; + + *cpu_sd = SD_CPU_INIT; + cpus_and(cpu_sd->span, nodemask, cpu_possible_map); + cpu_sd->groups = &sched_group_cpus[i]; + cpu_sd->parent = node_sd; + } + + /* Set up groups */ + for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t tmp = node_to_cpumask(i); + cpumask_t nodemask; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + struct sched_group *node = &sched_group_nodes[i]; + int j; + + cpus_and(nodemask, tmp, cpu_possible_map); + + if (cpus_empty(nodemask)) + continue; + + node->cpumask = nodemask; + node->cpu_power = SCHED_LOAD_SCALE * cpus_weight(node->cpumask); + + for_each_cpu_mask(j, node->cpumask) { + struct sched_group *cpu = &sched_group_cpus[j]; + + cpus_clear(cpu->cpumask); + cpu_set(j, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + if (!first_node) + first_node = node; + if (last_node) + last_node->next = node; + last_node = 
node; + } + last_node->next = first_node; + + mb(); + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_sd, i); + } +} + +#else /* !CONFIG_NUMA */ +static void __init arch_init_sched_domains(void) +{ + int i; + struct sched_group *first_cpu = NULL, *last_cpu = NULL; + + /* Set up domains */ + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + + *cpu_sd = SD_CPU_INIT; + cpu_sd->cache_nice_tries = 2; + cpu_sd->span = cpu_possible_map; + cpu_sd->groups = &sched_group_cpus[i]; + } + + /* Set up CPU groups */ + for_each_cpu_mask(i, cpu_possible_map) { + struct sched_group *cpu = &sched_group_cpus[i]; + + cpus_clear(cpu->cpumask); + cpu_set(i, cpu->cpumask); + cpu->cpu_power = SCHED_LOAD_SCALE; + + if (!first_cpu) + first_cpu = cpu; + if (last_cpu) + last_cpu->next = cpu; + last_cpu = cpu; + } + last_cpu->next = first_cpu; + + mb(); /* domains were modified outside the lock */ + for_each_cpu(i) { + struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i); + cpu_attach_domain(cpu_sd, i); + } +} + +#endif /* CONFIG_NUMA */ +#endif /* ARCH_HAS_SCHED_DOMAIN */ + +#define SCHED_DOMAIN_DEBUG +#ifdef SCHED_DOMAIN_DEBUG +void sched_domain_debug(void) +{ + int i; + + for_each_cpu(i) { + runqueue_t *rq = cpu_rq(i); + struct sched_domain *sd; + int level = 0; + + sd = rq->sd; + + printk(KERN_DEBUG "CPU%d: %s\n", + i, (cpu_online(i) ? 
" online" : "offline")); + + do { + int j; + char str[NR_CPUS]; + struct sched_group *group = sd->groups; + cpumask_t groupmask, tmp; + + cpumask_scnprintf(str, NR_CPUS, sd->span); + cpus_clear(groupmask); + + printk(KERN_DEBUG); + for (j = 0; j < level + 1; j++) + printk(" "); + printk("domain %d: span %s\n", level, str); + + if (!cpu_isset(i, sd->span)) + printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i); + if (!cpu_isset(i, group->cpumask)) + printk(KERN_DEBUG "ERROR domain->groups does not contain CPU%d\n", i); + if (!group->cpu_power) + printk(KERN_DEBUG "ERROR domain->cpu_power not set\n"); + + printk(KERN_DEBUG); + for (j = 0; j < level + 2; j++) + printk(" "); + printk("groups:"); + do { + if (!group) { + printk(" ERROR: NULL"); + break; + } + + if (!cpus_weight(group->cpumask)) + printk(" ERROR empty group:"); + + cpus_and(tmp, groupmask, group->cpumask); + if (cpus_weight(tmp) > 0) + printk(" ERROR repeated CPUs:"); + + cpus_or(groupmask, groupmask, group->cpumask); + + cpumask_scnprintf(str, NR_CPUS, group->cpumask); + printk(" %s", str); + + group = group->next; + } while (group != sd->groups); + printk("\n"); + + if (!cpus_equal(sd->span, groupmask)) + printk(KERN_DEBUG "ERROR groups don't span domain->span\n"); + + level++; + sd = sd->parent; + + if (sd) { + cpus_and(tmp, groupmask, sd->span); + if (!cpus_equal(tmp, groupmask)) + printk(KERN_DEBUG "ERROR parent span is not a superset of domain->span\n"); + } + + } while (sd); + } +} +#else +#define sched_domain_debug() {} +#endif + +void __init sched_init_smp(void) +{ + arch_init_sched_domains(); + sched_domain_debug(); +} +#else +void __init sched_init_smp(void) +{ +} +#endif /* CONFIG_SMP */ + +int in_sched_functions(unsigned long addr) +{ + /* Linker adds these: start and end of __sched functions */ + extern char __sched_text_start[], __sched_text_end[]; + return addr >= (unsigned long)__sched_text_start + && addr < (unsigned long)__sched_text_end; +} + void __init 
sched_init(void) { runqueue_t *rq; int i, j, k; +#ifdef CONFIG_SMP + /* Set up an initial dummy domain for early boot */ + static struct sched_domain sched_domain_init; + static struct sched_group sched_group_init; + cpumask_t cpu_mask_all = CPU_MASK_ALL; + + memset(&sched_domain_init, 0, sizeof(struct sched_domain)); + sched_domain_init.span = cpu_mask_all; + sched_domain_init.groups = &sched_group_init; + sched_domain_init.last_balance = jiffies; + sched_domain_init.balance_interval = INT_MAX; /* Don't balance */ + + memset(&sched_group_init, 0, sizeof(struct sched_group)); + sched_group_init.cpumask = cpu_mask_all; + sched_group_init.next = &sched_group_init; + sched_group_init.cpu_power = SCHED_LOAD_SCALE; +#endif + for (i = 0; i < NR_CPUS; i++) { prio_array_t *array; rq = cpu_rq(i); + spin_lock_init(&rq->lock); rq->active = rq->arrays; rq->expired = rq->arrays + 1; rq->best_expired_prio = MAX_PRIO; - spin_lock_init(&rq->lock); +#ifdef CONFIG_SMP + rq->sd = &sched_domain_init; + rq->cpu_load = 0; + rq->active_balance = 0; + rq->push_cpu = 0; + rq->migration_thread = NULL; INIT_LIST_HEAD(&rq->migration_queue); +#endif atomic_set(&rq->nr_iowait, 0); - nr_running_init(rq); for (j = 0; j < 2; j++) { array = rq->arrays + j; @@ -2968,8 +3990,6 @@ void __init sched_init(void) set_task_cpu(current, smp_processor_id()); wake_up_forked_process(current); - init_timers(); - /* * The boot idle thread does lazy MMU switching as well: */ --- linux-2.6.6-rc1/kernel/signal.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/signal.c 2004-04-18 22:25:45.308942472 -0700 @@ -1386,12 +1386,12 @@ static void __wake_up_parent(struct task * Fortunately this is not necessary for thread groups: */ if (p->tgid == tsk->tgid) { - wake_up_interruptible(&tsk->wait_chldexit); + wake_up_interruptible_sync(&tsk->wait_chldexit); return; } do { - wake_up_interruptible(&tsk->wait_chldexit); + wake_up_interruptible_sync(&tsk->wait_chldexit); tsk = next_thread(tsk); if (tsk->signal != 
parent->signal) BUG(); @@ -1700,7 +1700,8 @@ static inline int handle_group_stop(void return 1; } -int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs, void *cookie) +int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, + struct pt_regs *regs, void *cookie) { sigset_t *mask = &current->blocked; int signr = 0; @@ -1769,8 +1770,15 @@ relock: ka = &current->sighand->action[signr-1]; if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; - if (ka->sa.sa_handler != SIG_DFL) /* Run the handler. */ + if (ka->sa.sa_handler != SIG_DFL) { + /* Run the handler. */ + *return_ka = *ka; + + if (ka->sa.sa_flags & SA_ONESHOT) + ka->sa.sa_handler = SIG_DFL; + break; /* will return non-zero "signr" value */ + } /* * Now we are doing the default action for this signal. @@ -2052,7 +2060,6 @@ int copy_siginfo_to_user(siginfo_t __use case __SI_MESGQ: /* But this is */ err |= __put_user(from->si_pid, &to->si_pid); err |= __put_user(from->si_uid, &to->si_uid); - err |= __put_user(from->si_int, &to->si_int); err |= __put_user(from->si_ptr, &to->si_ptr); break; default: /* this is just in case for now ... */ @@ -2552,7 +2559,5 @@ void __init signals_init(void) kmem_cache_create("sigqueue", sizeof(struct sigqueue), __alignof__(struct sigqueue), - 0, NULL, NULL); - if (!sigqueue_cachep) - panic("signals_init(): cannot create sigqueue SLAB cache"); + SLAB_PANIC, NULL, NULL); } --- linux-2.6.6-rc1/kernel/softirq.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/softirq.c 2004-04-18 22:25:35.647411248 -0700 @@ -132,11 +132,22 @@ EXPORT_SYMBOL(do_softirq); void local_bh_enable(void) { + if (in_irq()) { + printk("local_bh_enable() was called in hard irq context.
" + "This is probably a bug\n"); + dump_stack(); + } + __local_bh_enable(); - WARN_ON(irqs_disabled()); - if (unlikely(!in_interrupt() && - local_softirq_pending())) + if (unlikely(!in_interrupt() && local_softirq_pending())) { + if (irqs_disabled()) { + printk("local_bh_enable() was called with local " + "interrupts disabled. This is probably a" + " bug\n"); + dump_stack(); + } invoke_softirq(); + } preempt_check_resched(); } EXPORT_SYMBOL(local_bh_enable); --- linux-2.6.6-rc1/kernel/sys.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/sys.c 2004-04-18 22:25:48.859402720 -0700 @@ -271,6 +271,9 @@ cond_syscall(compat_sys_mq_timedsend) cond_syscall(compat_sys_mq_timedreceive) cond_syscall(compat_sys_mq_notify) cond_syscall(compat_sys_mq_getsetattr) +cond_syscall(sys_mbind) +cond_syscall(sys_get_mempolicy) +cond_syscall(sys_set_mempolicy) /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read) --- linux-2.6.6-rc1/kernel/sysctl.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/kernel/sysctl.c 2004-04-18 22:25:25.023026400 -0700 @@ -93,6 +93,7 @@ extern int sem_ctls[]; #ifdef __sparc__ extern char reboot_command []; extern int stop_a_enabled; +extern int scons_pwroff; #endif #ifdef __hppa__ @@ -325,6 +326,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_SPARC_SCONS_PWROFF, + .procname = "scons-poweroff", + .data = &scons_pwroff, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #ifdef __hppa__ { --- linux-2.6.6-rc1/kernel/user.c 2003-08-08 22:55:14.000000000 -0700 +++ 25/kernel/user.c 2004-04-18 22:25:32.699859344 -0700 @@ -138,10 +138,7 @@ static int __init uid_cache_init(void) int n; uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct), - 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if(!uid_cachep) - panic("Cannot create uid taskcount SLAB cache\n"); + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); for(n = 0; n < UIDHASH_SZ; ++n) 
INIT_LIST_HEAD(uidhash_table + n); --- linux-2.6.6-rc1/kernel/workqueue.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/kernel/workqueue.c 2004-04-18 22:25:27.575638344 -0700 @@ -27,7 +27,7 @@ #include /* - * The per-CPU workqueue. + * The per-CPU workqueue (if single thread, we always use cpu 0's). * * The sequence counters are for flush_scheduled_work(). It wants to wait * until until all currently-scheduled works are completed, but it doesn't @@ -59,20 +59,19 @@ struct cpu_workqueue_struct { struct workqueue_struct { struct cpu_workqueue_struct cpu_wq[NR_CPUS]; const char *name; - struct list_head list; + struct list_head list; /* Empty if single thread */ }; -#ifdef CONFIG_HOTPLUG_CPU -/* All the workqueues on the system, for hotplug cpu to add/remove - threads to each one as cpus come/go. Protected by cpucontrol - sem. */ +/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove + threads to each one as cpus come/go. */ +static spinlock_t workqueue_lock = SPIN_LOCK_UNLOCKED; static LIST_HEAD(workqueues); -#define add_workqueue(wq) list_add(&(wq)->list, &workqueues) -#define del_workqueue(wq) list_del(&(wq)->list) -#else -#define add_workqueue(wq) -#define del_workqueue(wq) -#endif /* CONFIG_HOTPLUG_CPU */ + +/* If it's single threaded, it isn't in the list of workqueues. */ +static inline int is_single_threaded(struct workqueue_struct *wq) +{ + return list_empty(&wq->list); +} /* Preempt must be disabled. 
*/ static void __queue_work(struct cpu_workqueue_struct *cwq, @@ -100,6 +99,8 @@ int fastcall queue_work(struct workqueue int ret = 0, cpu = get_cpu(); if (!test_and_set_bit(0, &work->pending)) { + if (unlikely(is_single_threaded(wq))) + cpu = 0; BUG_ON(!list_empty(&work->entry)); __queue_work(wq->cpu_wq + cpu, work); ret = 1; @@ -112,8 +113,12 @@ static void delayed_work_timer_fn(unsign { struct work_struct *work = (struct work_struct *)__data; struct workqueue_struct *wq = work->wq_data; + int cpu = smp_processor_id(); + + if (unlikely(is_single_threaded(wq))) + cpu = 0; - __queue_work(wq->cpu_wq + smp_processor_id(), work); + __queue_work(wq->cpu_wq + cpu, work); } int fastcall queue_delayed_work(struct workqueue_struct *wq, @@ -234,12 +239,12 @@ void fastcall flush_workqueue(struct wor might_sleep(); lock_cpu_hotplug(); - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_online_cpu(cpu) { DEFINE_WAIT(wait); long sequence_needed; - if (!cpu_online(cpu)) - continue; + if (is_single_threaded(wq)) + cpu = 0; cwq = wq->cpu_wq + cpu; if (cwq->thread == current) { @@ -266,7 +271,8 @@ void fastcall flush_workqueue(struct wor unlock_cpu_hotplug(); } -static int create_workqueue_thread(struct workqueue_struct *wq, int cpu) +static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, + int cpu) { struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu; struct task_struct *p; @@ -282,16 +288,17 @@ static int create_workqueue_thread(struc p = kthread_create(worker_thread, cwq, "%s/%d", wq->name, cpu); if (IS_ERR(p)) - return PTR_ERR(p); + return NULL; cwq->thread = p; - kthread_bind(p, cpu); - return 0; + return p; } -struct workqueue_struct *create_workqueue(const char *name) +struct workqueue_struct *__create_workqueue(const char *name, + int singlethread) { int cpu, destroy = 0; struct workqueue_struct *wq; + struct task_struct *p; BUG_ON(strlen(name) > 10); @@ -303,15 +310,26 @@ struct workqueue_struct *create_workqueu wq->name = name; /* We don't need 
the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; - if (create_workqueue_thread(wq, cpu) < 0) + if (singlethread) { + INIT_LIST_HEAD(&wq->list); + p = create_workqueue_thread(wq, 0); + if (!p) destroy = 1; else - wake_up_process(wq->cpu_wq[cpu].thread); + wake_up_process(p); + } else { + spin_lock(&workqueue_lock); + list_add(&wq->list, &workqueues); + spin_unlock_irq(&workqueue_lock); + for_each_online_cpu(cpu) { + p = create_workqueue_thread(wq, cpu); + if (p) { + kthread_bind(p, cpu); + wake_up_process(p); + } else + destroy = 1; + } } - add_workqueue(wq); /* * Was there any error during startup? If yes then clean up: @@ -347,11 +365,15 @@ void destroy_workqueue(struct workqueue_ /* We don't need the distraction of CPUs appearing and vanishing. */ lock_cpu_hotplug(); - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (cpu_online(cpu)) + if (is_single_threaded(wq)) + cleanup_workqueue_thread(wq, 0); + else { + for_each_online_cpu(cpu) cleanup_workqueue_thread(wq, cpu); + spin_lock(&workqueue_lock); + list_del(&wq->list); + spin_unlock_irq(&workqueue_lock); } - del_workqueue(wq); unlock_cpu_hotplug(); kfree(wq); } @@ -467,7 +489,7 @@ void init_workqueues(void) BUG_ON(!keventd_wq); } -EXPORT_SYMBOL_GPL(create_workqueue); +EXPORT_SYMBOL_GPL(__create_workqueue); EXPORT_SYMBOL_GPL(queue_work); EXPORT_SYMBOL_GPL(queue_delayed_work); EXPORT_SYMBOL_GPL(flush_workqueue); --- linux-2.6.6-rc1/lib/idr.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/lib/idr.c 2004-04-18 22:25:44.489067112 -0700 @@ -109,7 +109,7 @@ static kmem_cache_t *idr_layer_cache; -static inline struct idr_layer *alloc_layer(struct idr *idp) +static struct idr_layer *alloc_layer(struct idr *idp) { struct idr_layer *p; @@ -123,7 +123,7 @@ static inline struct idr_layer *alloc_la return(p); } -static inline void free_layer(struct idr *idp, struct idr_layer *p) +static void free_layer(struct idr *idp, struct idr_layer 
*p) { /* * Depends on the return element being zeroed. @@ -137,7 +137,7 @@ static inline void free_layer(struct idr int idr_pre_get(struct idr *idp, unsigned gfp_mask) { - while (idp->id_free_cnt < idp->layers + 1) { + while (idp->id_free_cnt < IDR_FREE_MAX) { struct idr_layer *new; new = kmem_cache_alloc(idr_layer_cache, gfp_mask); if(new == NULL) @@ -148,91 +148,125 @@ int idr_pre_get(struct idr *idp, unsigne } EXPORT_SYMBOL(idr_pre_get); -static inline int sub_alloc(struct idr *idp, int shift, void *ptr) +static int sub_alloc(struct idr *idp, void *ptr, int *starting_id) { - int n, v = 0; - struct idr_layer *p; - struct idr_layer **pa[MAX_LEVEL]; - struct idr_layer ***paa = &pa[0]; - - *paa = NULL; - *++paa = &idp->top; + int n, m, sh; + struct idr_layer *p, *new; + struct idr_layer *pa[MAX_LEVEL]; + int l, id; + long bm; - /* - * By keeping each pointer in an array we can do the - * "after" recursion processing. In this case, that means - * we can update the upper level bit map. - */ - - while (1){ - p = **paa; - n = ffz(p->bitmap); - if (shift){ - /* - * We run around this while until we - * reach the leaf node... - */ - if (!p->ary[n]){ - /* - * If no node, allocate one, AFTER - * we insure that we will not - * intrude on the reserved bit field. - */ - if ((n << shift) >= MAX_ID_BIT) - return -1; - p->ary[n] = alloc_layer(idp); - p->count++; + id = *starting_id; + p = idp->top; + l = idp->layers; + pa[l--] = NULL; + while (1) { + /* + * We run around this while until we reach the leaf node... + */ + n = (id >> (IDR_BITS*l)) & IDR_MASK; + bm = ~p->bitmap; + m = find_next_bit(&bm, IDR_SIZE, n); + if (m == IDR_SIZE) { + /* no space available go back to previous layer. */ + l++; + id = (id | ((1 << (IDR_BITS*l))-1)) + 1; + if (!(p = pa[l])) { + *starting_id = id; + return -2; } - *++paa = &p->ary[n]; - v += (n << shift); - shift -= IDR_BITS; - } else { - /* - * We have reached the leaf node, plant the - * users pointer and return the raw id. 
- */ - p->ary[n] = (struct idr_layer *)ptr; - __set_bit(n, &p->bitmap); - v += n; + continue; + } + if (m != n) { + sh = IDR_BITS*l; + id = ((id >> sh) ^ n ^ m) << sh; + } + if (id >= MAX_ID_BIT) + return -1; + if (l == 0) + break; + /* + * Create the layer below if it is missing. + */ + if (!p->ary[m]) { + if (!(new = alloc_layer(idp))) + return -1; + p->ary[m] = new; p->count++; - /* - * This is the post recursion processing. Once - * we find a bitmap that is not full we are - * done - */ - while (*(paa-1) && (**paa)->bitmap == IDR_FULL){ - n = *paa - &(**(paa-1))->ary[0]; - __set_bit(n, &(**--paa)->bitmap); - } - return(v); } + pa[l--] = p; + p = p->ary[m]; + } + /* + * We have reached the leaf node, plant the + * users pointer and return the raw id. + */ + p->ary[m] = (struct idr_layer *)ptr; + __set_bit(m, &p->bitmap); + p->count++; + /* + * If this layer is full mark the bit in the layer above + * to show that this part of the radix tree is full. + * This may complete the layer above and require walking + * up the radix tree. + */ + n = id; + while (p->bitmap == IDR_FULL) { + if (!(p = pa[++l])) + break; + n = n >> IDR_BITS; + __set_bit((n & IDR_MASK), &p->bitmap); } + return(id); } -int idr_get_new(struct idr *idp, void *ptr) +int idr_get_new_above(struct idr *idp, void *ptr, int starting_id) { - int v; + struct idr_layer *p, *new; + int layers, v, id; - if (idp->id_free_cnt < idp->layers + 1) - return (-1); + id = starting_id; +build_up: + p = idp->top; + layers = idp->layers; + if (unlikely(!p)) { + if (!(p = alloc_layer(idp))) + return -1; + layers = 1; + } /* - * Add a new layer if the array is full + * Add a new layer to the top of the tree if the requested + * id is larger than the currently allocated space. */ - if (unlikely(!idp->top || idp->top->bitmap == IDR_FULL)){ - /* - * This is a bit different than the lower layers because - * we have one branch already allocated and full. 
- */ - struct idr_layer *new = alloc_layer(idp); - new->ary[0] = idp->top; - if ( idp->top) - ++new->count; - idp->top = new; - if ( idp->layers++ ) + while (id >= (1 << (layers*IDR_BITS))) { + layers++; + if (!p->count) + continue; + if (!(new = alloc_layer(idp))) { + /* + * The allocation failed. If we built part of + * the structure tear it down. + */ + for (new = p; p && p != idp->top; new = p) { + p = p->ary[0]; + new->ary[0] = 0; + new->bitmap = new->count = 0; + free_layer(idp, new); + } + return -1; + } + new->ary[0] = p; + new->count = 1; + if (p->bitmap == IDR_FULL) __set_bit(0, &new->bitmap); + p = new; } - v = sub_alloc(idp, (idp->layers - 1) * IDR_BITS, ptr); - if ( likely(v >= 0 )){ + idp->top = p; + idp->layers = layers; + v = sub_alloc(idp, ptr, &id); + if (v == -2) + goto build_up; + if ( likely(v >= 0 )) { idp->count++; v += (idp->count << MAX_ID_SHIFT); if ( unlikely( v == -1 )) @@ -240,10 +274,16 @@ int idr_get_new(struct idr *idp, void *p } return(v); } +EXPORT_SYMBOL(idr_get_new_above); + +int idr_get_new(struct idr *idp, void *ptr) +{ + return idr_get_new_above(idp, ptr, 0); +} EXPORT_SYMBOL(idr_get_new); -static inline void sub_remove(struct idr *idp, int shift, int id) +static void sub_remove(struct idr *idp, int shift, int id) { struct idr_layer *p = idp->top; struct idr_layer **pa[MAX_LEVEL]; --- linux-2.6.6-rc1/lib/kobject.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/lib/kobject.c 2004-04-18 22:25:25.024026248 -0700 @@ -349,16 +349,16 @@ int kobject_set_name(struct kobject * ko /* * Need more space? Allocate it and try again */ - name = kmalloc(need,GFP_KERNEL); + limit = need + 1; + name = kmalloc(limit,GFP_KERNEL); if (!name) { error = -ENOMEM; goto Done; } - limit = need; need = vsnprintf(name,limit,fmt,args); /* Still? Give up. 
*/ - if (need > limit) { + if (need >= limit) { kfree(name); error = -EFAULT; goto Done; --- linux-2.6.6-rc1/lib/radix-tree.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/lib/radix-tree.c 2004-04-18 22:25:32.699859344 -0700 @@ -30,12 +30,7 @@ #include #include -/* - * Radix tree node definition. - * - * RADIX_TREE_MAP_SHIFT must be >= log2(BITS_PER_LONG). Otherwise the tags - * array will have zero size and the set_tag() arithmetic will go wrong. - */ + #ifdef __KERNEL__ #define RADIX_TREE_MAP_SHIFT 6 #else @@ -804,9 +799,7 @@ void __init radix_tree_init(void) { radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, - 0, radix_tree_node_ctor, NULL); - if (!radix_tree_node_cachep) - panic ("Failed to create radix_tree_node cache\n"); + SLAB_PANIC, radix_tree_node_ctor, NULL); radix_tree_init_maxindex(); hotcpu_notifier(radix_tree_callback, 0); } --- linux-2.6.6-rc1/lib/rwsem.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/lib/rwsem.c 2004-04-18 22:25:56.646218944 -0700 @@ -7,11 +7,13 @@ #include #include #include +#include struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - unsigned int flags; + struct list_head list; + struct task_struct *task; + struct completion granted; + unsigned int flags; #define RWSEM_WAITING_FOR_READ 0x00000001 #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; @@ -20,100 +22,105 @@ struct rwsem_waiter { #undef rwsemtrace void rwsemtrace(struct rw_semaphore *sem, const char *str) { - printk("sem=%p\n",sem); - printk("(sem)=%08lx\n",sem->count); + printk("sem=%p\n", sem); + printk("(sem)=%08lx\n", sem->count); if (sem->debug) - printk("[%d] %s({%08lx})\n",current->pid,str,sem->count); + printk("[%d] %s({%08lx})\n", current->pid, str, sem->count); } #endif /* - * handle the lock being released whilst there are processes blocked on it that can now run + * handle the lock being released whilst there are processes blocked on it + * that can now run * - if we come here, then: - * - the 
'active part' of the count (&0x0000ffff) reached zero but has been re-incremented - * - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so) + * - the 'active part' of the count (&0x0000ffff) reached zero but has been + * re-incremented + * - the 'waiting part' of the count (&0xffff0000) is negative (and will + * still be so) * - there must be someone on the queue * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having flags zeroised + * - woken process blocks are moved to a private list under the lock and + * completed once it has been dropped * - writers are only woken if wakewrite is non-zero + * + * The spinlock will be dropped by this function. */ -static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) +static inline struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) { + LIST_HEAD(wake_list); struct rwsem_waiter *waiter; - struct list_head *next; signed long oldcount; - int woken, loop; + int woken; - rwsemtrace(sem,"Entering __rwsem_do_wake"); + rwsemtrace(sem, "Entering __rwsem_do_wake"); if (!wakewrite) goto dont_wake_writers; - /* only wake someone up if we can transition the active part of the count from 0 -> 1 */ - try_again: - oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS; + /* only wake someone if we can transition the active part of the count + * from 0 -> 1 */ +try_again: + oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS, sem) + - RWSEM_ACTIVE_BIAS; if (oldcount & RWSEM_ACTIVE_MASK) goto undo; - waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); - /* try to grant a single write lock if there's a writer at the front of the queue - * - note we leave the 'active part' of the count incremented by 1 and the waiting part - * incremented by 0x00010000 + /* try to grant a single write lock if there's a writer at the front
+ * of the queue - note we leave the 'active part' of the count + * incremented by 1 and the waiting part incremented by 0x00010000 */ if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) goto readers_only; - list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + list_move_tail(&waiter->list, &wake_list); goto out; /* don't want to wake any writers */ - dont_wake_writers: - waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); +dont_wake_writers: + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->flags & RWSEM_WAITING_FOR_WRITE) goto out; - /* grant an infinite number of read locks to the readers at the front of the queue - * - note we increment the 'active part' of the count by the number of readers (less one - * for the activity decrement we've already done) before waking any processes up + /* grant an infinite number of read locks to the readers at the front + * of the queue - note we increment the 'active part' of the count by + * the number of readers (less one for the activity decrement we've + * already done) before waking any processes up */ - readers_only: +readers_only: woken = 0; do { + list_move_tail(&waiter->list, &wake_list); woken++; - if (waiter->list.next==&sem->wait_list) + if (list_empty(&sem->wait_list)) break; - waiter = list_entry(waiter->list.next,struct rwsem_waiter,list); + waiter = list_entry(sem->wait_list.next, + struct rwsem_waiter, list); } while (waiter->flags & RWSEM_WAITING_FOR_READ); - loop = woken; - woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS; + woken *= RWSEM_ACTIVE_BIAS - RWSEM_WAITING_BIAS; woken -= RWSEM_ACTIVE_BIAS; - rwsem_atomic_add(woken,sem); + rwsem_atomic_add(woken, sem); - next = sem->wait_list.next; - for (; loop>0; loop--) { - waiter = list_entry(next,struct rwsem_waiter,list); - next = waiter->list.next; - waiter->flags = 0; - wake_up_process(waiter->task); +out: + spin_unlock(&sem->wait_lock); + while (!list_empty(&wake_list)) { + waiter = 
list_entry(wake_list.next, struct rwsem_waiter, list); + list_del(&waiter->list); + complete(&waiter->granted); } - sem->wait_list.next = next; - next->prev = &sem->wait_list; - - out: - rwsemtrace(sem,"Leaving __rwsem_do_wake"); + rwsemtrace(sem, "Leaving __rwsem_do_wake"); return sem; /* undo the change to count, but check for a transition 1->0 */ - undo: - if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0) +undo: + if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS, sem) != 0) goto out; goto try_again; } @@ -121,41 +128,35 @@ static inline struct rw_semaphore *__rws /* * wait for a lock to be granted */ -static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem, - struct rwsem_waiter *waiter, - signed long adjustment) +static struct rw_semaphore * +rwsem_down_failed_common(struct rw_semaphore *sem, + struct rwsem_waiter *waiter, signed long adjustment) { struct task_struct *tsk = current; signed long count; - set_task_state(tsk,TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - spin_lock(&sem->wait_lock); waiter->task = tsk; + init_completion(&waiter->granted); - list_add_tail(&waiter->list,&sem->wait_list); - - /* note that we're now waiting on the lock, but no longer actively read-locking */ - count = rwsem_atomic_update(adjustment,sem); + spin_lock(&sem->wait_lock); + list_add_tail(&waiter->list, &sem->wait_list); - /* if there are no longer active locks, wake the front queued process(es) up - * - it might even be this process, since the waker takes a more active part + /* note that we're now waiting on the lock, but no longer actively + * read-locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* if there are no longer active locks, wake the front queued + * process(es) up - it might even be this process, since the waker + * takes a more active part */ if (!(count & RWSEM_ACTIVE_MASK)) - sem = __rwsem_do_wake(sem,1); - - spin_unlock(&sem->wait_lock); + sem = __rwsem_do_wake(sem, 1); + else + 
spin_unlock(&sem->wait_lock); /* wait to be given the lock */ - for (;;) { - if (!waiter->flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; + wait_for_completion(&waiter->granted); return sem; } @@ -163,32 +164,35 @@ static inline struct rw_semaphore *rwsem /* * wait for the read lock to be granted */ -struct rw_semaphore fastcall __sched *rwsem_down_read_failed(struct rw_semaphore *sem) +struct rw_semaphore fastcall __sched * +rwsem_down_read_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - rwsemtrace(sem,"Entering rwsem_down_read_failed"); + rwsemtrace(sem, "Entering rwsem_down_read_failed"); waiter.flags = RWSEM_WAITING_FOR_READ; - rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS); + rwsem_down_failed_common(sem, &waiter, + RWSEM_WAITING_BIAS - RWSEM_ACTIVE_BIAS); - rwsemtrace(sem,"Leaving rwsem_down_read_failed"); + rwsemtrace(sem, "Leaving rwsem_down_read_failed"); return sem; } /* * wait for the write lock to be granted */ -struct rw_semaphore fastcall __sched *rwsem_down_write_failed(struct rw_semaphore *sem) +struct rw_semaphore fastcall __sched * +rwsem_down_write_failed(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - rwsemtrace(sem,"Entering rwsem_down_write_failed"); + rwsemtrace(sem, "Entering rwsem_down_write_failed"); waiter.flags = RWSEM_WAITING_FOR_WRITE; - rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS); + rwsem_down_failed_common(sem, &waiter, -RWSEM_ACTIVE_BIAS); - rwsemtrace(sem,"Leaving rwsem_down_write_failed"); + rwsemtrace(sem, "Leaving rwsem_down_write_failed"); return sem; } @@ -198,39 +202,39 @@ struct rw_semaphore fastcall __sched *rw */ struct rw_semaphore fastcall *rwsem_wake(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering rwsem_wake"); + rwsemtrace(sem, "Entering rwsem_wake"); spin_lock(&sem->wait_lock); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem,1); - - 
spin_unlock(&sem->wait_lock); + sem = __rwsem_do_wake(sem, 1); + else + spin_unlock(&sem->wait_lock); - rwsemtrace(sem,"Leaving rwsem_wake"); + rwsemtrace(sem, "Leaving rwsem_wake"); return sem; } /* * downgrade a write lock into a read lock - * - caller incremented waiting part of count, and discovered it to be still negative + * - caller incremented waiting part of count, and discovered it to be still -ve * - just wake up any readers at the front of the queue */ struct rw_semaphore fastcall *rwsem_downgrade_wake(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering rwsem_downgrade_wake"); + rwsemtrace(sem, "Entering rwsem_downgrade_wake"); spin_lock(&sem->wait_lock); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem,0); - - spin_unlock(&sem->wait_lock); + sem = __rwsem_do_wake(sem, 0); + else + spin_unlock(&sem->wait_lock); - rwsemtrace(sem,"Leaving rwsem_downgrade_wake"); + rwsemtrace(sem, "Leaving rwsem_downgrade_wake"); return sem; } --- linux-2.6.6-rc1/lib/rwsem-spinlock.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/lib/rwsem-spinlock.c 2004-04-18 22:25:56.647218792 -0700 @@ -1,5 +1,5 @@ -/* rwsem-spinlock.c: R/W semaphores: contention handling functions for generic spinlock - * implementation +/* rwsem-spinlock.c: R/W semaphores: contention handling functions for generic + * spinlock implementation * * Copyright (c) 2001 David Howells (dhowells@redhat.com).
* - Derived partially from idea by Andrea Arcangeli @@ -8,11 +8,13 @@ #include #include #include +#include struct rwsem_waiter { - struct list_head list; - struct task_struct *task; - unsigned int flags; + struct list_head list; + struct task_struct *task; + struct completion granted; + unsigned int flags; #define RWSEM_WAITING_FOR_READ 0x00000001 #define RWSEM_WAITING_FOR_WRITE 0x00000002 }; @@ -22,7 +24,8 @@ void rwsemtrace(struct rw_semaphore *sem { if (sem->debug) printk("[%d] %s({%d,%d})\n", - current->pid,str,sem->activity,list_empty(&sem->wait_list)?0:1); + current->pid, str, sem->activity, + list_empty(&sem->wait_list) ? 0 : 1); } #endif @@ -40,22 +43,25 @@ void fastcall init_rwsem(struct rw_semap } /* - * handle the lock being released whilst there are processes blocked on it that can now run + * handle the lock being released whilst there are processes blocked on it + * that can now run * - if we come here, then: * - the 'active count' _reached_ zero * - the 'waiting count' is non-zero * - the spinlock must be held by the caller - * - woken process blocks are discarded from the list after having flags zeroised + * - woken process blocks are moved off the list and completed after unlock * - writers are only woken if wakewrite is non-zero */ -static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) +static inline struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) { + LIST_HEAD(wake_list); struct rwsem_waiter *waiter; int woken; - rwsemtrace(sem,"Entering __rwsem_do_wake"); + rwsemtrace(sem, "Entering __rwsem_do_wake"); - waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (!wakewrite) { if (waiter->flags & RWSEM_WAITING_FOR_WRITE) @@ -63,54 +69,59 @@ static inline struct rw_semaphore *__rws goto dont_wake_writers; } - /* if we are allowed to wake writers try to grant a single write lock if there's a - *
writer at the front of the queue - * - we leave the 'waiting count' incremented to signify potential contention + /* if we are allowed to wake writers try to grant a single write lock + * if there's a writer at the front of the queue - we leave the + * 'waiting count' incremented to signify potential contention */ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { sem->activity = -1; - list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + list_move_tail(&waiter->list, &wake_list); goto out; } - /* grant an infinite number of read locks to the readers at the front of the queue */ - dont_wake_writers: + /* grant an infinite number of read locks to the readers at the front + * of the queue */ +dont_wake_writers: woken = 0; - while (waiter->flags&RWSEM_WAITING_FOR_READ) { - struct list_head *next = waiter->list.next; - - list_del(&waiter->list); - waiter->flags = 0; - wake_up_process(waiter->task); + while (waiter->flags & RWSEM_WAITING_FOR_READ) { + list_move_tail(&waiter->list, &wake_list); woken++; if (list_empty(&sem->wait_list)) break; - waiter = list_entry(next,struct rwsem_waiter,list); + waiter = list_entry(sem->wait_list.next, + struct rwsem_waiter, list); } sem->activity += woken; - out: - rwsemtrace(sem,"Leaving __rwsem_do_wake"); +out: + spin_unlock(&sem->wait_lock); + while (!list_empty(&wake_list)) { + waiter = list_entry(wake_list.next, struct rwsem_waiter, list); + list_del(&waiter->list); + complete(&waiter->granted); + } + + rwsemtrace(sem, "Leaving __rwsem_do_wake"); return sem; } /* - * wake a single writer + * wake a single writer. + * called with wait_lock locked and unlocks it in the process. 
*/ -static inline struct rw_semaphore *__rwsem_wake_one_writer(struct rw_semaphore *sem) +static inline struct rw_semaphore * +__rwsem_wake_one_writer(struct rw_semaphore *sem) { struct rwsem_waiter *waiter; sem->activity = -1; - waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); list_del(&waiter->list); + spin_unlock(&sem->wait_lock); - waiter->flags = 0; - wake_up_process(waiter->task); + complete(&waiter->granted); return sem; } @@ -120,43 +131,32 @@ static inline struct rw_semaphore *__rws void fastcall __down_read(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; - rwsemtrace(sem,"Entering __down_read"); + rwsemtrace(sem, "Entering __down_read"); spin_lock(&sem->wait_lock); - if (sem->activity>=0 && list_empty(&sem->wait_list)) { + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity++; spin_unlock(&sem->wait_lock); goto out; } - tsk = current; - set_task_state(tsk,TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - waiter.task = tsk; + init_completion(&waiter.granted); + waiter.task = current; waiter.flags = RWSEM_WAITING_FOR_READ; - list_add_tail(&waiter.list,&sem->wait_list); + list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ spin_unlock(&sem->wait_lock); - /* wait to be given the lock */ - for (;;) { - if (!waiter.flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; + wait_for_completion(&waiter.granted); - out: - rwsemtrace(sem,"Leaving __down_read"); +out: + rwsemtrace(sem, "Leaving __down_read"); } /* @@ -165,11 +165,11 @@ void fastcall __down_read(struct rw_sema int fastcall __down_read_trylock(struct rw_semaphore *sem) { int ret = 0; - rwsemtrace(sem,"Entering __down_read_trylock"); + rwsemtrace(sem, "Entering __down_read_trylock"); spin_lock(&sem->wait_lock); - if 
(sem->activity>=0 && list_empty(&sem->wait_list)) { + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity++; ret = 1; @@ -177,54 +177,43 @@ int fastcall __down_read_trylock(struct spin_unlock(&sem->wait_lock); - rwsemtrace(sem,"Leaving __down_read_trylock"); + rwsemtrace(sem, "Leaving __down_read_trylock"); return ret; } /* * get a write lock on the semaphore - * - note that we increment the waiting count anyway to indicate an exclusive lock + * - note that we increment the waiting count anyway to indicate an exclusive + * lock */ void fastcall __down_write(struct rw_semaphore *sem) { struct rwsem_waiter waiter; - struct task_struct *tsk; - rwsemtrace(sem,"Entering __down_write"); + rwsemtrace(sem, "Entering __down_write"); spin_lock(&sem->wait_lock); - if (sem->activity==0 && list_empty(&sem->wait_list)) { + if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity = -1; spin_unlock(&sem->wait_lock); goto out; } - tsk = current; - set_task_state(tsk,TASK_UNINTERRUPTIBLE); - /* set up my own style of waitqueue */ - waiter.task = tsk; + init_completion(&waiter.granted); + waiter.task = current; waiter.flags = RWSEM_WAITING_FOR_WRITE; - list_add_tail(&waiter.list,&sem->wait_list); + list_add_tail(&waiter.list, &sem->wait_list); /* we don't need to touch the semaphore struct anymore */ spin_unlock(&sem->wait_lock); - /* wait to be given the lock */ - for (;;) { - if (!waiter.flags) - break; - schedule(); - set_task_state(tsk, TASK_UNINTERRUPTIBLE); - } - - tsk->state = TASK_RUNNING; - - out: - rwsemtrace(sem,"Leaving __down_write"); + wait_for_completion(&waiter.granted); +out: + rwsemtrace(sem, "Leaving __down_write"); } /* @@ -233,11 +222,11 @@ void fastcall __down_write(struct rw_sem int fastcall __down_write_trylock(struct rw_semaphore *sem) { int ret = 0; - rwsemtrace(sem,"Entering __down_write_trylock"); + rwsemtrace(sem, "Entering __down_write_trylock"); spin_lock(&sem->wait_lock); - if 
(sem->activity==0 && list_empty(&sem->wait_list)) { + if (sem->activity == 0 && list_empty(&sem->wait_list)) { /* granted */ sem->activity = -1; ret = 1; @@ -245,7 +234,7 @@ int fastcall __down_write_trylock(struct spin_unlock(&sem->wait_lock); - rwsemtrace(sem,"Leaving __down_write_trylock"); + rwsemtrace(sem, "Leaving __down_write_trylock"); return ret; } @@ -254,16 +243,16 @@ int fastcall __down_write_trylock(struct */ void fastcall __up_read(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering __up_read"); + rwsemtrace(sem, "Entering __up_read"); spin_lock(&sem->wait_lock); - if (--sem->activity==0 && !list_empty(&sem->wait_list)) + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) sem = __rwsem_wake_one_writer(sem); + else + spin_unlock(&sem->wait_lock); - spin_unlock(&sem->wait_lock); - - rwsemtrace(sem,"Leaving __up_read"); + rwsemtrace(sem, "Leaving __up_read"); } /* @@ -271,17 +260,17 @@ void fastcall __up_read(struct rw_semaph */ void fastcall __up_write(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering __up_write"); + rwsemtrace(sem, "Entering __up_write"); spin_lock(&sem->wait_lock); sem->activity = 0; if (!list_empty(&sem->wait_list)) sem = __rwsem_do_wake(sem, 1); + else + spin_unlock(&sem->wait_lock); - spin_unlock(&sem->wait_lock); - - rwsemtrace(sem,"Leaving __up_write"); + rwsemtrace(sem, "Leaving __up_write"); } /* @@ -290,17 +279,17 @@ void fastcall __up_write(struct rw_semap */ void fastcall __downgrade_write(struct rw_semaphore *sem) { - rwsemtrace(sem,"Entering __downgrade_write"); + rwsemtrace(sem, "Entering __downgrade_write"); spin_lock(&sem->wait_lock); sem->activity = 1; if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem,0); - - spin_unlock(&sem->wait_lock); + sem = __rwsem_do_wake(sem, 0); + else + spin_unlock(&sem->wait_lock); - rwsemtrace(sem,"Leaving __downgrade_write"); + rwsemtrace(sem, "Leaving __downgrade_write"); } EXPORT_SYMBOL(init_rwsem); --- linux-2.6.6-rc1/MAINTAINERS 2004-04-14 23:14:47.000000000 
-0700 +++ 25/MAINTAINERS 2004-04-18 22:25:30.352216240 -0700 @@ -1061,23 +1061,33 @@ M: tigran@veritas.com S: Maintained INTEL PRO/100 ETHERNET SUPPORT +P: John Ronciak +M: john.ronciak@intel.com +P: Ganesh Venkatesan +M: ganesh.venkatesan@intel.com P: Scott Feldman M: scott.feldman@intel.com +W: http://sourceforge.net/projects/e1000/ S: Supported INTEL PRO/1000 GIGABIT ETHERNET SUPPORT P: Jeb Cramer M: cramerj@intel.com -P: Scott Feldman -M: scott.feldman@intel.com +P: John Ronciak +M: john.ronciak@intel.com +P: Ganesh Venkatesan +M: ganesh.venkatesan@intel.com W: http://sourceforge.net/projects/e1000/ S: Supported INTEL PRO/10GbE SUPPORT +P: Ayyappan Veeraiyan +M: ayyappan.veeraiyan@intel.com P: Ganesh Venkatesan -M: Ganesh.Venkatesan@intel.com -P: Scott Feldman -M: scott.feldman@intel.com +M: ganesh.venkatesan@intel.com +P: John Ronciak +M: john.ronciak@intel.com +W: http://sourceforge.net/projects/e1000/ S: Supported INTERMEZZO FILE SYSTEM @@ -1192,6 +1202,12 @@ W: http://sf.net/projects/kernel-janitor W: http://developer.osdl.org/rddunlap/kj-patches/ S: Maintained +KGDB FOR I386 PLATFORM +P: George Anzinger +M: george@mvista.com +L: linux-net@vger.kernel.org +S: Supported + KERNEL NFSD P: Neil Brown M: neilb@cse.unsw.edu.au @@ -2266,9 +2282,8 @@ M: hirofumi@mail.parknet.co.jp L: linux-kernel@vger.kernel.org S: Maintained -VIA 82Cxxx AUDIO DRIVER +VIA 82Cxxx AUDIO DRIVER (old OSS driver) P: Jeff Garzik -L: linux-via@gtf.org S: Odd fixes VIA RHINE NETWORK DRIVER --- linux-2.6.6-rc1/Makefile 2004-04-14 23:14:47.000000000 -0700 +++ 25/Makefile 2004-04-18 22:25:31.607025480 -0700 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 6 -EXTRAVERSION =-rc1 +EXTRAVERSION =-rc1-mm1 NAME=Zonked Quokka # *DOCUMENTATION* @@ -461,6 +461,7 @@ endif ifdef CONFIG_DEBUG_INFO CFLAGS += -g +AFLAGS += -g endif # warn about C99 declaration after statement --- linux-2.6.6-rc1/mm/filemap.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/filemap.c 2004-04-18 22:25:25.026025944 -0700 @@ 
-62,7 +62,7 @@ * ->mapping->tree_lock * * ->i_sem - * ->i_shared_sem (truncate->invalidate_mmap_range) + * ->i_shared_sem (truncate->unmap_mapping_range) * * ->mmap_sem * ->i_shared_sem (various places) @@ -127,7 +127,7 @@ static inline int sync_page(struct page if (mapping->a_ops && mapping->a_ops->sync_page) return mapping->a_ops->sync_page(page); } else if (PageSwapCache(page)) { - swap_unplug_io_fn(NULL); + swap_unplug_io_fn(page); } return 0; } @@ -551,7 +551,7 @@ unsigned find_get_pages(struct address_s /* * Like find_get_pages, except we only return pages which are tagged with - * `tag'. We update *start to index the next page for the traversal. + * `tag'. We update *index to index the next page for the traversal. */ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, int tag, unsigned int nr_pages, struct page **pages) @@ -1363,11 +1363,7 @@ repeat: * If a nonlinear mapping then store the file page offset * in the pte. */ - unsigned long pgidx; - pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (pgoff != pgidx) { + if (pgoff != linear_page_index(vma, addr)) { err = install_file_pte(mm, vma, addr, pgoff, prot); if (err) return err; --- linux-2.6.6-rc1/mm/fremap.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/fremap.c 2004-04-18 22:25:32.959819824 -0700 @@ -36,7 +36,7 @@ static inline void zap_pte(struct mm_str if (!PageReserved(page)) { if (pte_dirty(pte)) set_page_dirty(page); - page_remove_rmap(page, ptep); + page_remove_rmap(page); page_cache_release(page); mm->rss--; } @@ -49,7 +49,7 @@ static inline void zap_pte(struct mm_str } /* - * Install a page to a given virtual memory address, release any + * Install a file page to a given virtual memory address, release any * previously existing mapping. 
*/ int install_page(struct mm_struct *mm, struct vm_area_struct *vma, @@ -60,11 +60,13 @@ int install_page(struct mm_struct *mm, s pgd_t *pgd; pmd_t *pmd; pte_t pte_val; - struct pte_chain *pte_chain; - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto err; + /* + * We use page_add_file_rmap below: if install_page is + * ever extended to anonymous pages, this will warn us. + */ + BUG_ON(!page_mapping(page)); + pgd = pgd_offset(mm, addr); spin_lock(&mm->page_table_lock); @@ -81,18 +83,14 @@ int install_page(struct mm_struct *mm, s mm->rss++; flush_icache_page(vma, page); set_pte(pte, mk_pte(page, prot)); - pte_chain = page_add_rmap(page, pte, pte_chain); + page_add_file_rmap(page); pte_val = *pte; pte_unmap(pte); update_mmu_cache(vma, addr, pte_val); - spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return 0; + err = 0; err_unlock: spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); -err: return err; } EXPORT_SYMBOL(install_page); @@ -188,15 +186,18 @@ asmlinkage long sys_remap_file_pages(uns /* * Make sure the vma is shared, that it supports prefaulting, * and that the remapped range is valid and fully within - * the single existing vma: + * the single existing vma. vm_private_data is used as a + * swapout cursor in a VM_NONLINEAR vma (unless VM_RESERVED + * or VM_LOCKED, but VM_LOCKED could be revoked later on). */ if (vma && (vma->vm_flags & VM_SHARED) && + (!vma->vm_private_data || (vma->vm_flags & VM_RESERVED)) && vma->vm_ops && vma->vm_ops->populate && end > start && start >= vma->vm_start && end <= vma->vm_end) { /* Must set VM_NONLINEAR before any pages are populated. */ - if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff) + if (pgoff != linear_page_index(vma, start)) vma->vm_flags |= VM_NONLINEAR; /* ->populate can take a long time, so downgrade the lock. 
*/ --- linux-2.6.6-rc1/mm/highmem.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/mm/highmem.c 2004-04-18 22:26:02.149382336 -0700 @@ -26,7 +26,6 @@ #include #include #include -#include #include static mempool_t *page_pool, *isa_page_pool; --- linux-2.6.6-rc1/mm/madvise.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/mm/madvise.c 2004-04-18 22:25:25.027025792 -0700 @@ -95,7 +95,7 @@ static long madvise_dontneed(struct vm_a if (vma->vm_flags & VM_LOCKED) return -EINVAL; - zap_page_range(vma, start, end - start); + zap_page_range(vma, start, end - start, NULL); return 0; } --- linux-2.6.6-rc1/mm/Makefile 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/Makefile 2004-04-18 22:25:48.859402720 -0700 @@ -13,3 +13,4 @@ obj-y := bootmem.o filemap.o mempool.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o +obj-$(CONFIG_NUMA) += mempolicy.o --- linux-2.6.6-rc1/mm/memory.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/memory.c 2004-04-18 22:25:59.349807936 -0700 @@ -48,7 +48,6 @@ #include #include -#include #include #include #include @@ -105,7 +104,7 @@ static inline void free_one_pmd(struct m } page = pmd_page(*dir); pmd_clear(dir); - pgtable_remove_rmap(page); + dec_page_state(nr_page_table_pages); pte_free_tlb(tlb, page); } @@ -164,7 +163,7 @@ pte_t fastcall * pte_alloc_map(struct mm pte_free(new); goto out; } - pgtable_add_rmap(new, mm, address); + inc_page_state(nr_page_table_pages); pmd_populate(mm, pmd, new); } out: @@ -190,7 +189,6 @@ pte_t fastcall * pte_alloc_kernel(struct pte_free_kernel(new); goto out; } - pgtable_add_rmap(virt_to_page(new), mm, address); pmd_populate_kernel(mm, pmd, new); } out: @@ -217,20 +215,10 @@ int copy_page_range(struct mm_struct *ds unsigned long address = vma->vm_start; unsigned long end = vma->vm_end; unsigned long cow; - struct pte_chain *pte_chain = NULL; if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst, src, vma); - pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN); - if 
(!pte_chain) { - spin_unlock(&dst->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - spin_lock(&dst->page_table_lock); - if (!pte_chain) - goto nomem; - } - cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; src_pgd = pgd_offset(src, address)-1; dst_pgd = pgd_offset(dst, address)-1; @@ -329,32 +317,8 @@ skip_copy_pte_range: pte = pte_mkold(pte); get_page(page); dst->rss++; - set_pte(dst_pte, pte); - pte_chain = page_add_rmap(page, dst_pte, - pte_chain); - if (pte_chain) - goto cont_copy_pte_range_noset; - pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN); - if (pte_chain) - goto cont_copy_pte_range_noset; - - /* - * pte_chain allocation failed, and we need to - * run page reclaim. - */ - pte_unmap_nested(src_pte); - pte_unmap(dst_pte); - spin_unlock(&src->page_table_lock); - spin_unlock(&dst->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - spin_lock(&dst->page_table_lock); - if (!pte_chain) - goto nomem; - spin_lock(&src->page_table_lock); - dst_pte = pte_offset_map(dst_pmd, address); - src_pte = pte_offset_map_nested(src_pmd, - address); + page_dup_rmap(page); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { @@ -368,7 +332,7 @@ cont_copy_pte_range_noset: pte_unmap_nested(src_pte-1); pte_unmap(dst_pte-1); spin_unlock(&src->page_table_lock); - + cond_resched_lock(&dst->page_table_lock); cont_copy_pmd_range: src_pmd++; dst_pmd++; @@ -377,16 +341,24 @@ cont_copy_pmd_range: out_unlock: spin_unlock(&src->page_table_lock); out: - pte_chain_free(pte_chain); return 0; nomem: - pte_chain_free(pte_chain); return -ENOMEM; } -static void -zap_pte_range(struct mmu_gather *tlb, pmd_t * pmd, - unsigned long address, unsigned long size) +/* + * Parameter block passed down to zap_pte_range in exceptional cases. 
+ */ +struct zap_details { + struct vm_area_struct *nonlinear_vma; /* Check page->index if set */ + struct address_space *check_mapping; /* Check page->mapping if set */ + pgoff_t first_index; /* Lowest page->index to unmap */ + pgoff_t last_index; /* Highest page->index to unmap */ +}; + +static void zap_pte_range(struct mmu_gather *tlb, + pmd_t *pmd, unsigned long address, + unsigned long size, struct zap_details *details) { unsigned long offset; pte_t *ptep; @@ -408,35 +380,64 @@ zap_pte_range(struct mmu_gather *tlb, pm if (pte_none(pte)) continue; if (pte_present(pte)) { + struct page *page = NULL; unsigned long pfn = pte_pfn(pte); - - pte = ptep_get_and_clear(ptep); - tlb_remove_tlb_entry(tlb, ptep, address+offset); if (pfn_valid(pfn)) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page)) { - if (pte_dirty(pte)) - set_page_dirty(page); - if (pte_young(pte) && - page_mapping(page)) - mark_page_accessed(page); - tlb->freed++; - page_remove_rmap(page, ptep); - tlb_remove_page(tlb, page); - } + page = pfn_to_page(pfn); + if (PageReserved(page)) + page = NULL; } - } else { - if (!pte_file(pte)) - free_swap_and_cache(pte_to_swp_entry(pte)); - pte_clear(ptep); + if (unlikely(details) && page) { + /* + * unmap_shared_mapping_pages() wants to + * invalidate cache without truncating: + * unmap shared but keep private pages. + */ + if (details->check_mapping && + details->check_mapping != page->mapping) + continue; + /* + * Each page->index must be checked when + * invalidating or truncating nonlinear. 
+ */ + if (details->nonlinear_vma && + (page->index < details->first_index || + page->index > details->last_index)) + continue; + } + pte = ptep_get_and_clear(ptep); + tlb_remove_tlb_entry(tlb, ptep, address+offset); + if (unlikely(!page)) + continue; + if (unlikely(details) && details->nonlinear_vma + && linear_page_index(details->nonlinear_vma, + address+offset) != page->index) + set_pte(ptep, pgoff_to_pte(page->index)); + if (pte_dirty(pte)) + set_page_dirty(page); + if (pte_young(pte) && page_mapping(page)) + mark_page_accessed(page); + tlb->freed++; + page_remove_rmap(page); + tlb_remove_page(tlb, page); + continue; } + /* + * If details->check_mapping, we leave swap entries; + * if details->nonlinear_vma, we leave file entries. + */ + if (unlikely(details)) + continue; + if (!pte_file(pte)) + free_swap_and_cache(pte_to_swp_entry(pte)); + pte_clear(ptep); } pte_unmap(ptep-1); } -static void -zap_pmd_range(struct mmu_gather *tlb, pgd_t * dir, - unsigned long address, unsigned long size) +static void zap_pmd_range(struct mmu_gather *tlb, + pgd_t * dir, unsigned long address, + unsigned long size, struct zap_details *details) { pmd_t * pmd; unsigned long end; @@ -453,28 +454,23 @@ zap_pmd_range(struct mmu_gather *tlb, pg if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) end = ((address + PGDIR_SIZE) & PGDIR_MASK); do { - zap_pte_range(tlb, pmd, address, end - address); + zap_pte_range(tlb, pmd, address, end - address, details); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); } -void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, - unsigned long address, unsigned long end) +static void unmap_page_range(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long address, + unsigned long end, struct zap_details *details) { pgd_t * dir; - if (is_vm_hugetlb_page(vma)) { - unmap_hugepage_range(vma, address, end); - return; - } - BUG_ON(address >= end); - dir = pgd_offset(vma->vm_mm, address); tlb_start_vma(tlb, 
vma); do { - zap_pmd_range(tlb, dir, address, end - address); + zap_pmd_range(tlb, dir, address, end - address, details); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; } while (address && (address < end)); @@ -504,6 +500,7 @@ void unmap_page_range(struct mmu_gather * @start_addr: virtual address at which to start unmapping * @end_addr: virtual address at which to end unmapping * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here + * @details: details of nonlinear truncation or shared cache invalidation * * Returns the number of vma's which were covered by the unmapping. * @@ -524,22 +521,14 @@ void unmap_page_range(struct mmu_gather */ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start_addr, - unsigned long end_addr, unsigned long *nr_accounted) + unsigned long end_addr, unsigned long *nr_accounted, + struct zap_details *details) { unsigned long zap_bytes = ZAP_BLOCK_SIZE; unsigned long tlb_start = 0; /* For tlb_finish_mmu */ int tlb_start_valid = 0; int ret = 0; - if (vma) { /* debug. killme. 
*/ - if (end_addr <= vma->vm_start) - printk("%s: end_addr(0x%08lx) <= vm_start(0x%08lx)\n", - __FUNCTION__, end_addr, vma->vm_start); - if (start_addr >= vma->vm_end) - printk("%s: start_addr(0x%08lx) <= vm_end(0x%08lx)\n", - __FUNCTION__, start_addr, vma->vm_end); - } - for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { unsigned long start; unsigned long end; @@ -558,17 +547,20 @@ int unmap_vmas(struct mmu_gather **tlbp, while (start != end) { unsigned long block; - if (is_vm_hugetlb_page(vma)) - block = end - start; - else - block = min(zap_bytes, end - start); - if (!tlb_start_valid) { tlb_start = start; tlb_start_valid = 1; } - unmap_page_range(*tlbp, vma, start, start + block); + if (is_vm_hugetlb_page(vma)) { + block = end - start; + unmap_hugepage_range(vma, start, end); + } else { + block = min(zap_bytes, end - start); + unmap_page_range(*tlbp, vma, start, + start + block, details); + } + start += block; zap_bytes -= block; if ((long)zap_bytes > 0) @@ -582,9 +574,6 @@ int unmap_vmas(struct mmu_gather **tlbp, } zap_bytes = ZAP_BLOCK_SIZE; } - if (vma->vm_next && vma->vm_next->vm_start < vma->vm_end) - printk("%s: VMA list is not sorted correctly!\n", - __FUNCTION__); } return ret; } @@ -594,9 +583,10 @@ int unmap_vmas(struct mmu_gather **tlbp, * @vma: vm_area_struct holding the applicable pages * @address: starting address of pages to zap * @size: number of bytes to zap + * @details: details of nonlinear truncation or shared cache invalidation */ -void zap_page_range(struct vm_area_struct *vma, - unsigned long address, unsigned long size) +void zap_page_range(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather *tlb; @@ -613,7 +603,7 @@ void zap_page_range(struct vm_area_struc lru_add_drain(); spin_lock(&mm->page_table_lock); tlb = tlb_gather_mmu(mm, 0); - unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted); + unmap_vmas(&tlb, mm, vma, 
address, end, &nr_accounted, details); tlb_finish_mmu(tlb, address, end); spin_unlock(&mm->page_table_lock); } @@ -629,11 +619,11 @@ follow_page(struct mm_struct *mm, unsign pmd_t *pmd; pte_t *ptep, pte; unsigned long pfn; - struct vm_area_struct *vma; + struct page *page; - vma = hugepage_vma(mm, address); - if (vma) - return follow_huge_addr(mm, vma, address, write); + page = follow_huge_addr(mm, address, write); + if (! IS_ERR(page)) + return page; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || pgd_bad(*pgd)) @@ -719,6 +709,7 @@ int get_user_pages(struct task_struct *t struct page **pages, struct vm_area_struct **vmas) { int i; + int vm_io; unsigned int flags; /* @@ -762,8 +753,10 @@ int get_user_pages(struct task_struct *t continue; } - if (!vma || (pages && (vma->vm_flags & VM_IO)) - || !(flags & vma->vm_flags)) + if (!vma) + return i ? : -EFAULT; + vm_io = vma->vm_flags & VM_IO; + if ((pages && vm_io) || !(flags & vma->vm_flags)) return i ? : -EFAULT; if (is_vm_hugetlb_page(vma)) { @@ -773,8 +766,15 @@ int get_user_pages(struct task_struct *t } spin_lock(&mm->page_table_lock); do { - struct page *map; + struct page *map = NULL; int lookup_write = write; + + /* + * We don't follow pagetables for VM_IO regions - they + * may have no pageframes. + */ + if (vm_io) + goto no_follow; while (!(map = follow_page(mm, start, lookup_write))) { /* * Shortcut for anonymous pages. 
We don't want @@ -826,6 +826,7 @@ int get_user_pages(struct task_struct *t if (!PageReserved(pages[i])) page_cache_get(pages[i]); } +no_follow: if (vmas) vmas[i] = vma; i++; @@ -1052,7 +1053,6 @@ static int do_wp_page(struct mm_struct * { struct page *old_page, *new_page; unsigned long pfn = pte_pfn(pte); - struct pte_chain *pte_chain; pte_t entry; if (unlikely(!pfn_valid(pfn))) { @@ -1091,10 +1091,7 @@ static int do_wp_page(struct mm_struct * page_cache_get(old_page); spin_unlock(&mm->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto no_pte_chain; - new_page = alloc_page(GFP_HIGHUSER); + new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); if (!new_page) goto no_new_page; copy_cow_page(old_page,new_page,address); @@ -1107,10 +1104,11 @@ static int do_wp_page(struct mm_struct * if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) ++mm->rss; - page_remove_rmap(old_page, page_table); + else + page_remove_rmap(old_page); break_cow(vma, new_page, address, page_table); - pte_chain = page_add_rmap(new_page, page_table, pte_chain); lru_cache_add_active(new_page); + page_add_anon_rmap(new_page, mm, address); /* Free the old page.. */ new_page = old_page; @@ -1119,57 +1117,54 @@ static int do_wp_page(struct mm_struct * page_cache_release(new_page); page_cache_release(old_page); spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); return VM_FAULT_MINOR; no_new_page: - pte_chain_free(pte_chain); -no_pte_chain: page_cache_release(old_page); return VM_FAULT_OOM; } /* - * Helper function for invalidate_mmap_range(). - * Both hba and hlen are page numbers in PAGE_SIZE units. - * An hlen of zero blows away the entire portion file after hba. - */ -static void -invalidate_mmap_range_list(struct list_head *head, - unsigned long const hba, - unsigned long const hlen) -{ - struct list_head *curr; - unsigned long hea; /* last page of hole. */ - unsigned long vba; - unsigned long vea; /* last page of corresponding uva hole. 
*/ - struct vm_area_struct *vp; - unsigned long zba; - unsigned long zea; - - hea = hba + hlen - 1; /* avoid overflow. */ - if (hea < hba) - hea = ULONG_MAX; - list_for_each(curr, head) { - vp = list_entry(curr, struct vm_area_struct, shared); - vba = vp->vm_pgoff; - vea = vba + ((vp->vm_end - vp->vm_start) >> PAGE_SHIFT) - 1; - if (hea < vba || vea < hba) - continue; /* Mapping disjoint from hole. */ - zba = (hba <= vba) ? vba : hba; - zea = (vea <= hea) ? vea : hea; - zap_page_range(vp, - ((zba - vba) << PAGE_SHIFT) + vp->vm_start, - (zea - zba + 1) << PAGE_SHIFT); + * Helper function for unmap_mapping_range(). + */ +static void unmap_mapping_range_list(struct list_head *head, + struct zap_details *details) +{ + struct vm_area_struct *vma; + pgoff_t vba, vea, zba, zea; + + list_for_each_entry(vma, head, shared) { + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + details->nonlinear_vma = vma; + zap_page_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, details); + details->nonlinear_vma = NULL; + continue; + } + vba = vma->vm_pgoff; + vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; + /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ + if (vba > details->last_index || vea < details->first_index) + continue; /* Mapping disjoint from hole. */ + zba = details->first_index; + if (zba < vba) + zba = vba; + zea = details->last_index; + if (zea > vea) + zea = vea; + zap_page_range(vma, + ((zba - vba) << PAGE_SHIFT) + vma->vm_start, + (zea - zba + 1) << PAGE_SHIFT, + details->check_mapping? details: NULL); } } /** - * invalidate_mmap_range - invalidate the portion of all mmaps + * unmap_mapping_range - unmap the portion of all mmaps * in the specified address_space corresponding to the specified * page range in the underlying file. - * @address_space: the address space containing mmaps to be invalidated. 
- * @holebegin: byte in first page to invalidate, relative to the start of + * @address_space: the address space containing mmaps to be unmapped. + * @holebegin: byte in first page to unmap, relative to the start of * the underlying file. This will be rounded down to a PAGE_SIZE * boundary. Note that this is different from vmtruncate(), which * must keep the partial page. In contrast, we must get rid of @@ -1177,31 +1172,45 @@ invalidate_mmap_range_list(struct list_h * @holelen: size of prospective hole in bytes. This will be rounded * up to a PAGE_SIZE boundary. A holelen of zero truncates to the * end of the file. + * @even_cows: 1 when truncating a file, unmap even private COWed pages; + * but 0 when invalidating pagecache, don't throw away private data. */ -void invalidate_mmap_range(struct address_space *mapping, - loff_t const holebegin, loff_t const holelen) +void unmap_mapping_range(struct address_space *mapping, + loff_t const holebegin, loff_t const holelen, int even_cows) { - unsigned long hba = holebegin >> PAGE_SHIFT; - unsigned long hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct zap_details details; + pgoff_t hba = holebegin >> PAGE_SHIFT; + pgoff_t hlen = (holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; /* Check for overflow. */ if (sizeof(holelen) > sizeof(hlen)) { long long holeend = (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (holeend & ~(long long)ULONG_MAX) hlen = ULONG_MAX - hba + 1; } + + details.check_mapping = even_cows? 
NULL: mapping; + details.nonlinear_vma = NULL; + details.first_index = hba; + details.last_index = hba + hlen - 1; + if (details.last_index < details.first_index) + details.last_index = ULONG_MAX; + down(&mapping->i_shared_sem); /* Protect against page fault */ atomic_inc(&mapping->truncate_count); if (unlikely(!list_empty(&mapping->i_mmap))) - invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen); + unmap_mapping_range_list(&mapping->i_mmap, &details); + + /* Don't waste time to check mapping on fully shared vmas */ + details.check_mapping = NULL; + if (unlikely(!list_empty(&mapping->i_mmap_shared))) - invalidate_mmap_range_list(&mapping->i_mmap_shared, hba, hlen); + unmap_mapping_range_list(&mapping->i_mmap_shared, &details); up(&mapping->i_shared_sem); } -EXPORT_SYMBOL_GPL(invalidate_mmap_range); +EXPORT_SYMBOL(unmap_mapping_range); /* * Handle all mappings that got truncated by a "truncate()" @@ -1219,7 +1228,7 @@ int vmtruncate(struct inode * inode, lof if (inode->i_size < offset) goto do_expand; i_size_write(inode, offset); - invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0); + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); truncate_inode_pages(mapping, offset); goto out_truncate; @@ -1248,9 +1257,17 @@ EXPORT_SYMBOL(vmtruncate); * (1 << page_cluster) entries in the swap area. This method is chosen * because it doesn't cost us any seek time. We also make sure to queue * the 'original' request together with the readahead ones... + * + * This has been extended to use the NUMA policies from the mm triggering + * the readahead. + * + * Caller must hold down_read on the vma->vm_mm if vma is not NULL. */ -void swapin_readahead(swp_entry_t entry) +void swapin_readahead(swp_entry_t entry, unsigned long addr,struct vm_area_struct *vma) { +#ifdef CONFIG_NUMA + struct vm_area_struct *next_vma = vma ? 
vma->vm_next : NULL; +#endif int i, num; struct page *new_page; unsigned long offset; @@ -1262,10 +1279,31 @@ void swapin_readahead(swp_entry_t entry) for (i = 0; i < num; offset++, i++) { /* Ok, do the async read-ahead now */ new_page = read_swap_cache_async(swp_entry(swp_type(entry), - offset)); + offset), vma, addr); if (!new_page) break; page_cache_release(new_page); +#ifdef CONFIG_NUMA + /* + * Find the next applicable VMA for the NUMA policy. + */ + addr += PAGE_SIZE; + if (addr == 0) + vma = NULL; + if (vma) { + if (addr >= vma->vm_end) { + vma = next_vma; + next_vma = vma ? vma->vm_next : NULL; + } + if (vma && addr < vma->vm_start) + vma = NULL; + } else { + if (next_vma && addr >= next_vma->vm_start) { + vma = next_vma; + next_vma = vma->vm_next; + } + } +#endif } lru_add_drain(); /* Push any new pages onto the LRU now */ } @@ -1282,14 +1320,13 @@ static int do_swap_page(struct mm_struct swp_entry_t entry = pte_to_swp_entry(orig_pte); pte_t pte; int ret = VM_FAULT_MINOR; - struct pte_chain *pte_chain = NULL; pte_unmap(page_table); spin_unlock(&mm->page_table_lock); page = lookup_swap_cache(entry); if (!page) { - swapin_readahead(entry); - page = read_swap_cache_async(entry); + swapin_readahead(entry, address, vma); + page = read_swap_cache_async(entry, vma, address); if (!page) { /* * Back out if somebody else faulted in this pte while @@ -1312,11 +1349,6 @@ static int do_swap_page(struct mm_struct } mark_page_accessed(page); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - ret = VM_FAULT_OOM; - goto out; - } lock_page(page); /* @@ -1342,20 +1374,28 @@ static int do_swap_page(struct mm_struct mm->rss++; pte = mk_pte(page, vma->vm_page_prot); - if (write_access && can_share_swap_page(page)) + if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); + write_access = 0; + } unlock_page(page); flush_icache_page(vma, page); set_pte(page_table, pte); - pte_chain = page_add_rmap(page, page_table, pte_chain); + 
page_add_anon_rmap(page, mm, address); + + if (write_access || mremap_moved_anon_rmap(page, address)) { + if (do_wp_page(mm, vma, address, + page_table, pmd, pte) == VM_FAULT_OOM) + ret = VM_FAULT_OOM; + goto out; + } /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); pte_unmap(page_table); spin_unlock(&mm->page_table_lock); out: - pte_chain_free(pte_chain); return ret; } @@ -1371,20 +1411,7 @@ do_anonymous_page(struct mm_struct *mm, { pte_t entry; struct page * page = ZERO_PAGE(addr); - struct pte_chain *pte_chain; - int ret; - pte_chain = pte_chain_alloc(GFP_ATOMIC | __GFP_NOWARN); - if (!pte_chain) { - pte_unmap(page_table); - spin_unlock(&mm->page_table_lock); - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto no_mem; - spin_lock(&mm->page_table_lock); - page_table = pte_offset_map(pmd, addr); - } - /* Read-only mapping of ZERO_PAGE. */ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); @@ -1394,7 +1421,7 @@ do_anonymous_page(struct mm_struct *mm, pte_unmap(page_table); spin_unlock(&mm->page_table_lock); - page = alloc_page(GFP_HIGHUSER); + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); if (!page) goto no_mem; clear_user_highpage(page, addr); @@ -1406,7 +1433,6 @@ do_anonymous_page(struct mm_struct *mm, pte_unmap(page_table); page_cache_release(page); spin_unlock(&mm->page_table_lock); - ret = VM_FAULT_MINOR; goto out; } mm->rss++; @@ -1415,24 +1441,19 @@ do_anonymous_page(struct mm_struct *mm, vma); lru_cache_add_active(page); mark_page_accessed(page); + page_add_anon_rmap(page, mm, addr); } set_pte(page_table, entry); - /* ignores ZERO_PAGE */ - pte_chain = page_add_rmap(page, page_table, pte_chain); pte_unmap(page_table); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); spin_unlock(&mm->page_table_lock); - ret = VM_FAULT_MINOR; - goto out; - -no_mem: - ret = VM_FAULT_OOM; out: - pte_chain_free(pte_chain); - return ret; + return 
VM_FAULT_MINOR; +no_mem: + return VM_FAULT_OOM; } /* @@ -1454,9 +1475,9 @@ do_no_page(struct mm_struct *mm, struct struct page * new_page; struct address_space *mapping = NULL; pte_t entry; - struct pte_chain *pte_chain; int sequence = 0; int ret = VM_FAULT_MINOR; + int anon = 0; if (!vma->vm_ops || !vma->vm_ops->nopage) return do_anonymous_page(mm, vma, page_table, @@ -1478,27 +1499,23 @@ retry: if (new_page == NOPAGE_OOM) return VM_FAULT_OOM; - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - goto oom; - /* * Should we do an early C-O-W break? */ if (write_access && !(vma->vm_flags & VM_SHARED)) { - struct page * page = alloc_page(GFP_HIGHUSER); + struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, address); if (!page) goto oom; copy_user_highpage(page, new_page, address); page_cache_release(new_page); - lru_cache_add_active(page); new_page = page; + anon = 1; } spin_lock(&mm->page_table_lock); /* * For a file-backed vma, someone could have truncated or otherwise - * invalidated this page. If invalidate_mmap_range got called, + * invalidated this page. If unmap_mapping_range got called, * retry getting the page. */ if (mapping && @@ -1506,7 +1523,6 @@ retry: sequence = atomic_read(&mapping->truncate_count); spin_unlock(&mm->page_table_lock); page_cache_release(new_page); - pte_chain_free(pte_chain); goto retry; } page_table = pte_offset_map(pmd, address); @@ -1530,7 +1546,11 @@ retry: if (write_access) entry = maybe_mkwrite(pte_mkdirty(entry), vma); set_pte(page_table, entry); - pte_chain = page_add_rmap(new_page, page_table, pte_chain); + if (anon) { + lru_cache_add_active(new_page); + page_add_anon_rmap(new_page, mm, address); + } else + page_add_file_rmap(new_page); pte_unmap(page_table); } else { /* One of our sibling threads was faster, back out. 
*/ @@ -1548,7 +1568,6 @@ oom: page_cache_release(new_page); ret = VM_FAULT_OOM; out: - pte_chain_free(pte_chain); return ret; } --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/mm/mempolicy.c 2004-04-18 22:25:49.664280360 -0700 @@ -0,0 +1,1018 @@ +/* + * Simple NUMA memory policy for the Linux kernel. + * + * Copyright 2003,2004 Andi Kleen, SuSE Labs. + * Subject to the GNU Public License, version 2. + * + * NUMA policy allows the user to give hints in which node(s) memory should + * be allocated. + * + * Support four policies per VMA and per process: + * + * The VMA policy has priority over the process policy for a page fault. + * + * interleave Allocate memory interleaved over a set of nodes, + * with normal fallback if it fails. + * For VMA based allocations this interleaves based on the + * offset into the backing object or offset into the mapping + * for anonymous memory. For process policy an process counter + * is used. + * bind Only allocate memory on a specific set of nodes, + * no fallback. + * preferred Try a specific node first before normal fallback. + * As a special case node -1 here means do the allocation + * on the local CPU. This is normally identical to default, + * but useful to set in a VMA when you have a non default + * process policy. + * default Allocate on the local node first, or when on a VMA + * use the process policy. This is what Linux always did + * in a NUMA aware kernel and still does by, ahem, default. + * + * The process policy is applied for most non interrupt memory allocations + * in that process' context. Interrupts ignore the policies and always + * try to allocate on the local CPU. The VMA policy is only applied for memory + * allocations for a VMA in the VM. + * + * Currently there are a few corner cases in swapping where the policy + * is not applied, but the majority should be handled. When process policy + * is used it is not remembered over swap outs/swap ins. 
+ * + * Only the highest zone in the zone hierarchy gets policied. Allocations + * requesting a lower zone just use default policy. This implies that + * on systems with highmem, kernel lowmem allocations don't get policied. + * Same with GFP_DMA allocations. + * + * For shmfs/tmpfs/hugetlbfs shared memory the policy is shared between + * all users and remembered even when nobody has memory mapped. + */ + +/* Notebook: + fix mmap readahead to honour policy and enable policy for any page cache + object + statistics for bigpages + global policy for page cache? currently it uses process policy. Requires + first item above. + handle mremap for shared memory (currently ignored for the policy) + grows down? + make bind policy root only? It can trigger oom much faster and the + kernel is not always grateful with that. + could replace all the switch()es with a mempolicy_ops structure. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static kmem_cache_t *policy_cache; +static kmem_cache_t *sn_cache; + +#define PDprintk(fmt...) + +/* Highest zone. A specific allocation for a zone below that is not + policied.
*/ +static int policy_zone; + +static struct mempolicy default_policy = { + .refcnt = ATOMIC_INIT(1), /* never free it */ + .policy = MPOL_DEFAULT, +}; + +/* Check if all specified nodes are online */ +static int nodes_online(unsigned long *nodes) +{ + DECLARE_BITMAP(offline, MAX_NUMNODES); + + bitmap_copy(offline, node_online_map, MAX_NUMNODES); + if (bitmap_empty(offline, MAX_NUMNODES)) + set_bit(0, offline); + bitmap_complement(offline, MAX_NUMNODES); + bitmap_and(offline, offline, nodes, MAX_NUMNODES); + if (!bitmap_empty(offline, MAX_NUMNODES)) + return -EINVAL; + return 0; +} + +/* Do sanity checking on a policy */ +static int mpol_check_policy(int mode, unsigned long *nodes) +{ + int empty = bitmap_empty(nodes, MAX_NUMNODES); + + switch (mode) { + case MPOL_DEFAULT: + if (!empty) + return -EINVAL; + break; + case MPOL_BIND: + case MPOL_INTERLEAVE: + /* Preferred will only use the first bit, but allow + more for now. */ + if (empty) + return -EINVAL; + break; + } + return nodes_online(nodes); +} + +/* Copy a node mask from user space. */ +static int get_nodes(unsigned long *nodes, unsigned long *nmask, + unsigned long maxnode, int mode) +{ + unsigned long k; + unsigned long nlongs; + unsigned long endmask; + + --maxnode; + nlongs = BITS_TO_LONGS(maxnode); + if ((maxnode % BITS_PER_LONG) == 0) + endmask = ~0UL; + else + endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1; + + /* When the user specified more nodes than supported just check + if the non supported part is all zero. 
*/ + if (nmask && nlongs > BITS_TO_LONGS(MAX_NUMNODES)) { + for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) { + unsigned long t; + if (get_user(t, nmask + k)) + return -EFAULT; + if (k == nlongs - 1) { + if (t & endmask) + return -EINVAL; + } else if (t) + return -EINVAL; + } + nlongs = BITS_TO_LONGS(MAX_NUMNODES); + endmask = ~0UL; + } + bitmap_zero(nodes, MAX_NUMNODES); + if (nmask && copy_from_user(nodes, nmask, nlongs*sizeof(unsigned long))) + return -EFAULT; + if (nlongs) /* maxnode of 0 or 1 yields nlongs == 0: there is no last word to mask */ + nodes[nlongs-1] &= endmask; + return mpol_check_policy(mode, nodes); +} + +/* Generate a custom zonelist for the BIND policy. */ +static struct zonelist *bind_zonelist(unsigned long *nodes) +{ + struct zonelist *zl; + int num, max, nd; + + max = 1 + MAX_NR_ZONES * bitmap_weight(nodes, MAX_NUMNODES); + zl = kmalloc(sizeof(void *) * max, GFP_KERNEL); + if (!zl) + return NULL; + num = 0; + for (nd = find_first_bit(nodes, MAX_NUMNODES); + nd < MAX_NUMNODES; + nd = find_next_bit(nodes, MAX_NUMNODES, 1+nd)) { + int k; + for (k = MAX_NR_ZONES-1; k >= 0; k--) { + struct zone *z = &NODE_DATA(nd)->node_zones[k]; + if (!z->present_pages) + continue; + zl->zones[num++] = z; + if (k > policy_zone) + policy_zone = k; + } + } + BUG_ON(num >= max); + zl->zones[num] = NULL; + return zl; +} + +/* Create a new policy */ +static struct mempolicy *mpol_new(int mode, unsigned long *nodes) +{ + struct mempolicy *policy; + + PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes[0]); + if (mode == MPOL_DEFAULT) + return NULL; + policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); + if (!policy) + return ERR_PTR(-ENOMEM); + atomic_set(&policy->refcnt, 1); + switch (mode) { + case MPOL_INTERLEAVE: + bitmap_copy(policy->v.nodes, nodes, MAX_NUMNODES); + break; + case MPOL_PREFERRED: + policy->v.preferred_node = find_first_bit(nodes, MAX_NUMNODES); + if (policy->v.preferred_node >= MAX_NUMNODES) + policy->v.preferred_node = -1; + break; + case MPOL_BIND: + policy->v.zonelist = bind_zonelist(nodes); + if (policy->v.zonelist
== NULL) { + kmem_cache_free(policy_cache, policy); + return ERR_PTR(-ENOMEM); + } + break; + } + policy->policy = mode; + return policy; +} + +/* Ensure every page already mapped in [addr, end) of mm lies on a node set in nodes; -EIO otherwise. */ +static int verify_pages(struct mm_struct *mm, unsigned long addr, + unsigned long end, unsigned long *nodes) +{ + while (addr < end) { + struct page *p; + pte_t *pte; + pmd_t *pmd; + pgd_t *pgd = pgd_offset(mm, addr); + if (pgd_none(*pgd)) { + addr = (addr + PGDIR_SIZE) & PGDIR_MASK; + continue; + } + pmd = pmd_offset(pgd, addr); + if (pmd_none(*pmd)) { + addr = (addr + PMD_SIZE) & PMD_MASK; + continue; + } + p = NULL; + pte = pte_offset_map(pmd, addr); + if (pte_present(*pte)) + p = pte_page(*pte); + pte_unmap(pte); + if (p) { + unsigned nid = page_to_nid(p); + if (!test_bit(nid, nodes)) + return -EIO; + } + addr += PAGE_SIZE; + } + return 0; +} + +/* Step 1: check the range; -EFAULT on a hole between VMAs or a range running past the last VMA */ +static struct vm_area_struct * +check_range(struct mm_struct *mm, unsigned long start, unsigned long end, + unsigned long *nodes, unsigned long flags) +{ + int err; + struct vm_area_struct *first, *vma, *prev; + + first = find_vma(mm, start); + if (!first) + return ERR_PTR(-EFAULT); + prev = NULL; + for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { + if (!vma->vm_next && vma->vm_end < end) + return ERR_PTR(-EFAULT); + if (prev && prev->vm_end < vma->vm_start) + return ERR_PTR(-EFAULT); + if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) { + err = verify_pages(mm, vma->vm_start, vma->vm_end, nodes); + if (err) { + first = ERR_PTR(err); + break; + } + } + prev = vma; + } + return first; +} + +/* Apply policy to a single VMA */ +static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new) +{ + int err = 0; + struct mempolicy *old = vma->vm_policy; + + PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n", + vma->vm_start, vma->vm_end, vma->vm_pgoff, + vma->vm_ops, vma->vm_file, + vma->vm_ops ?
vma->vm_ops->set_policy : NULL); + + if (vma->vm_file) + down(&vma->vm_file->f_mapping->i_shared_sem); + if (vma->vm_ops && vma->vm_ops->set_policy) + err = vma->vm_ops->set_policy(vma, new); + if (!err) { + mpol_get(new); + vma->vm_policy = new; + mpol_free(old); + } + if (vma->vm_file) + up(&vma->vm_file->f_mapping->i_shared_sem); + return err; +} + +/* Step 2: apply policy to a range and do splits. Stops at the end of the VMA list. */ +static int mbind_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end, struct mempolicy *new) +{ + struct vm_area_struct *next; + int err; + + err = 0; + for (; vma && vma->vm_start < end; vma = next) { + next = vma->vm_next; + if (vma->vm_start < start) + err = split_vma(vma->vm_mm, vma, start, 1); + if (!err && vma->vm_end > end) + err = split_vma(vma->vm_mm, vma, end, 0); + if (!err) + err = policy_vma(vma, new); + if (err) + break; + } + return err; +} + +/* Change policy for a memory range */ +asmlinkage long sys_mbind(unsigned long start, unsigned long len, + unsigned long mode, + unsigned long *nmask, unsigned long maxnode, + unsigned flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + struct mempolicy *new; + unsigned long end; + DECLARE_BITMAP(nodes, MAX_NUMNODES); + int err; + + if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX) + return -EINVAL; + if (start & ~PAGE_MASK) + return -EINVAL; + if (mode == MPOL_DEFAULT) + flags &= ~MPOL_MF_STRICT; + len = (len + PAGE_SIZE - 1) & PAGE_MASK; + end = start + len; + if (end < start) + return -EINVAL; + if (end == start) + return 0; + + err = get_nodes(nodes, nmask, maxnode, mode); + if (err) + return err; + + new = mpol_new(mode, nodes); + if (IS_ERR(new)) + return PTR_ERR(new); + + PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len, + mode,nodes[0]); + + down_write(&mm->mmap_sem); + vma = check_range(mm, start, end, nodes, flags); + err = PTR_ERR(vma); + if (!IS_ERR(vma)) + err = mbind_range(vma, start, end, new); + up_write(&mm->mmap_sem);
+ mpol_free(new); + return err; +} + +/* Set the process memory policy */ +asmlinkage long sys_set_mempolicy(int mode, unsigned long *nmask, + unsigned long maxnode) +{ + int err; + struct mempolicy *new; + DECLARE_BITMAP(nodes, MAX_NUMNODES); + + if (mode < 0 || mode > MPOL_MAX) /* mode is signed: a negative value is not a valid policy either */ + return -EINVAL; + err = get_nodes(nodes, nmask, maxnode, mode); + if (err) + return err; + new = mpol_new(mode, nodes); + if (IS_ERR(new)) + return PTR_ERR(new); + mpol_free(current->mempolicy); + current->mempolicy = new; + if (new && new->policy == MPOL_INTERLEAVE) + current->il_next = find_first_bit(new->v.nodes, MAX_NUMNODES); + return 0; +} + +/* Fill a node bitmap for a policy */ +static void get_zonemask(struct mempolicy *p, unsigned long *nodes) +{ + int i; + + bitmap_zero(nodes, MAX_NUMNODES); + switch (p->policy) { + case MPOL_BIND: + for (i = 0; p->v.zonelist->zones[i]; i++) + __set_bit(p->v.zonelist->zones[i]->zone_pgdat->node_id, nodes); + break; + case MPOL_DEFAULT: + break; + case MPOL_INTERLEAVE: + bitmap_copy(nodes, p->v.nodes, MAX_NUMNODES); + break; + case MPOL_PREFERRED: + /* or use current node instead of online map?
*/ + if (p->v.preferred_node < 0) + bitmap_copy(nodes, node_online_map, MAX_NUMNODES); + else + __set_bit(p->v.preferred_node, nodes); + break; + default: + BUG(); + } +} + +static int lookup_node(struct mm_struct *mm, unsigned long addr) +{ + struct page *p; + int err; + + err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL); + if (err >= 0) { + err = page_zone(p)->zone_pgdat->node_id; + put_page(p); + } + return err; +} + +/* Copy a kernel node mask (MAX_NUMNODES bits) to user space, zero-filling any user buffer beyond it */ +static int copy_nodes_to_user(unsigned long *user_mask, unsigned long maxnode, + unsigned long *nodes) +{ + unsigned long copy = ALIGN(maxnode-1, 64) / 8; + const unsigned long nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(unsigned long); /* sizeof(nodes) would be the pointer size */ + if (copy > nbytes) { + if (copy > PAGE_SIZE) + return -EINVAL; + if (clear_user((char*)user_mask + nbytes, + copy - nbytes)) + return -EFAULT; + copy = nbytes; + } + return copy_to_user(user_mask, nodes, copy) ? -EFAULT : 0; +} + +/* Retrieve NUMA policy */ +asmlinkage long sys_get_mempolicy(int *policy, + unsigned long *nmask, unsigned long maxnode, + unsigned long addr, unsigned long flags) +{ + int err, pval; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = NULL; + struct mempolicy *pol = current->mempolicy; + + if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) + return -EINVAL; + if (nmask != NULL && maxnode < numnodes) + return -EINVAL; + if (flags & MPOL_F_ADDR) { + down_read(&mm->mmap_sem); + vma = find_vma_intersection(mm, addr, addr+1); + if (!vma) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + if (vma->vm_ops && vma->vm_ops->get_policy) + pol = vma->vm_ops->get_policy(vma, addr); + else + pol = vma->vm_policy; + } else if (addr) + return -EINVAL; + + if (!pol) + pol = &default_policy; + + if (flags & MPOL_F_NODE) { + if (flags & MPOL_F_ADDR) { + err = lookup_node(mm, addr); + if (err < 0) + goto out; + pval = err; + } else if (pol == current->mempolicy && + pol->policy == MPOL_INTERLEAVE) { + pval = current->il_next; + } else { + err = -EINVAL; + goto out; + }
+ } else + pval = pol->policy; + + err = -EFAULT; + if (policy && put_user(pval, policy)) + goto out; + + err = 0; + if (nmask) { + DECLARE_BITMAP(nodes, MAX_NUMNODES); + get_zonemask(pol, nodes); + err = copy_nodes_to_user(nmask, maxnode, nodes); + } + + out: + if (vma) + up_read(&current->mm->mmap_sem); + return err; +} + +#ifdef CONFIG_COMPAT +/* The other functions are compatible; nmask may be NULL, in which case nothing is copied back */ +asmlinkage long compat_get_mempolicy(int *policy, + unsigned *nmask, unsigned maxnode, + unsigned addr, unsigned flags) +{ + long err; + unsigned long *nm = NULL; + if (nmask) + nm = compat_alloc_user_space(ALIGN(maxnode-1, 64) / 8); + err = sys_get_mempolicy(policy, nm, maxnode, addr, flags); + if (!err && nmask && copy_in_user(nmask, nm, ALIGN(maxnode-1, 32)/8)) + err = -EFAULT; + return err; +} +#endif + +/* Return effective policy for a VMA */ +static struct mempolicy * +get_vma_policy(struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol = current->mempolicy; + + if (vma) { + if (vma->vm_ops && vma->vm_ops->get_policy) + pol = vma->vm_ops->get_policy(vma, addr); + else if (vma->vm_policy && + vma->vm_policy->policy != MPOL_DEFAULT) + pol = vma->vm_policy; + } + if (!pol) + pol = &default_policy; + return pol; +} + +/* Return a zonelist representing a mempolicy */ +static struct zonelist *zonelist_policy(unsigned gfp, struct mempolicy *policy) +{ + int nd; + + switch (policy->policy) { + case MPOL_PREFERRED: + nd = policy->v.preferred_node; + if (nd < 0) + nd = numa_node_id(); + break; + case MPOL_BIND: + /* Lower zones don't get a policy applied */ + if (gfp >= policy_zone) + return policy->v.zonelist; + /*FALL THROUGH*/ + case MPOL_INTERLEAVE: /* should not happen */ + case MPOL_DEFAULT: + nd = numa_node_id(); + break; + default: + nd = 0; + BUG(); + } + return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK); +} + +/* Do dynamic interleaving for a process */ +static unsigned interleave_nodes(struct mempolicy *policy) +{ + unsigned nid, next; + struct task_struct
*me = current; + + nid = me->il_next; + BUG_ON(nid >= MAX_NUMNODES); + next = find_next_bit(policy->v.nodes, MAX_NUMNODES, 1+nid); + if (next >= MAX_NUMNODES) + next = find_first_bit(policy->v.nodes, MAX_NUMNODES); + me->il_next = next; + return nid; +} + +/* Do static interleaving for a VMA with known offset. */ +static unsigned offset_il_node(struct mempolicy *pol, + struct vm_area_struct *vma, unsigned long off) +{ + unsigned nnodes = bitmap_weight(pol->v.nodes, MAX_NUMNODES); + unsigned target = (unsigned)off % nnodes; + int c; + int nid = -1; + + c = 0; + do { + nid = find_next_bit(pol->v.nodes, MAX_NUMNODES, nid+1); + c++; + } while (c <= target); + BUG_ON(nid >= MAX_NUMNODES); + BUG_ON(!test_bit(nid, pol->v.nodes)); + return nid; +} + +/* Allocate a page in interleaved policy. + Own path because it needs to do special accounting. */ +static struct page *alloc_page_interleave(unsigned gfp, unsigned nid) +{ + struct zonelist *zl; + struct page *page; + + BUG_ON(!test_bit(nid, node_online_map)); + zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK); + page = __alloc_pages(gfp, 0, zl); + if (page && page_zone(page) == zl->zones[0]) { + zl->zones[0]->pageset[get_cpu()].interleave_hit++; + put_cpu(); + } + return page; +} + +/** + * alloc_page_vma - Allocate a page for a VMA. + * + * @gfp: + * %GFP_USER user allocation. + * %GFP_KERNEL kernel allocations, + * %GFP_HIGHMEM highmem/user allocations, + * %GFP_FS allocation should not call back into a file system. + * %GFP_ATOMIC don't sleep. + * + * @vma: Pointer to VMA or NULL if not available. + * @addr: Virtual Address of the allocation. Must be inside the VMA. + * + * This function allocates a page from the kernel page pool and applies + * a NUMA policy associated with the VMA or the current process. + * When VMA is not NULL caller must hold down_read on the mmap_sem of the + * mm_struct of the VMA to prevent it from going away. 
Should be used for + * all allocations for pages that will be mapped into + * user space. Returns NULL when no page can be allocated. + * + * Should be called with the mm_sem of the vma hold. + */ +struct page * +alloc_page_vma(unsigned gfp, struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol = get_vma_policy(vma, addr); + + if (unlikely(pol->policy == MPOL_INTERLEAVE)) { + unsigned nid; + if (vma) { + unsigned long off; + BUG_ON(addr >= vma->vm_end); + BUG_ON(addr < vma->vm_start); + off = vma->vm_pgoff; + off += (addr - vma->vm_start) >> PAGE_SHIFT; + nid = offset_il_node(pol, vma, off); + } else { + /* fall back to process interleaving */ + nid = interleave_nodes(pol); + } + return alloc_page_interleave(gfp, nid); + } + return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); +} + +/** + * alloc_pages_current - Allocate pages. + * + * @gfp: + * %GFP_USER user allocation, + * %GFP_KERNEL kernel allocation, + * %GFP_HIGHMEM highmem allocation, + * %GFP_FS don't call back into a file system. + * %GFP_ATOMIC don't sleep. + * @order: Power of two of allocation size in pages. 0 is a single page. + * + * Allocate a page from the kernel page pool. When not in + * interrupt context and apply the current process NUMA policy. + * Returns NULL when no page can be allocated. 
+ */ +struct page *alloc_pages_current(unsigned gfp, unsigned order) +{ + struct mempolicy *pol = current->mempolicy; + + if (!pol || in_interrupt()) + pol = &default_policy; + if (pol->policy == MPOL_INTERLEAVE && order == 0) + return alloc_page_interleave(gfp, interleave_nodes(pol)); + return __alloc_pages(gfp, order, zonelist_policy(gfp, pol)); +} +EXPORT_SYMBOL(alloc_pages_current); + +/* + * Slow path of a mempolicy copy. + * Allocates a new mempolicy from policy_cache, copies *old into it and sets + * the new refcnt to 1. For MPOL_BIND the zonelist is duplicated as well, so + * the copy does not share the old buffer. Returns ERR_PTR(-ENOMEM) when + * either allocation fails. + */ +struct mempolicy *__mpol_copy(struct mempolicy *old) +{ + struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL); + + if (!new) + return ERR_PTR(-ENOMEM); + *new = *old; + atomic_set(&new->refcnt, 1); + if (new->policy == MPOL_BIND) { + int sz = ksize(old->v.zonelist); + new->v.zonelist = kmalloc(sz, SLAB_KERNEL); + if (!new->v.zonelist) { + kmem_cache_free(policy_cache, new); + return ERR_PTR(-ENOMEM); + } + memcpy(new->v.zonelist, old->v.zonelist, sz); + } + return new; +} + +/* + * Slow path of a mempolicy comparison. + * Returns 1 when both policies are non-NULL, have the same mode and the same + * mode-specific data (node bitmap, preferred node, or NULL-terminated zone + * list); returns 0 otherwise, including when either pointer is NULL. + */ +int __mpol_equal(struct mempolicy *a, struct mempolicy *b) +{ + if (!a || !b) + return 0; + if (a->policy != b->policy) + return 0; + switch (a->policy) { + case MPOL_DEFAULT: + return 1; + case MPOL_INTERLEAVE: + return bitmap_equal(a->v.nodes, b->v.nodes, MAX_NUMNODES); + case MPOL_PREFERRED: + return a->v.preferred_node == b->v.preferred_node; + case MPOL_BIND: { + int i; + for (i = 0; a->v.zonelist->zones[i]; i++) + if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i]) + return 0; + return b->v.zonelist->zones[i] == NULL; + } + default: + BUG(); + return 0; + } +} + +/* + * Slow path of a mpol destructor. + * Drops one reference; when that was the last one, frees the MPOL_BIND + * zonelist (if any), resets the mode to MPOL_DEFAULT and returns the + * structure to policy_cache. + */ +extern void __mpol_free(struct mempolicy *p) +{ + if (!atomic_dec_and_test(&p->refcnt)) + return; + if (p->policy == MPOL_BIND) + kfree(p->v.zonelist); + p->policy = MPOL_DEFAULT; + kmem_cache_free(policy_cache, p); +} + +/* + * Hugetlb policy. Same as above, just works with node numbers instead of + * zonelists. 
+ */ + +/* + * Find first node suitable for an allocation under the effective policy + * for @vma / @addr. For MPOL_INTERLEAVE this goes through + * interleave_nodes(), which also advances current->il_next. + */ +int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol = get_vma_policy(vma, addr); + + switch (pol->policy) { + case MPOL_DEFAULT: + return numa_node_id(); + case MPOL_BIND: + return pol->v.zonelist->zones[0]->zone_pgdat->node_id; + case MPOL_INTERLEAVE: + return interleave_nodes(pol); + case MPOL_PREFERRED: + return pol->v.preferred_node >= 0 ? + pol->v.preferred_node : numa_node_id(); + } + BUG(); + return 0; +} + +/* + * Find secondary valid nodes for an allocation. + * Returns 1 when @nid may be used: always for the PREFERRED, DEFAULT and + * INTERLEAVE modes; for MPOL_BIND only when some zone in the bound zonelist + * belongs to node @nid. Returns 0 otherwise. + */ +int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol = get_vma_policy(vma, addr); + + switch (pol->policy) { + case MPOL_PREFERRED: + case MPOL_DEFAULT: + case MPOL_INTERLEAVE: + return 1; + case MPOL_BIND: { + struct zone **z; + for (z = pol->v.zonelist->zones; *z; z++) + if ((*z)->zone_pgdat->node_id == nid) + return 1; + return 0; + } + default: + BUG(); + return 0; + } +} + +/* + * Shared memory backing store policy support. + * + * Remember policies even when nobody has shared memory mapped. + * The policies are kept in Red-Black tree linked from the inode. + * They are protected by the sp->sem semaphore, which should be held + * for any accesses to the tree. 
+ */ + +/* + * lookup first element intersecting start-end. + * Ranges are half-open [start, end): a node whose start equals @end does + * not intersect, so the descent must go left for end <= p->start. + */ +/* Caller holds sp->sem */ +static struct sp_node * +sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end) +{ + struct rb_node *n = sp->root.rb_node; + + while (n) { + struct sp_node *p = rb_entry(n, struct sp_node, nd); + if (start >= p->end) { + n = n->rb_right; + } else if (end <= p->start) { + n = n->rb_left; + } else { + break; + } + } + if (!n) + return NULL; + /* Walk back to the leftmost node that still overlaps the range */ + for (;;) { + struct sp_node *w = NULL; + struct rb_node *prev = rb_prev(n); + if (!prev) + break; + w = rb_entry(prev, struct sp_node, nd); + if (w->end <= start) + break; + n = prev; + } + return rb_entry(n, struct sp_node, nd); +} + +/* Insert a new shared policy into the list. */ +/* Caller holds sp->sem */ +static void sp_insert(struct shared_policy *sp, struct sp_node *new) +{ + struct rb_node **p = &sp->root.rb_node; + struct rb_node *parent = NULL; + struct sp_node *nd; + + while (*p) { + parent = *p; + nd = rb_entry(parent, struct sp_node, nd); + if (new->start < nd->start) + p = &(*p)->rb_left; + else if (new->end > nd->end) + p = &(*p)->rb_right; + else + BUG(); + } + rb_link_node(&new->nd, parent, p); + rb_insert_color(&new->nd, &sp->root); + PDprintk("inserting %lx-%lx: %d\n", new->start, new->end, + new->policy ? 
new->policy->policy : 0); +} + +/* Find shared policy intersecting idx; a reference is taken on the returned policy */ +struct mempolicy * +mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx) +{ + struct mempolicy *pol = NULL; + struct sp_node *sn; + + down(&sp->sem); + sn = sp_lookup(sp, idx, idx+1); + if (sn) { + mpol_get(sn->policy); + pol = sn->policy; + } + up(&sp->sem); + return pol; +} + +/* Unlink @n from the tree, drop its policy reference and free the node */ +static void sp_delete(struct shared_policy *sp, struct sp_node *n) +{ + PDprintk("deleting %lx-%lx\n", n->start, n->end); + rb_erase(&n->nd, &sp->root); + mpol_free(n->policy); + kmem_cache_free(sn_cache, n); +} + +/* Allocate a node for [start, end) holding its own reference on @pol; NULL on allocation failure */ +struct sp_node * +sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol) +{ + struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL); + + if (!n) + return NULL; + n->start = start; + n->end = end; + mpol_get(pol); + n->policy = pol; + return n; +} + +/* + * Replace a policy range. + * Trims, splits or deletes every old node overlapping [start, end) and then + * inserts @new (if non-NULL). Returns 0, or -ENOMEM when splitting an old + * node needs an allocation that fails; @new is not inserted in that case. + */ +static int shared_policy_replace(struct shared_policy *sp, unsigned long start, + unsigned long end, struct sp_node *new) +{ + struct sp_node *n, *new2; + + down(&sp->sem); + n = sp_lookup(sp, start, end); + /* Take care of old policies in the same range. */ + while (n && n->start < end) { + struct rb_node *next = rb_next(&n->nd); + if (n->start >= start) { + if (n->end <= end) + sp_delete(sp, n); + else + n->start = end; + } else { + /* Old policy spanning whole new range. 
*/ + if (n->end > end) { + new2 = sp_alloc(end, n->end, n->policy); + if (!new2) { + up(&sp->sem); + return -ENOMEM; + } + n->end = end; + sp_insert(sp, new2); + } + /* Old crossing beginning, but not end (easy) */ + if (n->start < start && n->end > start) + n->end = start; + } + if (!next) + break; + n = rb_entry(next, struct sp_node, nd); + } + if (new) + sp_insert(sp, new); + up(&sp->sem); + return 0; +} + +/* + * Install @npol for the page-offset range covered by @vma in @info; a NULL + * @npol only removes existing policies from that range. Returns 0 or -ENOMEM. + */ +int mpol_set_shared_policy(struct shared_policy *info, + struct vm_area_struct *vma, struct mempolicy *npol) +{ + int err; + struct sp_node *new = NULL; + unsigned long sz = vma_pages(vma); + + PDprintk("set_shared_policy %lx sz %lu %d %lx\n", + vma->vm_pgoff, + sz, npol? npol->policy : -1, + npol ? npol->v.nodes[0] : -1); + + if (npol) { + new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol); + if (!new) + return -ENOMEM; + } + err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new); + if (err && new) { + /* node was never inserted: drop the reference sp_alloc() took */ + mpol_free(new->policy); + kmem_cache_free(sn_cache, new); + } + return err; +} + +/* Free a backing policy store on inode delete. */ +void mpol_free_shared_policy(struct shared_policy *p) +{ + struct sp_node *n; + struct rb_node *next; + + down(&p->sem); + next = rb_first(&p->root); + while (next) { + n = rb_entry(next, struct sp_node, nd); + next = rb_next(&n->nd); + rb_erase(&n->nd, &p->root); + mpol_free(n->policy); + kmem_cache_free(sn_cache, n); + } + up(&p->sem); +} + +/* Create the slab caches for mempolicy and sp_node objects (SLAB_PANIC on failure) */ +static __init int numa_policy_init(void) +{ + policy_cache = kmem_cache_create("numa_policy", + sizeof(struct mempolicy), + 0, SLAB_PANIC, NULL, NULL); + + sn_cache = kmem_cache_create("shared_policy_node", + sizeof(struct sp_node), + 0, SLAB_PANIC, NULL, NULL); + return 0; +} +module_init(numa_policy_init); --- linux-2.6.6-rc1/mm/mincore.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/mm/mincore.c 2004-04-18 22:26:02.149382336 -0700 @@ -14,7 +14,6 @@ #include #include -#include /* * Later we can get more picky about what "in core" means precisely. 
--- linux-2.6.6-rc1/mm/mmap.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/mmap.c 2004-04-18 22:26:02.150382184 -0700 @@ -23,7 +23,6 @@ #include #include -#include #include /* @@ -333,8 +332,6 @@ static inline int is_mergeable_vma(struc return 0; if (vma->vm_flags != vm_flags) return 0; - if (vma->vm_private_data) - return 0; return 1; } @@ -389,7 +386,8 @@ static struct vm_area_struct *vma_merge( struct vm_area_struct *prev, struct rb_node *rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags, - struct file *file, unsigned long pgoff) + struct file *file, unsigned long pgoff, + struct mempolicy *policy) { spinlock_t *lock = &mm->page_table_lock; struct inode *inode = file ? file->f_dentry->d_inode : NULL; @@ -413,6 +411,7 @@ static struct vm_area_struct *vma_merge( * Can it merge with the predecessor? */ if (prev->vm_end == addr && + mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, file, pgoff)) { struct vm_area_struct *next; int need_up = 0; @@ -430,6 +429,7 @@ static struct vm_area_struct *vma_merge( */ next = prev->vm_next; if (next && prev->vm_end == next->vm_start && + vma_mpol_equal(prev, next) && can_vma_merge_before(next, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) { prev->vm_end = next->vm_end; @@ -442,6 +442,7 @@ static struct vm_area_struct *vma_merge( fput(file); mm->map_count--; + mpol_free(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); return prev; } @@ -457,6 +458,8 @@ static struct vm_area_struct *vma_merge( prev = prev->vm_next; if (prev) { merge_next: + if (!mpol_equal(policy, vma_policy(prev))) + return 0; if (!can_vma_merge_before(prev, vm_flags, file, pgoff, (end - addr) >> PAGE_SHIFT)) return NULL; @@ -633,7 +636,7 @@ munmap_back: /* Can we just expand an old anonymous mapping? 
*/ if (!file && !(vm_flags & VM_SHARED) && rb_parent) if (vma_merge(mm, prev, rb_parent, addr, addr + len, - vm_flags, NULL, 0)) + vm_flags, NULL, pgoff, NULL)) goto out; /* @@ -656,6 +659,7 @@ munmap_back: vma->vm_file = NULL; vma->vm_private_data = NULL; vma->vm_next = NULL; + mpol_set_vma_default(vma); INIT_LIST_HEAD(&vma->shared); if (file) { @@ -695,7 +699,9 @@ munmap_back: addr = vma->vm_start; if (!file || !rb_parent || !vma_merge(mm, prev, rb_parent, addr, - addr + len, vma->vm_flags, file, pgoff)) { + vma->vm_end, + vma->vm_flags, file, pgoff, + vma_policy(vma))) { vma_link(mm, vma, prev, rb_link, rb_parent); if (correct_wcount) atomic_inc(&inode->i_writecount); @@ -705,6 +711,7 @@ munmap_back: atomic_inc(&inode->i_writecount); fput(file); } + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); } out: @@ -728,7 +735,7 @@ unmap_and_free_vma: fput(file); /* Undo any partial mapping done by a device driver. */ - zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL); free_vma: kmem_cache_free(vm_area_cachep, vma); unacct_error: @@ -1120,6 +1127,7 @@ static void unmap_vma(struct mm_struct * remove_shared_vm_struct(area); + mpol_free(vma_policy(area)); if (area->vm_ops && area->vm_ops->close) area->vm_ops->close(area); if (area->vm_file) @@ -1160,7 +1168,7 @@ static void unmap_region(struct mm_struc lru_add_drain(); tlb = tlb_gather_mmu(mm, 0); - unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted); + unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); if (is_hugepage_only_range(start, end - start)) @@ -1202,6 +1210,7 @@ detach_vmas_to_be_unmapped(struct mm_str int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr, int new_below) { + struct mempolicy *pol; struct vm_area_struct *new; struct address_space *mapping = NULL; @@ -1224,6 +1233,13 @@ int split_vma(struct mm_struct * mm, str new->vm_pgoff 
+= ((addr - vma->vm_start) >> PAGE_SHIFT); } + pol = mpol_copy(vma_policy(vma)); + if (IS_ERR(pol)) { + kmem_cache_free(vm_area_cachep, new); + return PTR_ERR(pol); + } + vma_set_policy(new, pol); + if (new->vm_file) get_file(new->vm_file); @@ -1393,7 +1409,7 @@ unsigned long do_brk(unsigned long addr, /* Can we just expand an old anonymous mapping? */ if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, - flags, NULL, 0)) + flags, NULL, 0, NULL)) goto out; /* @@ -1414,6 +1430,7 @@ unsigned long do_brk(unsigned long addr, vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; + mpol_set_vma_default(vma); INIT_LIST_HEAD(&vma->shared); vma_link(mm, vma, prev, rb_link, rb_parent); @@ -1446,7 +1463,7 @@ void exit_mmap(struct mm_struct *mm) flush_cache_mm(mm); /* Use ~0UL here to ensure all VMAs in the mm are unmapped */ mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0, - ~0UL, &nr_accounted); + ~0UL, &nr_accounted, NULL); vm_unacct_memory(nr_accounted); BUG_ON(mm->map_count); /* This is just debugging */ clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD); @@ -1474,6 +1491,7 @@ void exit_mmap(struct mm_struct *mm) } if (vma->vm_file) fput(vma->vm_file); + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); vma = next; } @@ -1498,20 +1516,36 @@ void insert_vm_struct(struct mm_struct * * Copy the vma structure to a new location in the same mm, * prior to moving page table entries, to effect an mremap move. 
*/ -struct vm_area_struct *copy_vma(struct vm_area_struct *vma, +struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, unsigned long addr, unsigned long len, unsigned long pgoff) { + struct vm_area_struct *vma = *vmap; + unsigned long vma_start = vma->vm_start; struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma, *prev; struct rb_node **rb_link, *rb_parent; + struct mempolicy *pol; find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len, - vma->vm_flags, vma->vm_file, pgoff); - if (!new_vma) { + vma->vm_flags, vma->vm_file, pgoff, vma_policy(vma)); + if (new_vma) { + /* + * Source vma may have been merged into new_vma + */ + if (vma_start >= new_vma->vm_start && + vma_start < new_vma->vm_end) + *vmap = new_vma; + } else { new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (new_vma) { *new_vma = *vma; + pol = mpol_copy(vma_policy(vma)); + if (IS_ERR(pol)) { + kmem_cache_free(vm_area_cachep, new_vma); + return NULL; + } + vma_set_policy(new_vma, pol); INIT_LIST_HEAD(&new_vma->shared); new_vma->vm_start = addr; new_vma->vm_end = addr + len; @@ -1525,24 +1559,3 @@ struct vm_area_struct *copy_vma(struct v } return new_vma; } - -/* - * Position vma after prev in shared file list: - * for mremap move error recovery racing against vmtruncate. 
- */ -void vma_relink_file(struct vm_area_struct *vma, struct vm_area_struct *prev) -{ - struct mm_struct *mm = vma->vm_mm; - struct address_space *mapping; - - if (vma->vm_file) { - mapping = vma->vm_file->f_mapping; - if (mapping) { - down(&mapping->i_shared_sem); - spin_lock(&mm->page_table_lock); - list_move(&vma->shared, &prev->shared); - spin_unlock(&mm->page_table_lock); - up(&mapping->i_shared_sem); - } - } -} --- linux-2.6.6-rc1/mm/mprotect.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/mm/mprotect.c 2004-04-18 22:26:02.150382184 -0700 @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -114,16 +113,19 @@ static int mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long end, int newflags) { - struct mm_struct * mm = vma->vm_mm; + struct mm_struct * mm; if (!prev || !vma) return 0; + mm = vma->vm_mm; if (prev->vm_end != vma->vm_start) return 0; if (!can_vma_merge(prev, newflags)) return 0; if (vma->vm_file || (vma->vm_flags & VM_SHARED)) return 0; + if (!vma_mpol_equal(vma, prev)) + return 0; /* * If the whole area changes to the protection of the previous one @@ -135,6 +137,7 @@ mprotect_attempt_merge(struct vm_area_st __vma_unlink(mm, vma, prev); spin_unlock(&mm->page_table_lock); + mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); mm->map_count--; return 1; @@ -317,12 +320,14 @@ sys_mprotect(unsigned long start, size_t if (next && prev->vm_end == next->vm_start && can_vma_merge(next, prev->vm_flags) && + vma_mpol_equal(prev, next) && !prev->vm_file && !(prev->vm_flags & VM_SHARED)) { spin_lock(&prev->vm_mm->page_table_lock); prev->vm_end = next->vm_end; __vma_unlink(prev->vm_mm, next, prev); spin_unlock(&prev->vm_mm->page_table_lock); + mpol_free(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); prev->vm_mm->map_count--; } --- linux-2.6.6-rc1/mm/mremap.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/mremap.c 2004-04-18 22:26:02.151382032 -0700 @@ -19,7 +19,6 @@ #include 
#include -#include #include #include @@ -79,21 +78,19 @@ static inline pte_t *alloc_one_pte_map(s return pte; } -static void -copy_one_pte(struct vm_area_struct *vma, unsigned long old_addr, - pte_t *src, pte_t *dst, struct pte_chain **pte_chainp) +static inline int +can_move_one_pte(pte_t *src, unsigned long new_addr) { - pte_t pte = ptep_clear_flush(vma, old_addr, src); - set_pte(dst, pte); - - if (pte_present(pte)) { - unsigned long pfn = pte_pfn(pte); + int move = 1; + if (pte_present(*src)) { + unsigned long pfn = pte_pfn(*src); if (pfn_valid(pfn)) { struct page *page = pfn_to_page(pfn); - page_remove_rmap(page, src); - *pte_chainp = page_add_rmap(page, dst, *pte_chainp); + if (PageAnon(page)) + move = mremap_move_anon_rmap(page, new_addr); } } + return move; } static int @@ -103,13 +100,7 @@ move_one_page(struct vm_area_struct *vma struct mm_struct *mm = vma->vm_mm; int error = 0; pte_t *src, *dst; - struct pte_chain *pte_chain; - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) { - error = -ENOMEM; - goto out; - } spin_lock(&mm->page_table_lock); src = get_one_pte_map_nested(mm, old_addr); if (src) { @@ -130,23 +121,26 @@ move_one_page(struct vm_area_struct *vma * page_table_lock, we should re-check the src entry... */ if (src) { - if (dst) - copy_one_pte(vma, old_addr, src, - dst, &pte_chain); - else + if (!dst) error = -ENOMEM; + else if (!can_move_one_pte(src, new_addr)) + error = -EAGAIN; + else { + pte_t pte; + pte = ptep_clear_flush(vma, old_addr, src); + set_pte(dst, pte); + } pte_unmap_nested(src); } pte_unmap(dst); } spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); -out: return error; } static int move_page_tables(struct vm_area_struct *vma, - unsigned long new_addr, unsigned long old_addr, unsigned long len) + unsigned long new_addr, unsigned long old_addr, + unsigned long len, int *cows) { unsigned long offset; @@ -158,8 +152,23 @@ static int move_page_tables(struct vm_ar * only a few pages.. 
This also makes error recovery easier. */ for (offset = 0; offset < len; offset += PAGE_SIZE) { - if (move_one_page(vma, old_addr+offset, new_addr+offset) < 0) + int ret = move_one_page(vma, old_addr+offset, new_addr+offset); + /* + * The anonmm objrmap can only track anon page movements + * if the page is exclusive to one mm. In the rare case + * when mremap move is applied to a shared page, break + * COW (take a copy of the page) to make it exclusive. + * If shared while on swap, page will be copied when + * brought back in (if it's still shared by then). + */ + if (ret == -EAGAIN) { + ret = make_page_exclusive(vma, old_addr+offset); + offset -= PAGE_SIZE; + (*cows)++; + } + if (ret) break; + cond_resched(); } return offset; } @@ -169,12 +178,14 @@ static unsigned long move_vma(struct vm_ unsigned long new_len, unsigned long new_addr) { struct mm_struct *mm = vma->vm_mm; + struct address_space *mapping = NULL; struct vm_area_struct *new_vma; unsigned long vm_flags = vma->vm_flags; unsigned long new_pgoff; unsigned long moved_len; unsigned long excess = 0; int split = 0; + int cows = 0; /* * We'd prefer to avoid failure later on in do_munmap: @@ -184,30 +195,38 @@ static unsigned long move_vma(struct vm_ return -ENOMEM; new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT); - new_vma = copy_vma(vma, new_addr, new_len, new_pgoff); + new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff); if (!new_vma) return -ENOMEM; - moved_len = move_page_tables(vma, new_addr, old_addr, old_len); + if (vma->vm_file) { + /* + * Subtle point from Rajesh Venkatasubramanian: before + * moving file-based ptes, we must lock vmtruncate out, + * since it might clean the dst vma before the src vma, + * and we propagate stale pages into the dst afterward. 
+ */ + mapping = vma->vm_file->f_mapping; + down(&mapping->i_shared_sem); + } + moved_len = move_page_tables(vma, new_addr, old_addr, old_len, &cows); if (moved_len < old_len) { /* * On error, move entries back from new area to old, * which will succeed since page tables still there, * and then proceed to unmap new area instead of old. - * - * Subtle point from Rajesh Venkatasubramanian: before - * moving file-based ptes, move new_vma before old vma - * in the i_mmap or i_mmap_shared list, so when racing - * against vmtruncate we cannot propagate pages to be - * truncated back from new_vma into just cleaned old. */ - vma_relink_file(vma, new_vma); - move_page_tables(new_vma, old_addr, new_addr, moved_len); + move_page_tables(new_vma, old_addr, new_addr, moved_len, &cows); vma = new_vma; old_len = new_len; old_addr = new_addr; new_addr = -ENOMEM; } + if (cows) /* Downgrade or remove this message later */ + printk(KERN_WARNING "%s: mremap moved %d cows\n", + current->comm, cows); + if (mapping) + up(&mapping->i_shared_sem); /* Conceal VM_ACCOUNT so old reservation is not undone */ if (vm_flags & VM_ACCOUNT) { --- linux-2.6.6-rc1/mm/msync.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/mm/msync.c 2004-04-18 22:26:02.151382032 -0700 @@ -13,7 +13,6 @@ #include #include -#include #include /* --- linux-2.6.6-rc1/mm/nommu.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/nommu.c 2004-04-18 22:26:02.152381880 -0700 @@ -18,9 +18,7 @@ #include #include #include -#include -#include #include #include #include @@ -568,11 +566,3 @@ unsigned long get_unmapped_area(struct f { return -ENOMEM; } - -void pte_chain_init(void) -{ -} - -void swap_unplug_io_fn(struct backing_dev_info *) -{ -} --- linux-2.6.6-rc1/mm/page_alloc.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/page_alloc.c 2004-04-18 22:25:50.535147968 -0700 @@ -460,6 +460,32 @@ void drain_local_pages(void) } #endif /* CONFIG_PM */ +static void zone_statistics(struct zonelist *zonelist, struct zone *z) +{ +#ifdef CONFIG_NUMA + 
unsigned long flags; + int cpu; + pg_data_t *pg = z->zone_pgdat; + pg_data_t *orig = zonelist->zones[0]->zone_pgdat; + struct per_cpu_pageset *p; + + local_irq_save(flags); + cpu = smp_processor_id(); + p = &z->pageset[cpu]; + if (pg == orig) { + z->pageset[cpu].numa_hit++; + } else { + p->numa_miss++; + zonelist->zones[0]->pageset[cpu].numa_foreign++; + } + if (pg == NODE_DATA(numa_node_id())) + p->local_node++; + else + p->other_node++; + local_irq_restore(flags); +#endif +} + /* * Free a 0-order page */ @@ -593,8 +619,10 @@ __alloc_pages(unsigned int gfp_mask, uns if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, gfp_mask); - if (page) + if (page) { + zone_statistics(zonelist, z); goto got_pg; + } } } @@ -616,8 +644,10 @@ __alloc_pages(unsigned int gfp_mask, uns if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, gfp_mask); - if (page) + if (page) { + zone_statistics(zonelist, z); goto got_pg; + } } } @@ -630,8 +660,10 @@ rebalance: struct zone *z = zones[i]; page = buffered_rmqueue(z, order, gfp_mask); - if (page) + if (page) { + zone_statistics(zonelist, z); goto got_pg; + } } goto nopage; } @@ -658,8 +690,10 @@ rebalance: if (z->free_pages >= min || (!wait && z->free_pages >= z->pages_high)) { page = buffered_rmqueue(z, order, gfp_mask); - if (page) + if (page) { + zone_statistics(zonelist, z); goto got_pg; + } } } --- linux-2.6.6-rc1/mm/page-writeback.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/page-writeback.c 2004-04-18 22:25:47.305638928 -0700 @@ -91,6 +91,7 @@ int block_dump; * Flag that puts the machine in "laptop mode". 
*/ int laptop_mode; +EXPORT_SYMBOL(laptop_mode); /* End of sysctl-exported parameters */ --- linux-2.6.6-rc1/mm/pdflush.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/mm/pdflush.c 2004-04-18 22:25:34.879527984 -0700 @@ -88,6 +88,8 @@ struct pdflush_work { unsigned long when_i_went_to_sleep; }; +static int wakeup_count = 100; + static int __pdflush(struct pdflush_work *my_work) { current->flags |= PF_FLUSHER; @@ -114,7 +116,10 @@ static int __pdflush(struct pdflush_work spin_lock_irq(&pdflush_lock); if (!list_empty(&my_work->list)) { - printk("pdflush: bogus wakeup!\n"); + if (wakeup_count > 0) { + wakeup_count--; + printk("pdflush: bogus wakeup!\n"); + } my_work->fn = NULL; continue; } @@ -190,6 +195,7 @@ int pdflush_operation(void (*fn)(unsigne { unsigned long flags; int ret = 0; + static int poke_count = 0; if (fn == NULL) BUG(); /* Hard to diagnose if it's deferred */ @@ -198,9 +204,19 @@ int pdflush_operation(void (*fn)(unsigne if (list_empty(&pdflush_list)) { spin_unlock_irqrestore(&pdflush_lock, flags); ret = -1; + if (wakeup_count < 100 && poke_count < 10) { + printk("%s: no threads\n", __FUNCTION__); + dump_stack(); + poke_count++; + } } else { struct pdflush_work *pdf; + if (wakeup_count < 100 && poke_count < 10) { + printk("%s: found a thread\n", __FUNCTION__); + dump_stack(); + poke_count++; + } pdf = list_entry(pdflush_list.next, struct pdflush_work, list); list_del_init(&pdf->list); if (list_empty(&pdflush_list)) --- linux-2.6.6-rc1/mm/rmap.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/rmap.c 2004-04-18 22:25:33.307766928 -0700 @@ -4,17 +4,14 @@ * Copyright 2001, Rik van Riel * Released under the General Public License (GPL). * - * - * Simple, low overhead pte-based reverse mapping scheme. - * This is kept modular because we may want to experiment - * with object-based reverse mapping schemes. Please try - * to keep this thing as modular as possible. + * Simple, low overhead reverse mapping scheme. 
+ * Please try to keep this thing as modular as possible. */ /* * Locking: - * - the page->pte.chain is protected by the PG_maplock bit, - * which nests within the the mm->page_table_lock, + * - the page->mapcount field is protected by the PG_maplock bit, + * which nests within the mm->page_table_lock, * which nests within the page lock. * - because swapout locking is opposite to the locking order * in the page fault path, the swapout path uses trylocks @@ -27,106 +24,349 @@ #include #include #include -#include -#include -#include -#include -#include #include /* - * Something oopsable to put for now in the page->mapping - * of an anonymous page, to test that it is ignored. - */ -#define ANON_MAPPING_DEBUG ((struct address_space *) 0xADB) + * struct anonmm: to track a bundle of anonymous memory mappings. + * + * Could be embedded in mm_struct, but mm_struct is rather heavyweight, + * and we may need the anonmm to stay around long after the mm_struct + * and its pgd have been freed: because pages originally faulted into + * that mm have been duped into forked mms, and still need tracking. + */ +struct anonmm { + atomic_t count; /* ref count, including 1 per page */ + spinlock_t lock; /* head's locks list; others unused */ + struct mm_struct *mm; /* assoc mm_struct, NULL when gone */ + struct anonmm *head; /* exec starts new chain from head */ + struct list_head list; /* chain of associated anonmms */ +}; +static kmem_cache_t *anonmm_cachep; -static inline void clear_page_anon(struct page *page) +/** + ** Functions for creating and destroying struct anonmm. + **/ + +void __init init_rmap(void) { - BUG_ON(page->mapping != ANON_MAPPING_DEBUG); - page->mapping = NULL; - ClearPageAnon(page); + anonmm_cachep = kmem_cache_create("anonmm", + sizeof(struct anonmm), 0, SLAB_PANIC, NULL, NULL); } -/* - * Shared pages have a chain of pte_chain structures, used to locate - * all the mappings to this page. 
We only need a pointer to the pte - * here, the page struct for the page table page contains the process - * it belongs to and the offset within that process. - * - * We use an array of pte pointers in this structure to minimise cache misses - * while traversing reverse maps. - */ -#define NRPTE ((L1_CACHE_BYTES - sizeof(unsigned long))/sizeof(pte_addr_t)) - -/* - * next_and_idx encodes both the address of the next pte_chain and the - * offset of the lowest-index used pte in ptes[] (which is equal also - * to the offset of the highest-index unused pte in ptes[], plus one). - */ -struct pte_chain { - unsigned long next_and_idx; - pte_addr_t ptes[NRPTE]; -} ____cacheline_aligned; +int exec_rmap(struct mm_struct *mm) +{ + struct anonmm *anonmm; -kmem_cache_t *pte_chain_cache; + anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL); + if (unlikely(!anonmm)) + return -ENOMEM; -static inline struct pte_chain *pte_chain_next(struct pte_chain *pte_chain) -{ - return (struct pte_chain *)(pte_chain->next_and_idx & ~NRPTE); + atomic_set(&anonmm->count, 2); /* ref by mm and head */ + anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is used */ + anonmm->mm = mm; + anonmm->head = anonmm; + INIT_LIST_HEAD(&anonmm->list); + mm->anonmm = anonmm; + return 0; } -static inline struct pte_chain *pte_chain_ptr(unsigned long pte_chain_addr) +int dup_rmap(struct mm_struct *mm, struct mm_struct *oldmm) { - return (struct pte_chain *)(pte_chain_addr & ~NRPTE); + struct anonmm *anonmm; + struct anonmm *anonhd = oldmm->anonmm->head; + + anonmm = kmem_cache_alloc(anonmm_cachep, SLAB_KERNEL); + if (unlikely(!anonmm)) + return -ENOMEM; + + /* + * copy_mm calls us before dup_mmap has reset the mm fields, + * so reset rss ourselves before adding to anonhd's list, + * to keep away from this mm until it's worth examining. 
+ */ + mm->rss = 0; + + atomic_set(&anonmm->count, 1); /* ref by mm */ + anonmm->lock = SPIN_LOCK_UNLOCKED; /* this lock is not used */ + anonmm->mm = mm; + anonmm->head = anonhd; + spin_lock(&anonhd->lock); + atomic_inc(&anonhd->count); /* ref by anonmm's head */ + list_add_tail(&anonmm->list, &anonhd->list); + spin_unlock(&anonhd->lock); + mm->anonmm = anonmm; + return 0; +} + +void exit_rmap(struct mm_struct *mm) +{ + struct anonmm *anonmm = mm->anonmm; + struct anonmm *anonhd = anonmm->head; + + mm->anonmm = NULL; + spin_lock(&anonhd->lock); + anonmm->mm = NULL; + if (atomic_dec_and_test(&anonmm->count)) { + BUG_ON(anonmm == anonhd); + list_del(&anonmm->list); + kmem_cache_free(anonmm_cachep, anonmm); + if (atomic_dec_and_test(&anonhd->count)) + BUG(); + } + spin_unlock(&anonhd->lock); + if (atomic_read(&anonhd->count) == 1) { + BUG_ON(anonhd->mm); + BUG_ON(!list_empty(&anonhd->list)); + kmem_cache_free(anonmm_cachep, anonhd); + } } -static inline int pte_chain_idx(struct pte_chain *pte_chain) -{ - return pte_chain->next_and_idx & NRPTE; +static void free_anonmm(struct anonmm *anonmm) +{ + struct anonmm *anonhd = anonmm->head; + + BUG_ON(anonmm->mm); + BUG_ON(anonmm == anonhd); + spin_lock(&anonhd->lock); + list_del(&anonmm->list); + if (atomic_dec_and_test(&anonhd->count)) + BUG(); + spin_unlock(&anonhd->lock); + kmem_cache_free(anonmm_cachep, anonmm); } -static inline unsigned long -pte_chain_encode(struct pte_chain *pte_chain, int idx) +static inline void clear_page_anon(struct page *page) { - return (unsigned long)pte_chain | idx; + struct anonmm *anonmm = (struct anonmm *) page->mapping; + + page->mapping = NULL; + ClearPageAnon(page); + if (atomic_dec_and_test(&anonmm->count)) + free_anonmm(anonmm); } +/** + ** VM stuff below this comment + **/ + /* - * pte_chain list management policy: - * - * - If a page has a pte_chain list then it is shared by at least two processes, - * because a single sharing uses PageDirect. 
(Well, this isn't true yet, - * coz this code doesn't collapse singletons back to PageDirect on the remove - * path). - * - A pte_chain list has free space only in the head member - all succeeding - * members are 100% full. - * - If the head element has free space, it occurs in its leading slots. - * - All free space in the pte_chain is at the start of the head member. - * - Insertion into the pte_chain puts a pte pointer in the last free slot of - * the head member. - * - Removal from a pte chain moves the head pte of the head member onto the - * victim pte and frees the head member if it became empty. + * At what user virtual address is pgoff expected in file-backed vma? */ +static inline +unsigned long vma_address(struct vm_area_struct *vma, pgoff_t pgoff) +{ + unsigned long address; + + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + return (address >= vma->vm_start && address < vma->vm_end)? + address: -EFAULT; +} /** - ** VM stuff below this comment + ** Subfunctions of page_referenced: page_referenced_one called + ** repeatedly from either page_referenced_anon or page_referenced_file. **/ +static int page_referenced_one(struct page *page, + struct mm_struct *mm, unsigned long address, + unsigned int *mapcount, int *failed) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int referenced = 0; + + if (!spin_trylock(&mm->page_table_lock)) { + /* + * For debug we're currently warning if not all found, + * but in this case that's expected: suppress warning. 
+ */ + (*failed)++; + return 0; + } + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out_unlock; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) + goto out_unlock; + + pte = pte_offset_map(pmd, address); + if (!pte_present(*pte)) + goto out_unmap; + + if (page_to_pfn(page) != pte_pfn(*pte)) + goto out_unmap; + + if (ptep_test_and_clear_young(pte)) + referenced++; + + (*mapcount)--; + +out_unmap: + pte_unmap(pte); + +out_unlock: + spin_unlock(&mm->page_table_lock); + return referenced; +} + +static inline int page_referenced_anon(struct page *page) +{ + unsigned int mapcount = page->mapcount; + struct anonmm *anonmm = (struct anonmm *) page->mapping; + struct anonmm *anonhd = anonmm->head; + struct anonmm *new_anonmm = anonmm; + struct list_head *seek_head; + int referenced = 0; + int failed = 0; + + spin_lock(&anonhd->lock); + /* + * First try the indicated mm, it's the most likely. + * Make a note to migrate the page if this mm is extinct. + */ + if (!anonmm->mm) + new_anonmm = NULL; + else if (anonmm->mm->rss) { + referenced += page_referenced_one(page, + anonmm->mm, page->index, &mapcount, &failed); + if (!mapcount) + goto out; + } + + /* + * Then down the rest of the list, from that as the head. Stop + * when we reach anonhd? No: although a page cannot get dup'ed + * into an older mm, once swapped, its indicated mm may not be + * the oldest, just the first into which it was faulted back. + * If original mm now extinct, note first to contain the page. 
+ */ + seek_head = &anonmm->list; + list_for_each_entry(anonmm, seek_head, list) { + if (!anonmm->mm || !anonmm->mm->rss) + continue; + referenced += page_referenced_one(page, + anonmm->mm, page->index, &mapcount, &failed); + if (!new_anonmm && mapcount < page->mapcount) + new_anonmm = anonmm; + if (!mapcount) { + anonmm = (struct anonmm *) page->mapping; + if (new_anonmm == anonmm) + goto out; + goto migrate; + } + } + + /* + * The warning below may appear if page_referenced catches the + * page in between page_add_rmap and its replacement demanded + * by mremap_moved_anon_page: so remove the warning once we're + * convinced that anonmm rmap really is finding its pages. + */ + WARN_ON(!failed); +out: + spin_unlock(&anonhd->lock); + return referenced; + +migrate: + /* + * Migrate pages away from an extinct mm, so that its anonmm + * can be freed in due course: we could leave this to happen + * through the natural attrition of try_to_unmap, but that + * would miss locked pages and frequently referenced pages. + */ + spin_unlock(&anonhd->lock); + page->mapping = (void *) new_anonmm; + atomic_inc(&new_anonmm->count); + if (atomic_dec_and_test(&anonmm->count)) + free_anonmm(anonmm); + return referenced; +} + +/** + * page_referenced_file - referenced check for object-based rmap + * @page: the page we're checking references on. + * + * For an object-based mapped page, find all the places it is mapped and + * check/clear the referenced flag. This is done by following the page->mapping + * pointer, then walking the chain of vmas it holds. It returns the number + * of references it found. + * + * This function is only called from page_referenced for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * assume a reference count of 0, so try_to_unmap will then have a go. 
+ */ +static inline int page_referenced_file(struct page *page) +{ + unsigned int mapcount = page->mapcount; + struct address_space *mapping = page->mapping; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + unsigned long address; + int referenced = 0; + int failed = 0; + + if (down_trylock(&mapping->i_shared_sem)) + return 0; + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + address = vma_address(vma, pgoff); + if (address == -EFAULT) + continue; + if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) + == (VM_LOCKED|VM_MAYSHARE)) { + referenced++; + goto out; + } + if (vma->vm_mm->rss) { + referenced += page_referenced_one(page, + vma->vm_mm, address, &mapcount, &failed); + if (!mapcount) + goto out; + } + } + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + failed++; + continue; + } + address = vma_address(vma, pgoff); + if (address == -EFAULT) + continue; + if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) { + referenced++; + goto out; + } + if (vma->vm_mm->rss) { + referenced += page_referenced_one(page, + vma->vm_mm, address, &mapcount, &failed); + if (!mapcount) + goto out; + } + } + + WARN_ON(!failed); +out: + up(&mapping->i_shared_sem); + return referenced; +} + /** * page_referenced - test if the page was referenced * @page: the page to test * * Quick test_and_clear_referenced for all mappings to a page, - * returns the number of processes which referenced the page. + * returns the number of ptes which referenced the page. * Caller needs to hold the rmap lock. - * - * If the page has a single-entry pte_chain, collapse that back to a PageDirect - * representation. This way, it's only done under memory pressure. 
*/ -int fastcall page_referenced(struct page * page) +int fastcall page_referenced(struct page *page) { - struct pte_chain *pc; int referenced = 0; if (page_test_and_clear_young(page)) @@ -135,218 +375,175 @@ int fastcall page_referenced(struct page if (TestClearPageReferenced(page)) referenced++; - if (PageDirect(page)) { - pte_t *pte = rmap_ptep_map(page->pte.direct); - if (ptep_test_and_clear_young(pte)) - referenced++; - rmap_ptep_unmap(pte); - } else { - int nr_chains = 0; - - /* Check all the page tables mapping this page. */ - for (pc = page->pte.chain; pc; pc = pte_chain_next(pc)) { - int i; - - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pte_paddr = pc->ptes[i]; - pte_t *p; - - p = rmap_ptep_map(pte_paddr); - if (ptep_test_and_clear_young(p)) - referenced++; - rmap_ptep_unmap(p); - nr_chains++; - } - } - if (nr_chains == 1) { - pc = page->pte.chain; - page->pte.direct = pc->ptes[NRPTE-1]; - SetPageDirect(page); - pc->ptes[NRPTE-1] = 0; - __pte_chain_free(pc); - } + if (page->mapcount && page->mapping) { + if (PageAnon(page)) + referenced += page_referenced_anon(page); + else + referenced += page_referenced_file(page); } return referenced; } /** - * page_add_rmap - add reverse mapping entry to a page - * @page: the page to add the mapping to - * @ptep: the page table entry mapping this page + * page_add_anon_rmap - add pte mapping to an anonymous page + * @page: the page to add the mapping to + * @mm: the mm in which the mapping is added + * @address: the user virtual address mapped * - * Add a new pte reverse mapping to a page. * The caller needs to hold the mm->page_table_lock. 
*/ -struct pte_chain * fastcall -page_add_rmap(struct page *page, pte_t *ptep, struct pte_chain *pte_chain) +void fastcall page_add_anon_rmap(struct page *page, + struct mm_struct *mm, unsigned long address) { - pte_addr_t pte_paddr = ptep_to_paddr(ptep); - struct pte_chain *cur_pte_chain; + struct anonmm *anonmm = mm->anonmm; - if (PageReserved(page)) - return pte_chain; + BUG_ON(PageReserved(page)); + BUG_ON(page_mapping(page)); rmap_lock(page); - - if (page->pte.direct == 0) { - page->pte.direct = pte_paddr; - SetPageDirect(page); - if (!page->mapping) { - SetPageAnon(page); - page->mapping = ANON_MAPPING_DEBUG; - } + if (!page->mapcount) { + SetPageAnon(page); + page->index = address & PAGE_MASK; + page->mapping = (void *) anonmm; + atomic_inc(&anonmm->count); inc_page_state(nr_mapped); - goto out; } - - if (PageDirect(page)) { - /* Convert a direct pointer into a pte_chain */ - ClearPageDirect(page); - pte_chain->ptes[NRPTE-1] = page->pte.direct; - pte_chain->ptes[NRPTE-2] = pte_paddr; - pte_chain->next_and_idx = pte_chain_encode(NULL, NRPTE-2); - page->pte.direct = 0; - page->pte.chain = pte_chain; - pte_chain = NULL; /* We consumed it */ - goto out; - } - - cur_pte_chain = page->pte.chain; - if (cur_pte_chain->ptes[0]) { /* It's full */ - pte_chain->next_and_idx = pte_chain_encode(cur_pte_chain, - NRPTE - 1); - page->pte.chain = pte_chain; - pte_chain->ptes[NRPTE-1] = pte_paddr; - pte_chain = NULL; /* We consumed it */ - goto out; - } - cur_pte_chain->ptes[pte_chain_idx(cur_pte_chain) - 1] = pte_paddr; - cur_pte_chain->next_and_idx--; -out: + page->mapcount++; rmap_unlock(page); - return pte_chain; } /** - * page_remove_rmap - take down reverse mapping to a page - * @page: page to remove mapping from - * @ptep: page table entry to remove + * page_add_file_rmap - add pte mapping to a file page + * @page: the page to add the mapping to * - * Removes the reverse mapping from the pte_chain of the page, - * after that the caller can clear the page table entry and 
free - * the page. - * Caller needs to hold the mm->page_table_lock. + * The caller needs to hold the mm->page_table_lock. */ -void fastcall page_remove_rmap(struct page *page, pte_t *ptep) +void fastcall page_add_file_rmap(struct page *page) { - pte_addr_t pte_paddr = ptep_to_paddr(ptep); - struct pte_chain *pc; - + BUG_ON(PageAnon(page)); if (!pfn_valid(page_to_pfn(page)) || PageReserved(page)) return; rmap_lock(page); + if (!page->mapcount) + inc_page_state(nr_mapped); + page->mapcount++; + rmap_unlock(page); +} - if (!page_mapped(page)) - goto out_unlock; /* remap_page_range() from a driver? */ +/** + * page_remove_rmap - take down pte mapping from a page + * @page: page to remove mapping from + * + * Caller needs to hold the mm->page_table_lock. + */ +void fastcall page_remove_rmap(struct page *page) +{ + BUG_ON(PageReserved(page)); + BUG_ON(!page->mapcount); - if (PageDirect(page)) { - if (page->pte.direct == pte_paddr) { - page->pte.direct = 0; - ClearPageDirect(page); - goto out; - } - } else { - struct pte_chain *start = page->pte.chain; - struct pte_chain *next; - int victim_i = pte_chain_idx(start); - - for (pc = start; pc; pc = next) { - int i; - - next = pte_chain_next(pc); - if (next) - prefetch(next); - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pa = pc->ptes[i]; - - if (pa != pte_paddr) - continue; - pc->ptes[i] = start->ptes[victim_i]; - start->ptes[victim_i] = 0; - if (victim_i == NRPTE-1) { - /* Emptied a pte_chain */ - page->pte.chain = pte_chain_next(start); - __pte_chain_free(start); - } else { - start->next_and_idx++; - } - goto out; - } - } - } -out: - if (!page_mapped(page)) { + rmap_lock(page); + page->mapcount--; + if (!page->mapcount) { if (page_test_and_clear_dirty(page)) set_page_dirty(page); if (PageAnon(page)) clear_page_anon(page); dec_page_state(nr_mapped); } -out_unlock: rmap_unlock(page); } /** - * try_to_unmap_one - worker function for try_to_unmap - * @page: page to unmap - * @ptep: page table entry to unmap 
from page - * - * Internal helper function for try_to_unmap, called for each page - * table entry mapping a page. Because locking order here is opposite - * to the locking order used by the page fault path, we use trylocks. - * Locking: - * page lock shrink_list(), trylock - * rmap lock shrink_list() - * mm->page_table_lock try_to_unmap_one(), trylock - */ -static int fastcall try_to_unmap_one(struct page * page, pte_addr_t paddr) -{ - pte_t *ptep = rmap_ptep_map(paddr); - unsigned long address = ptep_to_address(ptep); - struct mm_struct * mm = ptep_to_mm(ptep); - struct vm_area_struct * vma; - pte_t pte; - int ret; + * mremap_move_anon_rmap - try to note new address of anonymous page + * @page: page about to be moved + * @address: user virtual address at which it is going to be mapped + * + * Returns boolean, true if page is not shared, so address updated. + * + * For mremap's can_move_one_page: to update address when vma is moved, + * provided that anon page is not shared with a parent or child mm. + * If it is shared, then caller must take a copy of the page instead: + * not very clever, but too rare a case to merit cleverness. + */ +int fastcall mremap_move_anon_rmap(struct page *page, unsigned long address) +{ + int move = 0; + if (page->mapcount == 1) { + rmap_lock(page); + if (page->mapcount == 1) { + page->index = address & PAGE_MASK; + move = 1; + } + rmap_unlock(page); + } + return move; +} - if (!mm) - BUG(); +/** + ** Subfunctions of try_to_unmap: try_to_unmap_one called + ** repeatedly from either try_to_unmap_anon or try_to_unmap_file. + **/ + +static int try_to_unmap_one(struct page *page, + struct mm_struct *mm, unsigned long address, + unsigned int *mapcount, struct vm_area_struct *vma) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pte_t pteval; + int ret = SWAP_AGAIN; /* * We need the page_table_lock to protect us from page faults, * munmap, fork, etc... 
*/ - if (!spin_trylock(&mm->page_table_lock)) { - rmap_ptep_unmap(ptep); - return SWAP_AGAIN; - } + if (!spin_trylock(&mm->page_table_lock)) + goto out; - /* unmap_vmas drops page_table_lock with vma unlinked */ - vma = find_vma(mm, address); - if (!vma) { - ret = SWAP_FAIL; + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out_unlock; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) goto out_unlock; + + pte = pte_offset_map(pmd, address); + if (!pte_present(*pte)) + goto out_unmap; + + if (page_to_pfn(page) != pte_pfn(*pte)) + goto out_unmap; + + (*mapcount)--; + + if (!vma) { + vma = find_vma(mm, address); + /* unmap_vmas drops page_table_lock with vma unlinked */ + if (!vma) + goto out_unmap; } - /* The page is mlock()d, we cannot swap it out. */ - if (vma->vm_flags & VM_LOCKED) { + /* + * If the page is mlock()d, we cannot swap it out. + * If it's recently referenced (perhaps page_referenced + * skipped over this mm) then we should reactivate it. + */ + if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) || + ptep_test_and_clear_young(pte)) { ret = SWAP_FAIL; - goto out_unlock; + goto out_unmap; } /* Nuke the page table entry. */ flush_cache_page(vma, address); - pte = ptep_clear_flush(vma, address, ptep); + pteval = ptep_clear_flush(vma, address, pte); + + /* Move the dirty bit to the physical page now the pte is gone. */ + if (pte_dirty(pteval)) + set_page_dirty(page); if (PageAnon(page)) { swp_entry_t entry = { .val = page->private }; @@ -356,193 +553,311 @@ static int fastcall try_to_unmap_one(str */ BUG_ON(!PageSwapCache(page)); swap_duplicate(entry); - set_pte(ptep, swp_entry_to_pte(entry)); - BUG_ON(pte_file(*ptep)); - } else { - unsigned long pgidx; - /* - * If a nonlinear mapping then store the file page offset - * in the pte. 
- */ - BUG_ON(!page->mapping); - pgidx = (address - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (page->index != pgidx) { - set_pte(ptep, pgoff_to_pte(page->index)); - BUG_ON(!pte_file(*ptep)); - } + set_pte(pte, swp_entry_to_pte(entry)); + BUG_ON(pte_file(*pte)); } - /* Move the dirty bit to the physical page now the pte is gone. */ - if (pte_dirty(pte)) - set_page_dirty(page); - mm->rss--; + BUG_ON(!page->mapcount); + page->mapcount--; page_cache_release(page); - ret = SWAP_SUCCESS; + +out_unmap: + pte_unmap(pte); out_unlock: - rmap_ptep_unmap(ptep); spin_unlock(&mm->page_table_lock); + +out: return ret; } -/** - * try_to_unmap - try to remove all page table mappings to a page - * @page: the page to get unmapped - * - * Tries to remove all the page table entries which are mapping this - * page, used in the pageout path. Caller must hold the page lock - * and its rmap lock. Return values are: - * - * SWAP_SUCCESS - we succeeded in removing all mappings - * SWAP_AGAIN - we missed a trylock, try again later - * SWAP_FAIL - the page is unswappable +/* + * try_to_unmap_cluster is only used on VM_NONLINEAR shared object vmas, + * in which objrmap is unable to predict where a page will be found. */ -int fastcall try_to_unmap(struct page * page) +#define CLUSTER_SIZE (32 * PAGE_SIZE) +#if CLUSTER_SIZE > PMD_SIZE +#undef CLUSTER_SIZE +#define CLUSTER_SIZE PMD_SIZE +#endif +#define CLUSTER_MASK (~(CLUSTER_SIZE - 1)) + +static int try_to_unmap_cluster(struct mm_struct *mm, unsigned long cursor, + unsigned int *mapcount, struct vm_area_struct *vma) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + pte_t pteval; + struct page *page; + unsigned long address; + unsigned long end; + unsigned long pfn; + + /* + * We need the page_table_lock to protect us from page faults, + * munmap, fork, etc... 
+ */ + if (!spin_trylock(&mm->page_table_lock)) + return SWAP_FAIL; + + address = (vma->vm_start + cursor) & CLUSTER_MASK; + end = address + CLUSTER_SIZE; + if (address < vma->vm_start) + address = vma->vm_start; + if (end > vma->vm_end) + end = vma->vm_end; + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out_unlock; + + pmd = pmd_offset(pgd, address); + if (!pmd_present(*pmd)) + goto out_unlock; + + for (pte = pte_offset_map(pmd, address); + address < end; pte++, address += PAGE_SIZE) { + + if (!pte_present(*pte)) + continue; + + pfn = pte_pfn(*pte); + if (!pfn_valid(pfn)) + continue; + + page = pfn_to_page(pfn); + BUG_ON(PageAnon(page)); + if (PageReserved(page)) + continue; + + if (ptep_test_and_clear_young(pte)) + continue; + + /* Nuke the page table entry. */ + flush_cache_page(vma, address); + pteval = ptep_clear_flush(vma, address, pte); + + /* If nonlinear, store the file page offset in the pte. */ + if (page->index != linear_page_index(vma, address)) + set_pte(pte, pgoff_to_pte(page->index)); + + /* Move the dirty bit to the physical page now the pte is gone. */ + if (pte_dirty(pteval)) + set_page_dirty(page); + + page_remove_rmap(page); + page_cache_release(page); + mm->rss--; + (*mapcount)--; + } + + pte_unmap(pte); + +out_unlock: + spin_unlock(&mm->page_table_lock); + return SWAP_AGAIN; +} + +static inline int try_to_unmap_anon(struct page *page) { - struct pte_chain *pc, *next_pc, *start; - int ret = SWAP_SUCCESS; - int victim_i; + unsigned int mapcount = page->mapcount; + struct anonmm *anonmm = (struct anonmm *) page->mapping; + struct anonmm *anonhd = anonmm->head; + struct list_head *seek_head; + int ret = SWAP_AGAIN; - /* This page should not be on the pageout lists. */ - if (PageReserved(page)) - BUG(); - if (!PageLocked(page)) - BUG(); + spin_lock(&anonhd->lock); + /* + * First try the indicated mm, it's the most likely. 
+ */ + if (anonmm->mm && anonmm->mm->rss) { + ret = try_to_unmap_one(page, + anonmm->mm, page->index, &mapcount, NULL); + if (ret == SWAP_FAIL || !mapcount) + goto out; + } - if (PageDirect(page)) { - ret = try_to_unmap_one(page, page->pte.direct); - if (ret == SWAP_SUCCESS) { - page->pte.direct = 0; - ClearPageDirect(page); - } - goto out; + /* + * Then down the rest of the list, from that as the head. Stop + * when we reach anonhd? No: although a page cannot get dup'ed + * into an older mm, once swapped, its indicated mm may not be + * the oldest, just the first into which it was faulted back. + */ + seek_head = &anonmm->list; + list_for_each_entry(anonmm, seek_head, list) { + if (!anonmm->mm || !anonmm->mm->rss) + continue; + ret = try_to_unmap_one(page, + anonmm->mm, page->index, &mapcount, NULL); + if (ret == SWAP_FAIL || !mapcount) + goto out; } +out: + spin_unlock(&anonhd->lock); + return ret; +} - start = page->pte.chain; - victim_i = pte_chain_idx(start); - for (pc = start; pc; pc = next_pc) { - int i; - - next_pc = pte_chain_next(pc); - if (next_pc) - prefetch(next_pc); - for (i = pte_chain_idx(pc); i < NRPTE; i++) { - pte_addr_t pte_paddr = pc->ptes[i]; - - switch (try_to_unmap_one(page, pte_paddr)) { - case SWAP_SUCCESS: - /* - * Release a slot. If we're releasing the - * first pte in the first pte_chain then - * pc->ptes[i] and start->ptes[victim_i] both - * refer to the same thing. It works out. - */ - pc->ptes[i] = start->ptes[victim_i]; - start->ptes[victim_i] = 0; - victim_i++; - if (victim_i == NRPTE) { - page->pte.chain = pte_chain_next(start); - __pte_chain_free(start); - start = page->pte.chain; - victim_i = 0; - } else { - start->next_and_idx++; - } - break; - case SWAP_AGAIN: - /* Skip this pte, remembering status. 
*/ - ret = SWAP_AGAIN; +/** + * try_to_unmap_file - unmap file page using the object-based rmap method + * @page: the page to unmap + * + * Find all the mappings of a page using the mapping pointer and the vma chains + * contained in the address_space struct it points to. + * + * This function is only called from try_to_unmap for object-based pages. + * + * The semaphore address_space->i_shared_sem is tried. If it can't be gotten, + * return a temporary error. + */ +static inline int try_to_unmap_file(struct page *page) +{ + unsigned int mapcount = page->mapcount; + struct address_space *mapping = page->mapping; + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + struct vm_area_struct *vma; + unsigned long address; + int ret = SWAP_AGAIN; + unsigned long cursor; + unsigned long max_nl_cursor = 0; + unsigned long max_nl_size = 0; + + if (down_trylock(&mapping->i_shared_sem)) + return ret; + + list_for_each_entry(vma, &mapping->i_mmap, shared) { + if (vma->vm_mm->rss) { + address = vma_address(vma, pgoff); + if (address == -EFAULT) continue; - case SWAP_FAIL: - ret = SWAP_FAIL; + ret = try_to_unmap_one(page, + vma->vm_mm, address, &mapcount, vma); + if (ret == SWAP_FAIL || !mapcount) goto out; - } } } -out: - if (!page_mapped(page)) { - if (page_test_and_clear_dirty(page)) - set_page_dirty(page); - if (PageAnon(page)) - clear_page_anon(page); - dec_page_state(nr_mapped); - ret = SWAP_SUCCESS; + + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + if (unlikely(vma->vm_flags & VM_NONLINEAR)) { + /* + * Defer unmapping nonlinear to the next loop, + * but take notes while we're here e.g. don't + * want to loop again when no nonlinear vmas. 
+ */ + if (vma->vm_flags & (VM_LOCKED|VM_RESERVED)) + continue; + cursor = (unsigned long) vma->vm_private_data; + if (cursor > max_nl_cursor) + max_nl_cursor = cursor; + cursor = vma->vm_end - vma->vm_start; + if (cursor > max_nl_size) + max_nl_size = cursor; + continue; + } + if (vma->vm_mm->rss) { + address = vma_address(vma, pgoff); + if (address == -EFAULT) + continue; + ret = try_to_unmap_one(page, + vma->vm_mm, address, &mapcount, vma); + if (ret == SWAP_FAIL || !mapcount) + goto out; + } } - return ret; -} -/** - ** No more VM stuff below this comment, only pte_chain helper - ** functions. - **/ + if (max_nl_size == 0) /* no nonlinear vmas of this file */ + goto out; -static void pte_chain_ctor(void *p, kmem_cache_t *cachep, unsigned long flags) -{ - struct pte_chain *pc = p; + /* + * We don't try to search for this page in the nonlinear vmas, + * and page_referenced wouldn't have found it anyway. Instead + * just walk the nonlinear vmas trying to age and unmap some. + * The mapcount of the page we came in with is irrelevant, + * but even so use it as a guide to how hard we should try? 
+ */ + rmap_unlock(page); - memset(pc, 0, sizeof(*pc)); -} + max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; + if (max_nl_cursor == 0) + max_nl_cursor = CLUSTER_SIZE; + + do { + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + if (VM_NONLINEAR != (vma->vm_flags & + (VM_NONLINEAR|VM_LOCKED|VM_RESERVED))) + continue; + cursor = (unsigned long) vma->vm_private_data; + while (vma->vm_mm->rss && + cursor < max_nl_cursor && + cursor < vma->vm_end - vma->vm_start) { + ret = try_to_unmap_cluster(vma->vm_mm, + cursor, &mapcount, vma); + if (ret == SWAP_FAIL) + break; + cursor += CLUSTER_SIZE; + vma->vm_private_data = (void *) cursor; + if ((int)mapcount <= 0) + goto relock; + cond_resched(); + } + if (ret != SWAP_FAIL) + vma->vm_private_data = + (void *) max_nl_cursor; + ret = SWAP_AGAIN; + } + max_nl_cursor += CLUSTER_SIZE; + } while (max_nl_cursor <= max_nl_size); -DEFINE_PER_CPU(struct pte_chain *, local_pte_chain) = 0; + /* + * Don't loop forever (perhaps all the remaining pages are + * in locked vmas). Reset cursor on all unreserved nonlinear + * vmas, now forgetting on which ones it had fallen behind. + */ + list_for_each_entry(vma, &mapping->i_mmap_shared, shared) { + if ((vma->vm_flags & (VM_NONLINEAR|VM_RESERVED)) == + VM_NONLINEAR) + vma->vm_private_data = 0; + } +relock: + rmap_lock(page); +out: + up(&mapping->i_shared_sem); + return ret; +} /** - * __pte_chain_free - free pte_chain structure - * @pte_chain: pte_chain struct to free + * try_to_unmap - try to remove all page table mappings to a page + * @page: the page to get unmapped + * + * Tries to remove all the page table entries which are mapping this + * page, used in the pageout path. Caller must hold the page lock + * and its rmap lock. 
Return values are: + * + * SWAP_SUCCESS - we succeeded in removing all mappings + * SWAP_AGAIN - we missed a trylock, try again later + * SWAP_FAIL - the page is unswappable */ -void __pte_chain_free(struct pte_chain *pte_chain) +int fastcall try_to_unmap(struct page *page) { - struct pte_chain **pte_chainp; + int ret; - pte_chainp = &get_cpu_var(local_pte_chain); - if (pte_chain->next_and_idx) - pte_chain->next_and_idx = 0; - if (*pte_chainp) - kmem_cache_free(pte_chain_cache, *pte_chainp); - *pte_chainp = pte_chain; - put_cpu_var(local_pte_chain); -} + BUG_ON(PageReserved(page)); + BUG_ON(!PageLocked(page)); + BUG_ON(!page->mapcount); + + if (PageAnon(page)) + ret = try_to_unmap_anon(page); + else + ret = try_to_unmap_file(page); -/* - * pte_chain_alloc(): allocate a pte_chain structure for use by page_add_rmap(). - * - * The caller of page_add_rmap() must perform the allocation because - * page_add_rmap() is invariably called under spinlock. Often, page_add_rmap() - * will not actually use the pte_chain, because there is space available in one - * of the existing pte_chains which are attached to the page. So the case of - * allocating and then freeing a single pte_chain is specially optimised here, - * with a one-deep per-cpu cache. 
- */ -struct pte_chain *pte_chain_alloc(int gfp_flags) -{ - struct pte_chain *ret; - struct pte_chain **pte_chainp; - - might_sleep_if(gfp_flags & __GFP_WAIT); - - pte_chainp = &get_cpu_var(local_pte_chain); - if (*pte_chainp) { - ret = *pte_chainp; - *pte_chainp = NULL; - put_cpu_var(local_pte_chain); - } else { - put_cpu_var(local_pte_chain); - ret = kmem_cache_alloc(pte_chain_cache, gfp_flags); + if (!page->mapcount) { + if (page_test_and_clear_dirty(page)) + set_page_dirty(page); + if (PageAnon(page)) + clear_page_anon(page); + dec_page_state(nr_mapped); + ret = SWAP_SUCCESS; } return ret; } - -void __init pte_chain_init(void) -{ - pte_chain_cache = kmem_cache_create( "pte_chain", - sizeof(struct pte_chain), - sizeof(struct pte_chain), - 0, - pte_chain_ctor, - NULL); - - if (!pte_chain_cache) - panic("failed to create pte_chain cache!\n"); -} --- linux-2.6.6-rc1/mm/shmem.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/shmem.c 2004-04-18 22:25:50.347176544 -0700 @@ -8,6 +8,7 @@ * 2002 Red Hat Inc. * Copyright (C) 2002-2003 Hugh Dickins. * Copyright (C) 2002-2003 VERITAS Software Corporation. + * Copyright (C) 2004 Andi Kleen, SuSE Labs * * This file is released under the GPL. 
*/ @@ -37,8 +38,10 @@ #include #include #include +#include #include #include +#include /* This magic number is used in glibc for posix shared memory */ #define TMPFS_MAGIC 0x01021994 @@ -783,6 +786,74 @@ redirty: return WRITEPAGE_ACTIVATE; /* Return with the page locked */ } +#ifdef CONFIG_NUMA +static struct page *shmem_swapin_async(struct shared_policy *p, + swp_entry_t entry, unsigned long idx) +{ + struct page *page; + struct vm_area_struct pvma; + + /* Create a pseudo vma that just contains the policy */ + memset(&pvma, 0, sizeof(struct vm_area_struct)); + pvma.vm_end = PAGE_SIZE; + pvma.vm_pgoff = idx; + pvma.vm_policy = mpol_shared_policy_lookup(p, idx); + page = read_swap_cache_async(entry, &pvma, 0); + mpol_free(pvma.vm_policy); + return page; +} + +struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry, + unsigned long idx) +{ + struct shared_policy *p = &info->policy; + int i, num; + struct page *page; + unsigned long offset; + + num = valid_swaphandles(entry, &offset); + for (i = 0; i < num; offset++, i++) { + page = shmem_swapin_async(p, + swp_entry(swp_type(entry), offset), idx); + if (!page) + break; + page_cache_release(page); + } + lru_add_drain(); /* Push any new pages onto the LRU now */ + return shmem_swapin_async(p, entry, idx); +} + +static struct page * +shmem_alloc_page(unsigned long gfp, struct shmem_inode_info *info, + unsigned long idx) +{ + struct vm_area_struct pvma; + struct page *page; + + memset(&pvma, 0, sizeof(struct vm_area_struct)); + pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); + pvma.vm_pgoff = idx; + pvma.vm_end = PAGE_SIZE; + page = alloc_page_vma(gfp, &pvma, 0); + mpol_free(pvma.vm_policy); + return page; +} +#else +static inline struct page * +shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) +{ + swapin_readahead(entry, 0, NULL); + return read_swap_cache_async(entry, NULL, 0); +} + +static inline struct page * +shmem_alloc_page(unsigned long gfp,struct 
shmem_inode_info *info, + unsigned long idx) +{ + return alloc_page(gfp); +} +#endif + /* * shmem_getpage - either get the page from swap or allocate a new one * @@ -790,7 +861,8 @@ redirty: * vm. If we swap it in we mark it dirty since we also free the swap * entry since a page cannot live in both the swap and page cache */ -static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp, int *type) +static int shmem_getpage(struct inode *inode, unsigned long idx, + struct page **pagep, enum sgp_type sgp, int *type) { struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); @@ -840,8 +912,7 @@ repeat: if (majmin == VM_FAULT_MINOR && type) inc_page_state(pgmajfault); majmin = VM_FAULT_MAJOR; - swapin_readahead(swap); - swappage = read_swap_cache_async(swap); + swappage = shmem_swapin(info, swap, idx); if (!swappage) { spin_lock(&info->lock); entry = shmem_swp_alloc(info, idx, sgp); @@ -946,7 +1017,9 @@ repeat: if (!filepage) { spin_unlock(&info->lock); - filepage = page_cache_alloc(mapping); + filepage = shmem_alloc_page(mapping_gfp_mask(mapping), + info, + idx); if (!filepage) { shmem_unacct_blocks(info->flags, 1); shmem_free_block(inode); @@ -1055,11 +1128,7 @@ static int shmem_populate(struct vm_area * If a nonlinear mapping then store the file page * offset in the pte. 
*/ - unsigned long pgidx; - pgidx = (addr - vma->vm_start) >> PAGE_SHIFT; - pgidx += vma->vm_pgoff; - pgidx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; - if (pgoff != pgidx) { + if (pgoff != linear_page_index(vma, addr)) { err = install_file_pte(mm, vma, addr, pgoff, prot); if (err) return err; @@ -1073,6 +1142,24 @@ static int shmem_populate(struct vm_area return 0; } +#ifdef CONFIG_NUMA +int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +{ + struct inode *i = vma->vm_file->f_dentry->d_inode; + return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); +} + +struct mempolicy * +shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) +{ + struct inode *i = vma->vm_file->f_dentry->d_inode; + unsigned long idx; + + idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); +} +#endif + void shmem_lock(struct file *file, int lock) { struct inode *inode = file->f_dentry->d_inode; @@ -1121,6 +1208,7 @@ shmem_get_inode(struct super_block *sb, info = SHMEM_I(inode); memset(info, 0, (char *)inode - (char *)info); spin_lock_init(&info->lock); + mpol_shared_policy_init(&info->policy); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); @@ -1796,6 +1884,7 @@ static struct inode *shmem_alloc_inode(s static void shmem_destroy_inode(struct inode *inode) { + mpol_free_shared_policy(&SHMEM_I(inode)->policy); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } @@ -1812,9 +1901,9 @@ static void init_once(void *foo, kmem_ca static int init_inodecache(void) { shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", - sizeof(struct shmem_inode_info), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, - init_once, NULL); + sizeof(struct shmem_inode_info), + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + init_once, NULL); if (shmem_inode_cachep == NULL) return -ENOMEM; return 0; @@ -1880,6 +1969,10 @@ static struct super_operations shmem_ops static struct vm_operations_struct 
shmem_vm_ops = { .nopage = shmem_nopage, .populate = shmem_populate, +#ifdef CONFIG_NUMA + .set_policy = shmem_set_policy, + .get_policy = shmem_get_policy, +#endif }; static struct super_block *shmem_get_sb(struct file_system_type *fs_type, --- linux-2.6.6-rc1/mm/slab.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/slab.c 2004-04-18 22:25:35.522430248 -0700 @@ -135,11 +135,11 @@ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ SLAB_NO_REAP | SLAB_CACHE_DMA | \ SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ - SLAB_RECLAIM_ACCOUNT ) + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC) #else # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ - SLAB_RECLAIM_ACCOUNT) + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC) #endif /* @@ -1367,9 +1367,11 @@ next: up(&cache_chain_sem); unlock_cpu_hotplug(); opps: + if (!cachep && (flags & SLAB_PANIC)) + panic("kmem_cache_create(): failed to create slab `%s'\n", + name); return cachep; } - EXPORT_SYMBOL(kmem_cache_create); static inline void check_irq_off(void) @@ -1988,6 +1990,15 @@ cache_alloc_debugcheck_after(kmem_cache_ *dbg_redzone1(cachep, objp) = RED_ACTIVE; *dbg_redzone2(cachep, objp) = RED_ACTIVE; } + { + int objnr; + struct slab *slabp; + + slabp = GET_PAGE_SLAB(virt_to_page(objp)); + + objnr = (objp - slabp->s_mem) / cachep->objsize; + slab_bufctl(slabp)[objnr] = (unsigned long)caller; + } objp += obj_dbghead(cachep); if (cachep->ctor && cachep->flags & SLAB_POISON) { unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; @@ -2049,12 +2060,14 @@ static void free_block(kmem_cache_t *cac objnr = (objp - slabp->s_mem) / cachep->objsize; check_slabp(cachep, slabp); #if DEBUG +#if 0 if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { printk(KERN_ERR "slab: double free detected in cache '%s', objp %p.\n", cachep->name, objp); BUG(); } #endif +#endif slab_bufctl(slabp)[objnr] = slabp->free; slabp->free = objnr; STATS_DEC_ACTIVE(cachep); @@ -2835,6 +2848,29 @@ struct seq_operations slabinfo_op = { .show = s_show, }; +static 
void do_dump_slabp(kmem_cache_t *cachep) +{ +#if DEBUG + struct list_head *q; + + check_irq_on(); + spin_lock_irq(&cachep->spinlock); + list_for_each(q,&cachep->lists.slabs_full) { + struct slab *slabp; + int i; + slabp = list_entry(q, struct slab, list); + for (i = 0; i < cachep->num; i++) { + unsigned long sym = slab_bufctl(slabp)[i]; + + printk("obj %p/%d: %p", slabp, i, (void *)sym); + print_symbol(" <%s>", sym); + printk("\n"); + } + } + spin_unlock_irq(&cachep->spinlock); +#endif +} + #define MAX_SLABINFO_WRITE 128 /** * slabinfo_write - Tuning for the slab allocator @@ -2875,9 +2911,11 @@ ssize_t slabinfo_write(struct file *file batchcount < 1 || batchcount > limit || shared < 0) { - res = -EINVAL; + do_dump_slabp(cachep); + res = 0; } else { - res = do_tune_cpucache(cachep, limit, batchcount, shared); + res = do_tune_cpucache(cachep, limit, + batchcount, shared); } break; } --- linux-2.6.6-rc1/mm/swapfile.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/swapfile.c 2004-04-18 22:25:50.739116960 -0700 @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -86,19 +87,26 @@ static void remove_swap_bdev(struct bloc BUG(); } -void swap_unplug_io_fn(struct backing_dev_info *unused_bdi) +/* + * Unlike a standard unplug_io_fn, swap_unplug_io_fn is never called + * through swap's backing_dev_info (which is only used by shrink_list), + * but directly from sync_page when PageSwapCache: and takes the page + * as argument, so that it can find the right device from swp_entry_t. 
+ */ +void swap_unplug_io_fn(struct page *page) { - int i; + swp_entry_t entry; down(&swap_bdevs_sem); - for (i = 0; i < MAX_SWAPFILES; i++) { - struct block_device *bdev = swap_bdevs[i]; + entry.val = page->private; + if (PageSwapCache(page)) { + struct block_device *bdev = swap_bdevs[swp_type(entry)]; struct backing_dev_info *bdi; - if (bdev == NULL) - break; - bdi = bdev->bd_inode->i_mapping->backing_dev_info; - (*bdi->unplug_io_fn)(bdi); + if (bdev) { + bdi = bdev->bd_inode->i_mapping->backing_dev_info; + (*bdi->unplug_io_fn)(bdi); + } } up(&swap_bdevs_sem); } @@ -448,19 +456,19 @@ void free_swap_and_cache(swp_entry_t ent /* vma->vm_mm->page_table_lock is held */ static void unuse_pte(struct vm_area_struct *vma, unsigned long address, pte_t *dir, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { vma->vm_mm->rss++; get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); - *pte_chainp = page_add_rmap(page, dir, *pte_chainp); + page_add_anon_rmap(page, vma->vm_mm, address); swap_free(entry); } /* vma->vm_mm->page_table_lock is held */ -static int unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, +static unsigned long unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { pte_t * pte; unsigned long end; @@ -485,10 +493,10 @@ static int unuse_pmd(struct vm_area_stru * Test inline before going to call unuse_pte. 
*/ if (unlikely(pte_same(*pte, swp_pte))) { - unuse_pte(vma, offset + address, pte, - entry, page, pte_chainp); + unuse_pte(vma, offset + address, pte, entry, page); pte_unmap(pte); - return 1; + /* add 1 since address may be 0 */ + return 1 + offset + address; } address += PAGE_SIZE; pte++; @@ -498,12 +506,13 @@ static int unuse_pmd(struct vm_area_stru } /* vma->vm_mm->page_table_lock is held */ -static int unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, +static unsigned long unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) + swp_entry_t entry, struct page *page) { pmd_t * pmd; unsigned long offset, end; + unsigned long foundaddr; if (pgd_none(*dir)) return 0; @@ -521,9 +530,10 @@ static int unuse_pgd(struct vm_area_stru if (address >= end) BUG(); do { - if (unuse_pmd(vma, pmd, address, end - address, - offset, entry, page, pte_chainp)) - return 1; + foundaddr = unuse_pmd(vma, pmd, address, end - address, + offset, entry, page); + if (foundaddr) + return foundaddr; address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address && (address < end)); @@ -531,17 +541,19 @@ static int unuse_pgd(struct vm_area_stru } /* vma->vm_mm->page_table_lock is held */ -static int unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, - swp_entry_t entry, struct page *page, struct pte_chain **pte_chainp) +static unsigned long unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, + swp_entry_t entry, struct page *page) { unsigned long start = vma->vm_start, end = vma->vm_end; + unsigned long foundaddr; if (start >= end) BUG(); do { - if (unuse_pgd(vma, pgdir, start, end - start, - entry, page, pte_chainp)) - return 1; + foundaddr = unuse_pgd(vma, pgdir, start, end - start, + entry, page); + if (foundaddr) + return foundaddr; start = (start + PGDIR_SIZE) & PGDIR_MASK; pgdir++; } while (start && (start < end)); @@ -552,24 +564,27 @@ static int unuse_process(struct 
mm_struc swp_entry_t entry, struct page* page) { struct vm_area_struct* vma; - struct pte_chain *pte_chain; - - pte_chain = pte_chain_alloc(GFP_KERNEL); - if (!pte_chain) - return -ENOMEM; + unsigned long foundaddr = 0; + int ret = 0; /* * Go through process' page directory. */ + down_read(&mm->mmap_sem); spin_lock(&mm->page_table_lock); for (vma = mm->mmap; vma; vma = vma->vm_next) { - pgd_t * pgd = pgd_offset(mm, vma->vm_start); - if (unuse_vma(vma, pgd, entry, page, &pte_chain)) - break; + if (!is_vm_hugetlb_page(vma)) { + pgd_t * pgd = pgd_offset(mm, vma->vm_start); + foundaddr = unuse_vma(vma, pgd, entry, page); + if (foundaddr) + break; + } } spin_unlock(&mm->page_table_lock); - pte_chain_free(pte_chain); - return 0; + if (foundaddr && mremap_moved_anon_rmap(page, foundaddr)) + ret = make_page_exclusive(vma, foundaddr); + up_read(&mm->mmap_sem); + return ret; } /* @@ -670,7 +685,7 @@ static int try_to_unuse(unsigned int typ */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); - page = read_swap_cache_async(entry); + page = read_swap_cache_async(entry, NULL, 0); if (!page) { /* * Either swap_duplicate() failed because entry --- linux-2.6.6-rc1/mm/swap_state.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/swap_state.c 2004-04-18 22:25:50.740116808 -0700 @@ -25,13 +25,13 @@ static struct address_space_operations s }; static struct backing_dev_info swap_backing_dev_info = { - .memory_backed = 1, /* Does not contribute to dirty memory */ - .unplug_io_fn = swap_unplug_io_fn, + .state = 0, /* uncongested */ }; struct address_space swapper_space = { .page_tree = RADIX_TREE_INIT(GFP_ATOMIC), .tree_lock = SPIN_LOCK_UNLOCKED, + .nrpages = 0, /* total_swapcache_pages */ .a_ops = &swap_aops, .backing_dev_info = &swap_backing_dev_info, }; @@ -325,7 +325,8 @@ struct page * lookup_swap_cache(swp_entr * A failure return means that either the page allocation failed or that * the swap entry is no longer in use. 
*/ -struct page * read_swap_cache_async(swp_entry_t entry) +struct page *read_swap_cache_async(swp_entry_t entry, + struct vm_area_struct *vma, unsigned long addr) { struct page *found_page, *new_page = NULL; int err; @@ -349,7 +350,7 @@ struct page * read_swap_cache_async(swp_ * Get a new page to read into from swap. */ if (!new_page) { - new_page = alloc_page(GFP_HIGHUSER); + new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); if (!new_page) break; /* Out of memory */ } --- linux-2.6.6-rc1/mm/vmalloc.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/mm/vmalloc.c 2004-04-18 22:26:02.152381880 -0700 @@ -17,7 +17,6 @@ #include #include -#include #include --- linux-2.6.6-rc1/mm/vmscan.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/mm/vmscan.c 2004-04-18 22:26:02.153381728 -0700 @@ -33,7 +33,6 @@ #include #include -#include #include #include --- linux-2.6.6-rc1/net/8021q/vlan.h 2003-06-14 12:18:24.000000000 -0700 +++ 25/net/8021q/vlan.h 2004-04-18 22:25:25.037024272 -0700 @@ -19,8 +19,8 @@ I never found it..and the problem seems I'll bet they might prove useful again... --Ben -#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG __FUNCTION__ ": " x, y, z); -#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG __FUNCTION__ ": " x, y); +#define VLAN_MEM_DBG(x, y, z) printk(VLAN_DBG "%s: " x, __FUNCTION__, y, z); +#define VLAN_FMEM_DBG(x, y) printk(VLAN_DBG "%s: " x, __FUNCTION__, y); */ /* This way they don't do anything! */ --- linux-2.6.6-rc1/net/8021q/vlanproc.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/8021q/vlanproc.c 2004-04-18 22:25:25.038024120 -0700 @@ -220,7 +220,7 @@ int vlan_proc_rem_dev(struct net_device } #ifdef VLAN_DEBUG - printk(VLAN_DBG __FUNCTION__ ": dev: %p\n", vlandev); + printk(VLAN_DBG "%s: dev: %p\n", __FUNCTION__, vlandev); #endif /** NOTE: This will consume the memory pointed to by dent, it seems. 
*/ --- linux-2.6.6-rc1/net/appletalk/ddp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/appletalk/ddp.c 2004-04-18 22:25:25.040023816 -0700 @@ -1795,13 +1795,7 @@ static int atalk_ioctl(struct socket *so break; } case SIOCGSTAMP: - if (!sk) - break; - rc = -ENOENT; - if (!sk->sk_stamp.tv_sec) - break; - rc = copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)) ? -EFAULT : 0; + rc = sock_get_timestamp(sk, (struct timeval *)arg); break; /* Routing */ case SIOCADDRT: --- linux-2.6.6-rc1/net/atm/ioctl.c 2003-09-27 18:57:47.000000000 -0700 +++ 25/net/atm/ioctl.c 2004-04-18 22:25:25.041023664 -0700 @@ -76,12 +76,8 @@ int vcc_ioctl(struct socket *sock, unsig goto done; } case SIOCGSTAMP: /* borrowed from IP */ - if (!vcc->sk->sk_stamp.tv_sec) { - error = -ENOENT; - goto done; - } - error = copy_to_user((void *)arg, &vcc->sk->sk_stamp, - sizeof(struct timeval)) ? -EFAULT : 0; + error = sock_get_timestamp(vcc->sk, (struct timeval *) + arg); goto done; case ATM_SETSC: printk(KERN_WARNING "ATM_SETSC is obsolete\n"); --- linux-2.6.6-rc1/net/atm/lec.h 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/atm/lec.h 2004-04-18 22:26:03.028248728 -0700 @@ -16,6 +16,7 @@ #if defined (CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) #include +struct net_bridge; extern struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, unsigned char *addr); extern void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); --- linux-2.6.6-rc1/net/ax25/af_ax25.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ax25/af_ax25.c 2004-04-18 22:25:25.042023512 -0700 @@ -1694,12 +1694,7 @@ static int ax25_ioctl(struct socket *soc case SIOCGSTAMP: if (sk != NULL) { - if (!sk->sk_stamp.tv_sec) { - res = -ENOENT; - break; - } - res = copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)) ? 
-EFAULT : 0; + res = sock_get_timestamp(sk, (struct timeval *)arg); break; } res = -EINVAL; --- linux-2.6.6-rc1/net/ax25/Kconfig 2003-09-27 18:57:47.000000000 -0700 +++ 25/net/ax25/Kconfig 2004-04-18 22:25:25.041023664 -0700 @@ -6,9 +6,8 @@ # Joerg Reuter DL1BKE # 19980129 Moved to net/ax25/Config.in, sourcing device drivers. -menu "Amateur Radio support" - -config HAMRADIO +menuconfig HAMRADIO + depends on NET bool "Amateur Radio support" help If you want to connect your Linux box to an amateur radio, answer Y @@ -109,5 +108,3 @@ source "drivers/net/hamradio/Kconfig" endmenu -endmenu - --- linux-2.6.6-rc1/net/bluetooth/hci_sock.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/bluetooth/hci_sock.c 2004-04-18 22:25:25.043023360 -0700 @@ -61,6 +61,11 @@ /* ----- HCI socket interface ----- */ +static inline int hci_test_bit(int nr, void *addr) +{ + return *((__u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); +} + /* Security filter */ static struct hci_sec_filter hci_sec_filter = { /* Packet types */ @@ -115,8 +120,8 @@ void hci_send_to_sock(struct hci_dev *hd if (skb->pkt_type == HCI_EVENT_PKT) { register int evt = (*(__u8 *)skb->data & HCI_FLT_EVENT_BITS); - - if (!test_bit(evt, flt->event_mask)) + + if (!hci_test_bit(evt, &flt->event_mask)) continue; if (flt->opcode && ((evt == HCI_EV_CMD_COMPLETE && @@ -399,8 +404,8 @@ static int hci_sock_sendmsg(struct kiocb u16 ogf = hci_opcode_ogf(opcode); u16 ocf = hci_opcode_ocf(opcode); - if (((ogf > HCI_SFLT_MAX_OGF) || - !test_bit(ocf & HCI_FLT_OCF_BITS, hci_sec_filter.ocf_mask[ogf])) && + if (((ogf > HCI_SFLT_MAX_OGF) || + !hci_test_bit(ocf & HCI_FLT_OCF_BITS, &hci_sec_filter.ocf_mask[ogf])) && !capable(CAP_NET_RAW)) { err = -EPERM; goto drop; --- linux-2.6.6-rc1/net/bluetooth/Kconfig 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/bluetooth/Kconfig 2004-04-18 22:25:25.043023360 -0700 @@ -2,10 +2,8 @@ # Bluetooth subsystem configuration # -menu "Bluetooth support" +menuconfig BT depends on NET - -config BT tristate 
"Bluetooth subsystem support" help Bluetooth is low-cost, low-power, short-range wireless technology. @@ -62,5 +60,3 @@ source "net/bluetooth/cmtp/Kconfig" source "drivers/bluetooth/Kconfig" -endmenu - --- linux-2.6.6-rc1/net/bluetooth/rfcomm/tty.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/bluetooth/rfcomm/tty.c 2004-04-18 22:25:25.044023208 -0700 @@ -315,7 +315,7 @@ static int rfcomm_create_dev(struct sock if (req.flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) return -EPERM; - + if (req.flags & (1 << RFCOMM_REUSE_DLC)) { /* Socket must be connected */ if (sk->sk_state != BT_CONNECTED) @@ -354,12 +354,14 @@ static int rfcomm_release_dev(unsigned l BT_DBG("dev_id %id flags 0x%x", req.dev_id, req.flags); - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (!(dev = rfcomm_dev_get(req.dev_id))) return -ENODEV; + if (dev->flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) { + rfcomm_dev_put(dev); + return -EPERM; + } + if (req.flags & (1 << RFCOMM_HANGUP_NOW)) rfcomm_dlc_close(dev->dlc, 0); --- linux-2.6.6-rc1/net/bridge/br.c 2003-11-26 13:53:35.000000000 -0800 +++ 25/net/bridge/br.c 2004-04-18 22:25:25.045023056 -0700 @@ -20,8 +20,7 @@ #include #include #include -#include -#include + #include "br_private.h" #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) @@ -32,6 +31,8 @@ int (*br_should_route_hook) (struct sk_b static int __init br_init(void) { + br_fdb_init(); + #ifdef CONFIG_BRIDGE_NETFILTER if (br_netfilter_init()) return 1; @@ -55,16 +56,18 @@ static void __exit br_deinit(void) #endif unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - br_handle_frame_hook = NULL; + + br_cleanup_bridges(); + + synchronize_net(); #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) br_fdb_get_hook = NULL; br_fdb_put_hook = NULL; #endif - br_cleanup_bridges(); - - synchronize_net(); + br_handle_frame_hook = NULL; + br_fdb_fini(); } EXPORT_SYMBOL(br_should_route_hook); --- linux-2.6.6-rc1/net/bridge/br_device.c 2003-08-22 
19:23:42.000000000 -0700 +++ 25/net/bridge/br_device.c 2004-04-18 22:25:25.045023056 -0700 @@ -15,7 +15,6 @@ #include #include -#include #include #include #include "br_private.h" @@ -32,7 +31,7 @@ static int br_dev_do_ioctl(struct net_de if (copy_from_user(args, data, 4*sizeof(unsigned long))) return -EFAULT; - return br_ioctl(dev->priv, args[0], args[1], args[2], args[3]); + return br_ioctl_device(dev->priv, args[0], args[1], args[2], args[3]); } static struct net_device_stats *br_dev_get_stats(struct net_device *dev) --- linux-2.6.6-rc1/net/bridge/br_fdb.c 2003-07-02 14:53:18.000000000 -0700 +++ 25/net/bridge/br_fdb.c 2004-04-18 22:25:25.046022904 -0700 @@ -14,12 +14,31 @@ */ #include +#include #include #include +#include +#include #include #include #include "br_private.h" +static kmem_cache_t *br_fdb_cache; + +void __init br_fdb_init(void) +{ + br_fdb_cache = kmem_cache_create("bridge_fdb_cache", + sizeof(struct net_bridge_fdb_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); +} + +void __exit br_fdb_fini(void) +{ + kmem_cache_destroy(br_fdb_cache); +} + + /* if topology_changing then use forward_delay (default 15 sec) * otherwise keep longer (default 5 minutes) */ @@ -35,7 +54,7 @@ static __inline__ int has_expired(const && time_before_eq(fdb->ageing_timer + hold_time(br), jiffies); } -static __inline__ void copy_fdb(struct __fdb_entry *ent, +static inline void copy_fdb(struct __fdb_entry *ent, const struct net_bridge_fdb_entry *f) { memset(ent, 0, sizeof(struct __fdb_entry)); @@ -43,7 +62,7 @@ static __inline__ void copy_fdb(struct _ ent->port_no = f->dst?f->dst->port_no:0; ent->is_local = f->is_local; ent->ageing_timer_value = f->is_static ? 
0 - : ((jiffies - f->ageing_timer) * USER_HZ) / HZ; + : jiffies_to_clock_t(jiffies - f->ageing_timer); } static __inline__ int br_mac_hash(const unsigned char *mac) @@ -173,7 +192,7 @@ struct net_bridge_fdb_entry *br_fdb_get( void br_fdb_put(struct net_bridge_fdb_entry *ent) { if (atomic_dec_and_test(&ent->use_count)) - kfree(ent); + kmem_cache_free(br_fdb_cache, ent); } int br_fdb_get_entries(struct net_bridge *br, @@ -220,7 +239,7 @@ int br_fdb_get_entries(struct net_bridge /* entry was deleted during copy_to_user */ if (atomic_dec_and_test(&f->use_count)) { - kfree(f); + kmem_cache_free(br_fdb_cache, f); num = -EAGAIN; goto out; } @@ -241,12 +260,16 @@ int br_fdb_get_entries(struct net_bridge return num; } -void br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, - const unsigned char *addr, int is_local) +int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source, + const unsigned char *addr, int is_local) { struct hlist_node *h; struct net_bridge_fdb_entry *fdb; int hash = br_mac_hash(addr); + int ret = 0; + + if (!is_valid_ether_addr(addr)) + return -EADDRNOTAVAIL; write_lock_bh(&br->hash_lock); hlist_for_each(h, &br->hash[hash]) { @@ -262,6 +285,7 @@ void br_fdb_insert(struct net_bridge *br printk(KERN_WARNING "%s: received packet with " " own address as source address\n", source->dev->name); + ret = -EEXIST; goto out; } @@ -275,9 +299,11 @@ void br_fdb_insert(struct net_bridge *br } } - fdb = kmalloc(sizeof(*fdb), GFP_ATOMIC); - if (fdb == NULL) + fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); + if (unlikely(fdb == NULL)) { + ret = -ENOMEM; goto out; + } memcpy(fdb->addr.addr, addr, ETH_ALEN); atomic_set(&fdb->use_count, 1); @@ -296,4 +322,6 @@ void br_fdb_insert(struct net_bridge *br list_add_tail(&fdb->age_list, &br->age_list); out: write_unlock_bh(&br->hash_lock); + + return ret; } --- linux-2.6.6-rc1/net/bridge/br_forward.c 2003-10-08 15:07:10.000000000 -0700 +++ 25/net/bridge/br_forward.c 2004-04-18 22:25:25.046022904 
-0700 @@ -15,9 +15,7 @@ #include #include -#include #include -#include #include #include "br_private.h" --- linux-2.6.6-rc1/net/bridge/br_if.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/bridge/br_if.c 2004-04-18 22:25:25.047022752 -0700 @@ -14,28 +14,65 @@ */ #include +#include +#include #include -#include -#include #include #include #include #include -#include + #include "br_private.h" +/* + * Determine initial path cost based on speed. + * using recommendations from 802.1d standard + * + * Need to simulate user ioctl because not all device's that support + * ethtool, use ethtool_ops. Also, since driver might sleep need to + * not be holding any locks. + */ static int br_initial_port_cost(struct net_device *dev) { + + struct ethtool_cmd ecmd = { ETHTOOL_GSET }; + struct ifreq ifr; + mm_segment_t old_fs; + int err; + + strncpy(ifr.ifr_name, dev->name, IFNAMSIZ); + ifr.ifr_data = (void *) &ecmd; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = dev_ethtool(&ifr); + set_fs(old_fs); + + if (!err) { + switch(ecmd.speed) { + case SPEED_100: + return 19; + case SPEED_1000: + return 4; + case SPEED_10000: + return 2; + case SPEED_10: + return 100; + default: + pr_info("bridge: can't decode speed from %s: %d\n", + dev->name, ecmd.speed); + return 100; + } + } + + /* Old silly heuristics based on name */ if (!strncmp(dev->name, "lec", 3)) return 7; - if (!strncmp(dev->name, "eth", 3)) - return 100; /* FIXME handle 100Mbps */ - if (!strncmp(dev->name, "plip", 4)) return 2500; - return 100; + return 100; /* assume old 10Mbps */ } static void destroy_nbp(void *arg) @@ -126,39 +163,56 @@ static struct net_bridge *new_nb(const c return br; } +/* find an available port number */ +static int find_portno(struct net_bridge *br) +{ + int index; + struct net_bridge_port *p; + unsigned long *inuse; + + inuse = kmalloc(BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned long), + GFP_ATOMIC); + if (!inuse) + return -ENOMEM; + + memset(inuse, 0, BITS_TO_LONGS(BR_MAX_PORTS)*sizeof(unsigned 
long)); + set_bit(0, inuse); /* zero is reserved */ + list_for_each_entry(p, &br->port_list, list) { + set_bit(p->port_no, inuse); + } + index = find_first_zero_bit(inuse, BR_MAX_PORTS); + kfree(inuse); + + return (index >= BR_MAX_PORTS) ? -EXFULL : index; +} + /* called under bridge lock */ -static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev) +static struct net_bridge_port *new_nbp(struct net_bridge *br, + struct net_device *dev, + unsigned long cost) { - int i; + int index; struct net_bridge_port *p; + + index = find_portno(br); + if (index < 0) + return ERR_PTR(index); p = kmalloc(sizeof(*p), GFP_ATOMIC); if (p == NULL) - return p; + return ERR_PTR(-ENOMEM); memset(p, 0, sizeof(*p)); p->br = br; + dev_hold(dev); p->dev = dev; - p->path_cost = br_initial_port_cost(dev); - p->priority = 0x80; - - for (i=1;i<255;i++) - if (br_get_port(br, i) == NULL) - break; - - if (i == 255) { - kfree(p); - return NULL; - } - + p->path_cost = cost; + p->priority = 0x8000 >> BR_PORT_BITS; dev->br_port = p; - - p->port_no = i; + p->port_no = index; br_init_port(p); p->state = BR_STATE_DISABLED; - list_add_rcu(&p->list, &br->port_list); - return p; } @@ -203,13 +257,11 @@ int br_del_bridge(const char *name) return ret; } -/* called under bridge lock */ int br_add_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; - - if (dev->br_port != NULL) - return -EBUSY; + unsigned long cost; + int err = 0; if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; @@ -217,34 +269,48 @@ int br_add_if(struct net_bridge *br, str if (dev->hard_start_xmit == br_dev_xmit) return -ELOOP; - dev_hold(dev); - if ((p = new_nbp(br, dev)) == NULL) { - spin_unlock_bh(&br->lock); - dev_put(dev); - return -EXFULL; - } + cost = br_initial_port_cost(dev); - dev_set_promiscuity(dev, 1); + spin_lock_bh(&br->lock); + if (dev->br_port != NULL) + err = -EBUSY; + + else if (IS_ERR(p = new_nbp(br, dev, cost))) + err = PTR_ERR(p); - 
br_stp_recalculate_bridge_id(br); - br_fdb_insert(br, p, dev->dev_addr, 1); - if ((br->dev->flags & IFF_UP) && (dev->flags & IFF_UP)) - br_stp_enable_port(p); + else if ((err = br_fdb_insert(br, p, dev->dev_addr, 1))) + destroy_nbp(p); + + else { + dev_set_promiscuity(dev, 1); + + list_add_rcu(&p->list, &br->port_list); + + br_stp_recalculate_bridge_id(br); + if ((br->dev->flags & IFF_UP) && (dev->flags & IFF_UP)) + br_stp_enable_port(p); - return 0; + } + spin_unlock_bh(&br->lock); + return err; } -/* called under bridge lock */ int br_del_if(struct net_bridge *br, struct net_device *dev) { struct net_bridge_port *p; + int err = 0; - if ((p = dev->br_port) == NULL || p->br != br) - return -EINVAL; + spin_lock_bh(&br->lock); + p = dev->br_port; + if (!p || p->br != br) + err = -EINVAL; + else { + del_nbp(p); + br_stp_recalculate_bridge_id(br); + } + spin_unlock_bh(&br->lock); - del_nbp(p); - br_stp_recalculate_bridge_id(br); - return 0; + return err; } int br_get_bridge_ifindices(int *indices, int num) @@ -262,13 +328,14 @@ int br_get_bridge_ifindices(int *indices return i; } -void br_get_port_ifindices(struct net_bridge *br, int *ifindices) +void br_get_port_ifindices(struct net_bridge *br, int *ifindices, int num) { struct net_bridge_port *p; rcu_read_lock(); list_for_each_entry_rcu(p, &br->port_list, list) { - ifindices[p->port_no] = p->dev->ifindex; + if (p->port_no < num) + ifindices[p->port_no] = p->dev->ifindex; } rcu_read_unlock(); } --- linux-2.6.6-rc1/net/bridge/br_input.c 2003-07-02 14:53:18.000000000 -0700 +++ 25/net/bridge/br_input.c 2004-04-18 22:25:25.048022600 -0700 @@ -16,11 +16,10 @@ #include #include #include -#include #include #include "br_private.h" -unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static int br_pass_frame_up_finish(struct sk_buff *skb) { --- linux-2.6.6-rc1/net/bridge/br_ioctl.c 2003-07-27 12:14:40.000000000 -0700 +++ 
25/net/bridge/br_ioctl.c 2004-04-18 22:25:25.049022448 -0700 @@ -15,34 +15,20 @@ #include #include -#include +#include +#include #include #include "br_private.h" -/* import values in USER_HZ */ -static inline unsigned long user_to_ticks(unsigned long utick) -{ - return (utick * HZ) / USER_HZ; -} - -/* export values in USER_HZ */ -static inline unsigned long ticks_to_user(unsigned long tick) -{ - return (tick * USER_HZ) / HZ; -} - /* Report time remaining in user HZ */ static unsigned long timer_residue(const struct timer_list *timer) { - return ticks_to_user(timer_pending(timer) - ? (timer->expires - jiffies) : 0); + return timer_pending(timer) + ? jiffies_to_clock_t(timer->expires - jiffies) : 0; } -static int br_ioctl_device(struct net_bridge *br, - unsigned int cmd, - unsigned long arg0, - unsigned long arg1, - unsigned long arg2) +int br_ioctl_device(struct net_bridge *br, unsigned int cmd, + unsigned long arg0, unsigned long arg1, unsigned long arg2) { if (br == NULL) return -EINVAL; @@ -55,16 +41,17 @@ static int br_ioctl_device(struct net_br struct net_device *dev; int ret; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + dev = dev_get_by_index(arg0); if (dev == NULL) return -EINVAL; - spin_lock_bh(&br->lock); if (cmd == BRCTL_ADD_IF) ret = br_add_if(br, dev); else ret = br_del_if(br, dev); - spin_unlock_bh(&br->lock); dev_put(dev); return ret; @@ -79,17 +66,17 @@ static int br_ioctl_device(struct net_br memcpy(&b.designated_root, &br->designated_root, 8); memcpy(&b.bridge_id, &br->bridge_id, 8); b.root_path_cost = br->root_path_cost; - b.max_age = ticks_to_user(br->max_age); - b.hello_time = ticks_to_user(br->hello_time); + b.max_age = jiffies_to_clock_t(br->max_age); + b.hello_time = jiffies_to_clock_t(br->hello_time); b.forward_delay = br->forward_delay; b.bridge_max_age = br->bridge_max_age; b.bridge_hello_time = br->bridge_hello_time; - b.bridge_forward_delay = ticks_to_user(br->bridge_forward_delay); + b.bridge_forward_delay = 
jiffies_to_clock_t(br->bridge_forward_delay); b.topology_change = br->topology_change; b.topology_change_detected = br->topology_change_detected; b.root_port = br->root_port; b.stp_enabled = br->stp_enabled; - b.ageing_time = ticks_to_user(br->ageing_time); + b.ageing_time = jiffies_to_clock_t(br->ageing_time); b.hello_timer_value = timer_residue(&br->hello_timer); b.tcn_timer_value = timer_residue(&br->tcn_timer); b.topology_change_timer_value = timer_residue(&br->topology_change_timer); @@ -104,51 +91,67 @@ static int br_ioctl_device(struct net_br case BRCTL_GET_PORT_LIST: { - int *indices; - int ret = 0; + int num, *indices; + + num = arg1; + if (num < 0) + return -EINVAL; + if (num == 0) + num = 256; + if (num > BR_MAX_PORTS) + num = BR_MAX_PORTS; - indices = kmalloc(256*sizeof(int), GFP_KERNEL); + indices = kmalloc(num*sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; - memset(indices, 0, 256*sizeof(int)); + memset(indices, 0, num*sizeof(int)); - br_get_port_ifindices(br, indices); - if (copy_to_user((void *)arg0, indices, 256*sizeof(int))) - ret = -EFAULT; + br_get_port_ifindices(br, indices, num); + if (copy_to_user((void *)arg0, indices, num*sizeof(int))) + num = -EFAULT; kfree(indices); - return ret; + return num; } case BRCTL_SET_BRIDGE_FORWARD_DELAY: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + spin_lock_bh(&br->lock); - br->bridge_forward_delay = user_to_ticks(arg0); + br->bridge_forward_delay = clock_t_to_jiffies(arg0); if (br_is_root_bridge(br)) br->forward_delay = br->bridge_forward_delay; spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_BRIDGE_HELLO_TIME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + spin_lock_bh(&br->lock); - br->bridge_hello_time = user_to_ticks(arg0); + br->bridge_hello_time = clock_t_to_jiffies(arg0); if (br_is_root_bridge(br)) br->hello_time = br->bridge_hello_time; spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_BRIDGE_MAX_AGE: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + 
spin_lock_bh(&br->lock); - br->bridge_max_age = user_to_ticks(arg0); + br->bridge_max_age = clock_t_to_jiffies(arg0); if (br_is_root_bridge(br)) br->max_age = br->bridge_max_age; spin_unlock_bh(&br->lock); return 0; case BRCTL_SET_AGEING_TIME: - br->ageing_time = user_to_ticks(arg0); - return 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; - case BRCTL_SET_GC_INTERVAL: /* no longer used */ + br->ageing_time = clock_t_to_jiffies(arg0); return 0; case BRCTL_GET_PORT_INFO: @@ -185,10 +188,16 @@ static int br_ioctl_device(struct net_br } case BRCTL_SET_BRIDGE_STP_STATE: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + br->stp_enabled = arg0?1:0; return 0; case BRCTL_SET_BRIDGE_PRIORITY: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + spin_lock_bh(&br->lock); br_stp_set_bridge_priority(br, arg0); spin_unlock_bh(&br->lock); @@ -199,6 +208,12 @@ static int br_ioctl_device(struct net_br struct net_bridge_port *p; int ret = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (arg1 >= (1<<(16-BR_PORT_BITS))) + return -ERANGE; + spin_lock_bh(&br->lock); if ((p = br_get_port(br, arg0)) == NULL) ret = -EINVAL; @@ -213,6 +228,9 @@ static int br_ioctl_device(struct net_br struct net_bridge_port *p; int ret = 0; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + spin_lock_bh(&br->lock); if ((p = br_get_port(br, arg0)) == NULL) ret = -EINVAL; @@ -243,9 +261,6 @@ static int br_ioctl_deviceless(unsigned int *indices; int ret = 0; - if (arg1 > 64) - arg1 = 64; - indices = kmalloc(arg1*sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; @@ -265,6 +280,9 @@ static int br_ioctl_deviceless(unsigned { char buf[IFNAMSIZ]; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(buf, (void *)arg0, IFNAMSIZ)) return -EFAULT; @@ -285,25 +303,8 @@ int br_ioctl_deviceless_stub(unsigned lo { unsigned long i[3]; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (copy_from_user(i, (void *)arg, 3*sizeof(unsigned long))) return -EFAULT; return 
br_ioctl_deviceless(i[0], i[1], i[2]); } - -int br_ioctl(struct net_bridge *br, unsigned int cmd, unsigned long arg0, unsigned long arg1, unsigned long arg2) -{ - int err; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - err = br_ioctl_deviceless(cmd, arg0, arg1); - if (err == -EOPNOTSUPP) - err = br_ioctl_device(br, cmd, arg0, arg1, arg2); - - return err; -} --- linux-2.6.6-rc1/net/bridge/br_notify.c 2003-08-08 22:55:14.000000000 -0700 +++ 25/net/bridge/br_notify.c 2004-04-18 22:25:25.049022448 -0700 @@ -14,7 +14,7 @@ */ #include -#include + #include "br_private.h" static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr); --- linux-2.6.6-rc1/net/bridge/br_private.h 2003-07-02 14:53:18.000000000 -0700 +++ 25/net/bridge/br_private.h 2004-04-18 22:25:25.050022296 -0700 @@ -24,6 +24,9 @@ #define BR_HOLD_TIME (1*HZ) +#define BR_PORT_BITS 10 +#define BR_MAX_PORTS (1< -#include -#include #include + #include "br_private.h" #include "br_private_stp.h" --- linux-2.6.6-rc1/net/bridge/br_stp.c 2003-07-27 12:14:40.000000000 -0700 +++ 25/net/bridge/br_stp.c 2004-04-18 22:25:25.051022144 -0700 @@ -13,9 +13,8 @@ * 2 of the License, or (at your option) any later version. 
*/ #include -#include #include -#include + #include "br_private.h" #include "br_private_stp.h" @@ -36,7 +35,7 @@ void br_log_state(const struct net_bridg } /* called under bridge lock */ -struct net_bridge_port *br_get_port(struct net_bridge *br, int port_no) +struct net_bridge_port *br_get_port(struct net_bridge *br, u16 port_no) { struct net_bridge_port *p; @@ -50,7 +49,7 @@ struct net_bridge_port *br_get_port(stru /* called under bridge lock */ static int br_should_become_root_port(const struct net_bridge_port *p, - int root_port) + u16 root_port) { struct net_bridge *br; struct net_bridge_port *rp; @@ -103,9 +102,7 @@ static int br_should_become_root_port(co static void br_root_selection(struct net_bridge *br) { struct net_bridge_port *p; - int root_port; - - root_port = 0; + u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { if (br_should_become_root_port(p, root_port)) --- linux-2.6.6-rc1/net/bridge/br_stp_if.c 2003-08-08 22:55:14.000000000 -0700 +++ 25/net/bridge/br_stp_if.c 2004-04-18 22:25:25.052021992 -0700 @@ -14,21 +14,26 @@ */ #include -#include #include -#include + #include "br_private.h" #include "br_private_stp.h" -static inline __u16 br_make_port_id(const struct net_bridge_port *p) + +/* Port id is composed of priority and port number. + * NB: least significant bits of priority are dropped to + * make room for more ports. 
+ */ +static inline port_id br_make_port_id(__u8 priority, __u16 port_no) { - return (p->priority << 8) | p->port_no; + return ((u16)priority << BR_PORT_BITS) + | (port_no & ((1<port_id = br_make_port_id(p); + p->port_id = br_make_port_id(p->priority, p->port_no); br_become_designated_port(p); p->state = BR_STATE_BLOCKING; p->topology_change_ack = 0; @@ -111,7 +116,8 @@ void br_stp_disable_port(struct net_brid } /* called under bridge lock */ -static void br_stp_change_bridge_id(struct net_bridge *br, unsigned char *addr) +static void br_stp_change_bridge_id(struct net_bridge *br, + const unsigned char *addr) { unsigned char oldaddr[6]; struct net_bridge_port *p; @@ -138,16 +144,14 @@ static void br_stp_change_bridge_id(stru br_become_root_bridge(br); } -static unsigned char br_mac_zero[6]; +static const unsigned char br_mac_zero[6]; /* called under bridge lock */ void br_stp_recalculate_bridge_id(struct net_bridge *br) { - unsigned char *addr; + const unsigned char *addr = br_mac_zero; struct net_bridge_port *p; - addr = br_mac_zero; - list_for_each_entry(p, &br->port_list, list) { if (addr == br_mac_zero || memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) @@ -160,7 +164,7 @@ void br_stp_recalculate_bridge_id(struct } /* called under bridge lock */ -void br_stp_set_bridge_priority(struct net_bridge *br, int newprio) +void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio) { struct net_bridge_port *p; int wasroot; @@ -185,17 +189,15 @@ void br_stp_set_bridge_priority(struct n } /* called under bridge lock */ -void br_stp_set_port_priority(struct net_bridge_port *p, int newprio) +void br_stp_set_port_priority(struct net_bridge_port *p, u8 newprio) { - __u16 new_port_id; - - p->priority = newprio & 0xFF; - new_port_id = br_make_port_id(p); + port_id new_port_id = br_make_port_id(newprio, p->port_no); if (br_is_designated_port(p)) p->designated_port = new_port_id; p->port_id = new_port_id; + p->priority = newprio; if (!memcmp(&p->br->bridge_id, 
&p->designated_bridge, 8) && p->port_id < p->designated_port) { br_become_designated_port(p); @@ -204,7 +206,7 @@ void br_stp_set_port_priority(struct net } /* called under bridge lock */ -void br_stp_set_path_cost(struct net_bridge_port *p, int path_cost) +void br_stp_set_path_cost(struct net_bridge_port *p, u32 path_cost) { p->path_cost = path_cost; br_configuration_update(p->br); --- linux-2.6.6-rc1/net/bridge/br_stp_timer.c 2003-07-27 12:14:40.000000000 -0700 +++ 25/net/bridge/br_stp_timer.c 2004-04-18 22:25:25.052021992 -0700 @@ -14,9 +14,8 @@ */ #include -#include #include -#include + #include "br_private.h" #include "br_private_stp.h" --- linux-2.6.6-rc1/net/core/dev.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/core/dev.c 2004-04-18 22:25:31.299072296 -0700 @@ -1125,7 +1125,7 @@ int call_netdevice_notifiers(unsigned lo void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - do_gettimeofday(&skb->stamp); + net_timestamp(&skb->stamp); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1515,7 +1515,6 @@ static void sample_queue(unsigned long d } #endif - /** * netif_rx - post buffer to the network code * @skb: buffer to post @@ -1546,9 +1545,9 @@ int netif_rx(struct sk_buff *skb) return NET_RX_DROP; } #endif - + if (!skb->stamp.tv_sec) - do_gettimeofday(&skb->stamp); + net_timestamp(&skb->stamp); /* * The code is rearranged so that the path is the most @@ -1710,7 +1709,7 @@ int netif_receive_skb(struct sk_buff *sk #endif if (!skb->stamp.tv_sec) - do_gettimeofday(&skb->stamp); + net_timestamp(&skb->stamp); skb_bond(skb); @@ -1835,7 +1834,6 @@ static void net_rx_action(struct softirq unsigned long start_time = jiffies; int budget = netdev_max_backlog; - local_irq_disable(); while (!list_empty(&queue->poll_list)) { @@ -1861,6 +1859,10 @@ static void net_rx_action(struct softirq dev_put(dev); local_irq_disable(); } + +#ifdef CONFIG_KGDBOE + kgdb_process_breakpoint(); +#endif } out: 
local_irq_enable(); @@ -1903,7 +1905,7 @@ int register_gifconf(unsigned int family * match. --pb */ -static int dev_ifname(struct ifreq *arg) +static int dev_ifname(struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; @@ -1936,7 +1938,7 @@ static int dev_ifname(struct ifreq *arg) * Thus we will need a 'compatibility mode'. */ -static int dev_ifconf(char *arg) +static int dev_ifconf(char __user *arg) { struct ifconf ifc; struct net_device *dev; @@ -2539,7 +2541,7 @@ static int dev_ifsioc(struct ifreq *ifr, * positive or a negative errno code on error. */ -int dev_ioctl(unsigned int cmd, void *arg) +int dev_ioctl(unsigned int cmd, void __user *arg) { struct ifreq ifr; int ret; @@ -2552,12 +2554,12 @@ int dev_ioctl(unsigned int cmd, void *ar if (cmd == SIOCGIFCONF) { rtnl_shlock(); - ret = dev_ifconf((char *) arg); + ret = dev_ifconf((char __user *) arg); rtnl_shunlock(); return ret; } if (cmd == SIOCGIFNAME) - return dev_ifname((struct ifreq *)arg); + return dev_ifname((struct ifreq __user *)arg); if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; --- linux-2.6.6-rc1/net/core/dv.c 2003-10-08 15:07:10.000000000 -0700 +++ 25/net/core/dv.c 2004-04-18 22:25:25.055021536 -0700 @@ -195,7 +195,7 @@ out: #define DVDBG(a) \ printk(KERN_DEBUG "divert_ioctl() line %d %s\n", __LINE__, (a)) -int divert_ioctl(unsigned int cmd, struct divert_cf *arg) +int divert_ioctl(unsigned int cmd, struct divert_cf __user *arg) { struct divert_cf div_cf; struct divert_blk *div_blk; --- linux-2.6.6-rc1/net/core/ethtool.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/core/ethtool.c 2004-04-18 22:25:25.057021232 -0700 @@ -74,7 +74,7 @@ int ethtool_op_set_tso(struct net_device /* Handlers for each ethtool command */ -static int ethtool_get_settings(struct net_device *dev, void *useraddr) +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd = { ETHTOOL_GSET }; int err; @@ -91,7 +91,7 @@ static int 
ethtool_get_settings(struct n return 0; } -static int ethtool_set_settings(struct net_device *dev, void *useraddr) +static int ethtool_set_settings(struct net_device *dev, void __user *useraddr) { struct ethtool_cmd cmd; @@ -104,7 +104,7 @@ static int ethtool_set_settings(struct n return dev->ethtool_ops->set_settings(dev, &cmd); } -static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) +static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) { struct ethtool_drvinfo info; struct ethtool_ops *ops = dev->ethtool_ops; @@ -130,7 +130,7 @@ static int ethtool_get_drvinfo(struct ne return 0; } -static int ethtool_get_regs(struct net_device *dev, char *useraddr) +static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) { struct ethtool_regs regs; struct ethtool_ops *ops = dev->ethtool_ops; @@ -166,7 +166,7 @@ static int ethtool_get_regs(struct net_d return ret; } -static int ethtool_get_wol(struct net_device *dev, char *useraddr) +static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; @@ -180,7 +180,7 @@ static int ethtool_get_wol(struct net_de return 0; } -static int ethtool_set_wol(struct net_device *dev, char *useraddr) +static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; @@ -193,7 +193,7 @@ static int ethtool_set_wol(struct net_de return dev->ethtool_ops->set_wol(dev, &wol); } -static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) +static int ethtool_get_msglevel(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata = { ETHTOOL_GMSGLVL }; @@ -207,7 +207,7 @@ static int ethtool_get_msglevel(struct n return 0; } -static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) +static int ethtool_set_msglevel(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -229,7 +229,7 @@ static int ethtool_nway_reset(struct net return 
dev->ethtool_ops->nway_reset(dev); } -static int ethtool_get_link(struct net_device *dev, void *useraddr) +static int ethtool_get_link(struct net_device *dev, void __user *useraddr) { struct ethtool_value edata = { ETHTOOL_GLINK }; @@ -243,7 +243,7 @@ static int ethtool_get_link(struct net_d return 0; } -static int ethtool_get_eeprom(struct net_device *dev, void *useraddr) +static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; struct ethtool_ops *ops = dev->ethtool_ops; @@ -288,7 +288,7 @@ static int ethtool_get_eeprom(struct net return ret; } -static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) +static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; struct ethtool_ops *ops = dev->ethtool_ops; @@ -329,7 +329,7 @@ static int ethtool_set_eeprom(struct net return ret; } -static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) +static int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; @@ -343,7 +343,7 @@ static int ethtool_get_coalesce(struct n return 0; } -static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) +static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; @@ -356,7 +356,7 @@ static int ethtool_set_coalesce(struct n return dev->ethtool_ops->set_coalesce(dev, &coalesce); } -static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) +static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; @@ -370,7 +370,7 @@ static int ethtool_get_ringparam(struct return 0; } -static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) +static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam; @@ -383,7 +383,7 
@@ static int ethtool_set_ringparam(struct return dev->ethtool_ops->set_ringparam(dev, &ringparam); } -static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) +static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; @@ -397,7 +397,7 @@ static int ethtool_get_pauseparam(struct return 0; } -static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) +static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr) { struct ethtool_pauseparam pauseparam; @@ -410,7 +410,7 @@ static int ethtool_set_pauseparam(struct return dev->ethtool_ops->set_pauseparam(dev, &pauseparam); } -static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) +static int ethtool_get_rx_csum(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata = { ETHTOOL_GRXCSUM }; @@ -424,7 +424,7 @@ static int ethtool_get_rx_csum(struct ne return 0; } -static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) +static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -438,7 +438,7 @@ static int ethtool_set_rx_csum(struct ne return 0; } -static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) +static int ethtool_get_tx_csum(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata = { ETHTOOL_GTXCSUM }; @@ -452,7 +452,7 @@ static int ethtool_get_tx_csum(struct ne return 0; } -static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) +static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -465,7 +465,7 @@ static int ethtool_set_tx_csum(struct ne return dev->ethtool_ops->set_tx_csum(dev, edata.data); } -static int ethtool_get_sg(struct net_device *dev, char *useraddr) +static int ethtool_get_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata = { 
ETHTOOL_GSG }; @@ -479,7 +479,7 @@ static int ethtool_get_sg(struct net_dev return 0; } -static int ethtool_set_sg(struct net_device *dev, char *useraddr) +static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -492,7 +492,7 @@ static int ethtool_set_sg(struct net_dev return dev->ethtool_ops->set_sg(dev, edata.data); } -static int ethtool_get_tso(struct net_device *dev, char *useraddr) +static int ethtool_get_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata = { ETHTOOL_GTSO }; @@ -506,7 +506,7 @@ static int ethtool_get_tso(struct net_de return 0; } -static int ethtool_set_tso(struct net_device *dev, char *useraddr) +static int ethtool_set_tso(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -519,7 +519,7 @@ static int ethtool_set_tso(struct net_de return dev->ethtool_ops->set_tso(dev, edata.data); } -static int ethtool_self_test(struct net_device *dev, char *useraddr) +static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; struct ethtool_ops *ops = dev->ethtool_ops; @@ -552,7 +552,7 @@ static int ethtool_self_test(struct net_ return ret; } -static int ethtool_get_strings(struct net_device *dev, void *useraddr) +static int ethtool_get_strings(struct net_device *dev, void __user *useraddr) { struct ethtool_gstrings gstrings; struct ethtool_ops *ops = dev->ethtool_ops; @@ -599,7 +599,7 @@ static int ethtool_get_strings(struct ne return ret; } -static int ethtool_phys_id(struct net_device *dev, void *useraddr) +static int ethtool_phys_id(struct net_device *dev, void __user *useraddr) { struct ethtool_value id; @@ -612,7 +612,7 @@ static int ethtool_phys_id(struct net_de return dev->ethtool_ops->phys_id(dev, id.data); } -static int ethtool_get_stats(struct net_device *dev, void *useraddr) +static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) { struct ethtool_stats stats; struct 
ethtool_ops *ops = dev->ethtool_ops; @@ -650,7 +650,7 @@ static int ethtool_get_stats(struct net_ int dev_ethtool(struct ifreq *ifr) { struct net_device *dev = __dev_get_by_name(ifr->ifr_name); - void *useraddr = (void *) ifr->ifr_data; + void __user *useraddr = (void __user *) ifr->ifr_data; u32 ethcmd; /* @@ -740,6 +740,7 @@ int dev_ethtool(struct ifreq *ifr) return -EOPNOTSUPP; } +EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); --- linux-2.6.6-rc1/net/core/neighbour.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/core/neighbour.c 2004-04-18 22:25:25.058021080 -0700 @@ -1094,7 +1094,7 @@ void pneigh_enqueue(struct neigh_table * kfree_skb(skb); return; } - skb->stamp.tv_sec = 0; + skb->stamp.tv_sec = LOCALLY_ENQUEUED; skb->stamp.tv_usec = now + sched_next; spin_lock(&tbl->proxy_queue.lock); --- linux-2.6.6-rc1/net/core/netfilter.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/core/netfilter.c 2004-04-18 22:25:25.059020928 -0700 @@ -8,8 +8,10 @@ * * February 2000: Modified by James Morris to have 1 queue per protocol. * 15-Mar-2000: Added NF_REPEAT --RR. + * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. */ #include +#include #include #include #include @@ -741,6 +743,72 @@ pull_skb: EXPORT_SYMBOL(skb_ip_make_writable); #endif /*CONFIG_INET*/ +/* Internal logging interface, which relies on the real + LOG target modules */ + +#define NF_LOG_PREFIXLEN 128 + +static nf_logfn *nf_logging[NPROTO]; /* = NULL */ +static int reported = 0; +static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED; + +int nf_log_register(int pf, nf_logfn *logfn) +{ + int ret = -EBUSY; + + /* Any setup of logging members must be done before + * substituting pointer. 
*/ + smp_wmb(); + spin_lock(&nf_log_lock); + if (!nf_logging[pf]) { + nf_logging[pf] = logfn; + ret = 0; + } + spin_unlock(&nf_log_lock); + return ret; +} + +void nf_log_unregister(int pf, nf_logfn *logfn) +{ + spin_lock(&nf_log_lock); + if (nf_logging[pf] == logfn) + nf_logging[pf] = NULL; + spin_unlock(&nf_log_lock); + + /* Give time to concurrent readers. */ + synchronize_net(); +} + +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + nf_logfn *logfn; + + rcu_read_lock(); + logfn = nf_logging[pf]; + if (logfn) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + /* We must read logging before nf_logfn[pf] */ + smp_read_barrier_depends(); + logfn(hooknum, skb, in, out, prefix); + } else if (!reported) { + printk(KERN_WARNING "nf_log_packet: can\'t log yet, " + "no backend logging module loaded in!\n"); + reported++; + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(nf_log_register); +EXPORT_SYMBOL(nf_log_unregister); +EXPORT_SYMBOL(nf_log_packet); /* This does not belong here, but ipt_REJECT needs it if connection tracking in use: without this, connection may not be in hash table, --- linux-2.6.6-rc1/net/core/netpoll.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/core/netpoll.c 2004-04-18 22:25:25.060020776 -0700 @@ -163,21 +163,15 @@ repeat: spin_lock(&np->dev->xmit_lock); np->dev->xmit_lock_owner = smp_processor_id(); - if (netif_queue_stopped(np->dev)) { - np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); - - netpoll_poll(np); - goto repeat; - } - status = np->dev->hard_start_xmit(skb, np->dev); np->dev->xmit_lock_owner = -1; spin_unlock(&np->dev->xmit_lock); /* transmit busy */ - if(status) + if(status) { + netpoll_poll(np); goto repeat; + } } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -231,9 +225,8 @@ void 
netpoll_send_udp(struct netpoll *np static void arp_reply(struct sk_buff *skb) { - struct in_device *in_dev = (struct in_device *) skb->dev->ip_ptr; struct arphdr *arp; - unsigned char *arp_ptr, *sha, *tha; + unsigned char *arp_ptr; int size, type = ARPOP_REPLY, ptype = ETH_P_ARP; u32 sip, tip; struct sk_buff *send_skb; @@ -253,7 +246,7 @@ static void arp_reply(struct sk_buff *sk if (!np) return; /* No arp on this interface */ - if (!in_dev || skb->dev->flags & IFF_NOARP) + if (skb->dev->flags & IFF_NOARP) return; if (!pskb_may_pull(skb, (sizeof(struct arphdr) + @@ -270,21 +263,15 @@ static void arp_reply(struct sk_buff *sk arp->ar_op != htons(ARPOP_REQUEST)) return; - arp_ptr= (unsigned char *)(arp+1); - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; + arp_ptr = (unsigned char *)(arp+1) + skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - tha = arp_ptr; - arp_ptr += skb->dev->addr_len; + arp_ptr += 4 + skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? 
*/ - if (tip != in_dev->ifa_list->ifa_address || - LOOPBACK(tip) || MULTICAST(tip)) + if (tip != htonl(np->local_ip) || LOOPBACK(tip) || MULTICAST(tip)) return; - size = sizeof(struct arphdr) + 2 * (skb->dev->addr_len + 4); send_skb = find_skb(np, size + LL_RESERVED_SPACE(np->dev), LL_RESERVED_SPACE(np->dev)); @@ -325,7 +312,7 @@ static void arp_reply(struct sk_buff *sk arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &tip, 4); arp_ptr += 4; - memcpy(arp_ptr, np->local_mac, np->dev->addr_len); + memcpy(arp_ptr, np->remote_mac, np->dev->addr_len); arp_ptr += np->dev->addr_len; memcpy(arp_ptr, &sip, 4); --- linux-2.6.6-rc1/net/core/sock.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/core/sock.c 2004-04-18 22:25:25.061020624 -0700 @@ -328,6 +328,8 @@ int sock_setsockopt(struct socket *sock, case SO_TIMESTAMP: sk->sk_rcvtstamp = valbool; + if (valbool) + sock_enable_timestamp(sk); break; case SO_RCVLOWAT: @@ -642,6 +644,8 @@ void sk_free(struct sock *sk) sk->sk_filter = NULL; } + sock_disable_timestamp(sk); + if (atomic_read(&sk->sk_omem_alloc)) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); @@ -973,13 +977,13 @@ int sock_no_shutdown(struct socket *sock } int sock_no_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) + char __user *optval, int optlen) { return -EOPNOTSUPP; } int sock_no_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) + char __user *optval, int __user *optlen) { return -EOPNOTSUPP; } @@ -1135,10 +1139,13 @@ void sock_init_data(struct socket *sock, sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_owner = NULL; + sk->sk_stamp.tv_sec = -1L; + sk->sk_stamp.tv_usec = -1L; + atomic_set(&sk->sk_refcnt, 1); } -void lock_sock(struct sock *sk) +void fastcall lock_sock(struct sock *sk) { might_sleep(); spin_lock_bh(&(sk->sk_lock.slock)); @@ -1150,7 +1157,7 @@ void lock_sock(struct sock *sk) EXPORT_SYMBOL(lock_sock); -void 
release_sock(struct sock *sk) +void fastcall release_sock(struct sock *sk) { spin_lock_bh(&(sk->sk_lock.slock)); if (sk->sk_backlog.tail) @@ -1160,9 +1167,42 @@ void release_sock(struct sock *sk) wake_up(&(sk->sk_lock.wq)); spin_unlock_bh(&(sk->sk_lock.slock)); } - EXPORT_SYMBOL(release_sock); +/* When > 0 there are consumers of rx skb time stamps */ +atomic_t netstamp_needed = ATOMIC_INIT(0); + +int sock_get_timestamp(struct sock *sk, struct timeval *userstamp) +{ + if (!sock_flag(sk, SOCK_TIMESTAMP)) + sock_enable_timestamp(sk); + if (sk->sk_stamp.tv_sec == -1) + return -ENOENT; + if (sk->sk_stamp.tv_sec == 0) + do_gettimeofday(&sk->sk_stamp); + return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ? + -EFAULT : 0; +} +EXPORT_SYMBOL(sock_get_timestamp); + +void sock_enable_timestamp(struct sock *sk) +{ + if (!sock_flag(sk, SOCK_TIMESTAMP)) { + sock_set_flag(sk, SOCK_TIMESTAMP); + atomic_inc(&netstamp_needed); + } +} +EXPORT_SYMBOL(sock_enable_timestamp); + +void sock_disable_timestamp(struct sock *sk) +{ + if (sock_flag(sk, SOCK_TIMESTAMP)) { + sock_reset_flag(sk, SOCK_TIMESTAMP); + atomic_dec(&netstamp_needed); + } +} +EXPORT_SYMBOL(sock_disable_timestamp); + EXPORT_SYMBOL(__lock_sock); EXPORT_SYMBOL(__release_sock); EXPORT_SYMBOL(sk_alloc); --- linux-2.6.6-rc1/net/econet/af_econet.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/econet/af_econet.c 2004-04-18 22:25:25.062020472 -0700 @@ -665,10 +665,8 @@ static int econet_ioctl(struct socket *s switch(cmd) { case SIOCGSTAMP: - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - return copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)) ? 
-EFAULT : 0; + return sock_get_timestamp(sk,(struct timeval *)arg); + case SIOCSIFADDR: case SIOCGIFADDR: return ec_dev_ioctl(sock, cmd, (void *)arg); --- linux-2.6.6-rc1/net/ipv4/af_inet.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/af_inet.c 2004-04-18 22:25:25.063020320 -0700 @@ -843,11 +843,7 @@ int inet_ioctl(struct socket *sock, unsi switch (cmd) { case SIOCGSTAMP: - if (!sk->sk_stamp.tv_sec) - err = -ENOENT; - else if (copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval))) - err = -EFAULT; + err = sock_get_timestamp(sk, (struct timeval *)arg); break; case SIOCADDRT: case SIOCDELRT: --- linux-2.6.6-rc1/net/ipv4/arp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/arp.c 2004-04-18 22:25:25.064020168 -0700 @@ -860,7 +860,7 @@ int arp_process(struct sk_buff *skb) if (n) neigh_release(n); - if (skb->stamp.tv_sec == 0 || + if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || in_dev->arp_parms->proxy_delay == 0) { arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); --- linux-2.6.6-rc1/net/ipv4/esp4.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/esp4.c 2004-04-18 22:25:25.065020016 -0700 @@ -31,6 +31,7 @@ int esp_output(struct sk_buff *skb) struct esp_data *esp; struct sk_buff *trailer; struct udphdr *uh = NULL; + u32 *udpdata32; struct xfrm_encap_tmpl *encap = NULL; int blksize; int clen; @@ -97,6 +98,14 @@ int esp_output(struct sk_buff *skb) esph = (struct ip_esp_hdr*)(uh+1); top_iph->protocol = IPPROTO_UDP; break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + uh = (struct udphdr*) esph; + udpdata32 = (u32*)(uh+1); + udpdata32[0] = udpdata32[1] = 0; + esph = (struct ip_esp_hdr*)(udpdata32+2); + alen += 2; + top_iph->protocol = IPPROTO_UDP; + break; default: printk(KERN_INFO "esp_output(): Unhandled encap: %u\n", @@ -132,6 +141,14 @@ int esp_output(struct sk_buff *skb) esph = (struct ip_esp_hdr*)(uh+1); top_iph->protocol = IPPROTO_UDP; break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + uh = (struct udphdr*) 
esph; + udpdata32 = (u32*)(uh+1); + udpdata32[0] = udpdata32[1] = 0; + esph = (struct ip_esp_hdr*)(udpdata32+2); + alen += 2; + top_iph->protocol = IPPROTO_UDP; + break; default: printk(KERN_INFO "esp_output(): Unhandled encap: %u\n", @@ -294,6 +311,7 @@ int esp_input(struct xfrm_state *x, stru switch (decap->decap_type) { case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: if ((void*)uh == (void*)esph) { printk(KERN_DEBUG @@ -354,6 +372,7 @@ int esp_post_input(struct xfrm_state *x, switch (encap->encap_type) { case UDP_ENCAP_ESPINUDP: + case UDP_ENCAP_ESPINUDP_NON_IKE: /* * 1) if the NAT-T peer's IP or port changed then * advertize the change to the keying daemon. @@ -428,8 +447,8 @@ void esp4_err(struct sk_buff *skb, u32 i x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", - ntohl(esph->spi), ntohl(iph->daddr)); + NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", + ntohl(esph->spi), ntohl(iph->daddr))); xfrm_state_put(x); } @@ -492,10 +511,10 @@ int esp_init_state(struct xfrm_state *x, if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_tfm_alg_digestsize(esp->auth.tfm)) { - printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", + NETDEBUG(printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8); + aalg_desc->uinfo.auth.icv_fullbits/8)); goto error; } @@ -534,6 +553,9 @@ int esp_init_state(struct xfrm_state *x, case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); break; + case UDP_ENCAP_ESPINUDP_NON_IKE: + x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32); + break; default: printk (KERN_INFO "esp_init_state(): Unhandled encap type: %u\n", --- linux-2.6.6-rc1/net/ipv4/icmp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/icmp.c 2004-04-18 22:25:25.066019864 -0700 @@ -620,11 +620,11 @@ static void 
icmp_unreach(struct sk_buff break; case ICMP_FRAG_NEEDED: if (ipv4_config.no_pmtu_disc) { - if (net_ratelimit()) + LIMIT_NETDEBUG( printk(KERN_INFO "ICMP: %u.%u.%u.%u: " "fragmentation needed " "and DF set.\n", - NIPQUAD(iph->daddr)); + NIPQUAD(iph->daddr))); } else { info = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); @@ -633,10 +633,10 @@ static void icmp_unreach(struct sk_buff } break; case ICMP_SR_FAILED: - if (net_ratelimit()) + LIMIT_NETDEBUG( printk(KERN_INFO "ICMP: %u.%u.%u.%u: Source " "Route Failed.\n", - NIPQUAD(iph->daddr)); + NIPQUAD(iph->daddr))); break; default: break; --- linux-2.6.6-rc1/net/ipv4/igmp.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv4/igmp.c 2004-04-18 22:25:25.068019560 -0700 @@ -2223,7 +2223,9 @@ static int igmp_mc_seq_show(struct seq_f struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; #ifdef CONFIG_IP_MULTICAST - querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : "V2"; + querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : + IGMP_V2_SEEN(state->in_dev) ? "V2" : + "V3"; #else querier = "NONE"; #endif @@ -2236,7 +2238,9 @@ static int igmp_mc_seq_show(struct seq_f seq_printf(seq, "\t\t\t\t%08lX %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, jiffies_to_clock_t(im->timer.expires-jiffies), im->reporter); + im->tm_running, im->tm_running ? 
+ jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->reporter); } return 0; } --- linux-2.6.6-rc1/net/ipv4/ipcomp.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/ipv4/ipcomp.c 2004-04-18 22:25:25.068019560 -0700 @@ -258,8 +258,8 @@ static void ipcomp4_err(struct sk_buff * spi, IPPROTO_COMP, AF_INET); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", - spi, NIPQUAD(iph->daddr)); + NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n", + spi, NIPQUAD(iph->daddr))); xfrm_state_put(x); } --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_amanda.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_amanda.c 2004-04-18 22:25:25.070019256 -0700 @@ -46,10 +46,11 @@ static DECLARE_LOCK(amanda_buffer_lock); static int help(struct sk_buff *skb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { - struct ip_conntrack_expect exp; + struct ip_conntrack_expect *exp; struct ip_ct_amanda_expect *exp_amanda_info; char *data, *data_limit, *tmp; unsigned int dataoff, i; + u_int16_t port, len; /* Only look at packets from the Amanda server */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) @@ -79,33 +80,40 @@ static int help(struct sk_buff *skb, goto out; data += strlen("CONNECT "); - memset(&exp, 0, sizeof(exp)); - exp.tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - exp.tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - exp.tuple.dst.protonum = IPPROTO_TCP; - exp.mask.src.ip = 0xFFFFFFFF; - exp.mask.dst.ip = 0xFFFFFFFF; - exp.mask.dst.protonum = 0xFFFF; - exp.mask.dst.u.tcp.port = 0xFFFF; - /* Only search first line. 
*/ if ((tmp = strchr(data, '\n'))) *tmp = '\0'; - exp_amanda_info = &exp.help.exp_amanda_info; for (i = 0; i < ARRAY_SIZE(conns); i++) { char *match = strstr(data, conns[i]); if (!match) continue; tmp = data = match + strlen(conns[i]); - exp_amanda_info->offset = data - amanda_buffer; - exp_amanda_info->port = simple_strtoul(data, &data, 10); - exp_amanda_info->len = data - tmp; - if (exp_amanda_info->port == 0 || exp_amanda_info->len > 5) + port = simple_strtoul(data, &data, 10); + len = data - tmp; + if (port == 0 || len > 5) break; - exp.tuple.dst.u.tcp.port = htons(exp_amanda_info->port); - ip_conntrack_expect_related(ct, &exp); + exp = ip_conntrack_expect_alloc(); + if (exp == NULL) + goto out; + + exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + exp->tuple.dst.protonum = IPPROTO_TCP; + exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.dst.ip = 0xFFFFFFFF; + exp->mask.dst.protonum = 0xFFFF; + exp->mask.dst.u.tcp.port = 0xFFFF; + + exp_amanda_info = &exp->help.exp_amanda_info; + exp_amanda_info->offset = data - amanda_buffer; + exp_amanda_info->port = port; + exp_amanda_info->len = len; + + exp->tuple.dst.u.tcp.port = htons(port); + + ip_conntrack_expect_related(exp, ct); } out: --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_core.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_core.c 2004-04-18 22:25:25.072018952 -0700 @@ -67,6 +67,7 @@ int ip_conntrack_max; static atomic_t ip_conntrack_count = ATOMIC_INIT(0); struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep; +struct ip_conntrack ip_conntrack_untracked; extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; @@ -794,6 +795,15 @@ unsigned int ip_conntrack_in(unsigned in int set_reply; int ret; + /* Never happen */ + if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { + if (net_ratelimit()) { + printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n", 
+ (*pskb)->nh.iph->protocol, hooknum); + } + return NF_DROP; + } + /* FIXME: Do this right please. --RR */ (*pskb)->nfcache |= NFC_UNKNOWN; @@ -812,18 +822,10 @@ unsigned int ip_conntrack_in(unsigned in } #endif - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ + /* Previously seen (loopback or untracked)? Ignore. */ if ((*pskb)->nfct) return NF_ACCEPT; - /* Gather fragments. */ - if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { - *pskb = ip_ct_gather_frags(*pskb); - if (!*pskb) - return NF_STOLEN; - } - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); /* It may be an icmp error... */ @@ -917,11 +919,55 @@ static void expectation_timed_out(unsign WRITE_UNLOCK(&ip_conntrack_lock); } +struct ip_conntrack_expect * +ip_conntrack_expect_alloc() +{ + struct ip_conntrack_expect *new; + + new = (struct ip_conntrack_expect *) + kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); + if (!new) { + DEBUGP("expect_related: OOM allocating expect\n"); + return NULL; + } + + /* tuple_cmp compares whole union, we have to initialize it cleanly */ + memset(new, 0, sizeof(struct ip_conntrack_expect)); + + return new; +} + +static void +ip_conntrack_expect_insert(struct ip_conntrack_expect *new, + struct ip_conntrack *related_to) +{ + DEBUGP("new expectation %p of conntrack %p\n", new, related_to); + new->expectant = related_to; + new->sibling = NULL; + atomic_set(&new->use, 1); + + /* add to expected list for this connection */ + list_add(&new->expected_list, &related_to->sibling_list); + /* add to global list of expectations */ + + list_prepend(&ip_conntrack_expect_list, &new->list); + /* add and start timer if required */ + if (related_to->helper->timeout) { + init_timer(&new->timeout); + new->timeout.data = (unsigned long)new; + new->timeout.function = expectation_timed_out; + new->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&new->timeout); + } + related_to->expecting++; +} + /* Add a related connection. 
*/ -int ip_conntrack_expect_related(struct ip_conntrack *related_to, - struct ip_conntrack_expect *expect) +int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, + struct ip_conntrack *related_to) { - struct ip_conntrack_expect *old, *new; + struct ip_conntrack_expect *old; int ret = 0; WRITE_LOCK(&ip_conntrack_lock); @@ -943,7 +989,7 @@ int ip_conntrack_expect_related(struct i if (related_to->helper->timeout) { if (!del_timer(&old->timeout)) { /* expectation is dying. Fall through */ - old = NULL; + goto out; } else { old->timeout.expires = jiffies + related_to->helper->timeout * HZ; @@ -951,10 +997,10 @@ int ip_conntrack_expect_related(struct i } } - if (old) { - WRITE_UNLOCK(&ip_conntrack_lock); - return -EEXIST; - } + WRITE_UNLOCK(&ip_conntrack_lock); + kfree(expect); + return -EEXIST; + } else if (related_to->helper->max_expected && related_to->expecting >= related_to->helper->max_expected) { struct list_head *cur_item; @@ -971,6 +1017,7 @@ int ip_conntrack_expect_related(struct i related_to->helper->name, NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); + kfree(expect); return -EPERM; } DEBUGP("ip_conntrack: max number of expected " @@ -1010,37 +1057,12 @@ int ip_conntrack_expect_related(struct i &expect->mask)) { WRITE_UNLOCK(&ip_conntrack_lock); DEBUGP("expect_related: busy!\n"); + + kfree(expect); return -EBUSY; } - - new = (struct ip_conntrack_expect *) - kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); - if (!new) { - WRITE_UNLOCK(&ip_conntrack_lock); - DEBUGP("expect_relaed: OOM allocating expect\n"); - return -ENOMEM; - } - - DEBUGP("new expectation %p of conntrack %p\n", new, related_to); - memcpy(new, expect, sizeof(*expect)); - new->expectant = related_to; - new->sibling = NULL; - atomic_set(&new->use, 1); - - /* add to expected list for this connection */ - list_add(&new->expected_list, &related_to->sibling_list); - /* add to global list of 
expectations */ - list_prepend(&ip_conntrack_expect_list, &new->list); - /* add and start timer if required */ - if (related_to->helper->timeout) { - init_timer(&new->timeout); - new->timeout.data = (unsigned long)new; - new->timeout.function = expectation_timed_out; - new->timeout.expires = jiffies + - related_to->helper->timeout * HZ; - add_timer(&new->timeout); - } - related_to->expecting++; + +out: ip_conntrack_expect_insert(expect, related_to); WRITE_UNLOCK(&ip_conntrack_lock); @@ -1158,18 +1180,18 @@ void ip_ct_refresh(struct ip_conntrack * { IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); - WRITE_LOCK(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) ct->timeout.expires = extra_jiffies; else { + WRITE_LOCK(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); } + WRITE_UNLOCK(&ip_conntrack_lock); } - WRITE_UNLOCK(&ip_conntrack_lock); } /* Returns new sk_buff, or NULL */ @@ -1422,6 +1444,18 @@ int __init ip_conntrack_init(void) /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; + + /* Set up fake conntrack: + - to never be deleted, not in any hashes */ + atomic_set(&ip_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); + /* - and prepare the ctinfo field for REJECT & NAT. 
*/ + ip_conntrack_untracked.infos[IP_CT_NEW].master = + ip_conntrack_untracked.infos[IP_CT_RELATED].master = + ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master = + &ip_conntrack_untracked.ct_general; + return ret; err_free_hash: --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_ftp.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_ftp.c 2004-04-18 22:25:25.073018800 -0700 @@ -256,8 +256,8 @@ static int help(struct sk_buff *skb, int dir = CTINFO2DIR(ctinfo); unsigned int matchlen, matchoff; struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info; - struct ip_conntrack_expect expect, *exp = &expect; - struct ip_ct_ftp_expect *exp_ftp_info = &exp->help.exp_ftp_info; + struct ip_conntrack_expect *exp; + struct ip_ct_ftp_expect *exp_ftp_info; unsigned int i; int found = 0; @@ -346,8 +346,15 @@ static int help(struct sk_buff *skb, DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", (int)matchlen, data + matchoff, matchlen, ntohl(tcph.seq) + matchoff); - - memset(&expect, 0, sizeof(expect)); + + /* Allocate expectation which will be inserted */ + exp = ip_conntrack_expect_alloc(); + if (exp == NULL) { + ret = NF_ACCEPT; + goto out; + } + + exp_ftp_info = &exp->help.exp_ftp_info; /* Update the ftp info */ if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) @@ -389,7 +396,7 @@ static int help(struct sk_buff *skb, exp->expectfn = NULL; /* Ignore failure; should only happen with NAT */ - ip_conntrack_expect_related(ct, &expect); + ip_conntrack_expect_related(exp, ct); ret = NF_ACCEPT; out: UNLOCK_BH(&ip_ftp_lock); --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_irc.c 2003-10-08 15:07:10.000000000 -0700 +++ 25/net/ipv4/netfilter/ip_conntrack_irc.c 2004-04-18 22:25:25.073018800 -0700 @@ -60,8 +60,8 @@ DECLARE_LOCK(ip_irc_lock); struct module *ip_conntrack_irc = THIS_MODULE; #if 0 -#define DEBUGP(format, args...) 
printk(KERN_DEBUG __FILE__ ":" __FUNCTION__ \ - ":" format, ## args) +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) #else #define DEBUGP(format, args...) #endif @@ -106,8 +106,8 @@ static int help(struct sk_buff *skb, struct tcphdr tcph; char *data, *data_limit; int dir = CTINFO2DIR(ctinfo); - struct ip_conntrack_expect expect, *exp = &expect; - struct ip_ct_irc_expect *exp_irc_info = &exp->help.exp_irc_info; + struct ip_conntrack_expect *exp; + struct ip_ct_irc_expect *exp_irc_info = NULL; u_int32_t dcc_ip; u_int16_t dcc_port; @@ -190,8 +190,12 @@ static int help(struct sk_buff *skb, continue; } - - memset(&expect, 0, sizeof(expect)); + + exp = ip_conntrack_expect_alloc(); + if (exp == NULL) + goto out; + + exp_irc_info = &exp->help.exp_irc_info; /* save position of address in dcc string, * necessary for NAT */ @@ -218,7 +222,7 @@ static int help(struct sk_buff *skb, NIPQUAD(exp->tuple.dst.ip), ntohs(exp->tuple.dst.u.tcp.port)); - ip_conntrack_expect_related(ct, &expect); + ip_conntrack_expect_related(exp, ct); goto out; } /* for .. NUM_DCCPROTO */ --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_proto_tcp.c 2004-04-18 22:25:25.074018648 -0700 @@ -178,6 +178,16 @@ static int tcp_packet(struct ip_conntrac if (skb_copy_bits(skb, skb->nh.iph->ihl * 4, &tcph, sizeof(tcph)) != 0) return -1; + /* If only reply is a RST, we can consider ourselves not to + have an established connection: this is a fairly common + problem case, so we can delete the conntrack + immediately. 
--RR */ + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) { + if (del_timer(&conntrack->timeout)) + conntrack->timeout.function((unsigned long)conntrack); + return NF_ACCEPT; + } + WRITE_LOCK(&tcp_lock); oldtcpstate = conntrack->proto.tcp.state; newconntrack @@ -199,29 +209,21 @@ static int tcp_packet(struct ip_conntrac /* Poor man's window tracking: record SYN/ACK for handshake check */ if (oldtcpstate == TCP_CONNTRACK_SYN_SENT && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY - && tcph.syn && tcph.ack) + && tcph.syn && tcph.ack) { conntrack->proto.tcp.handshake_ack = htonl(ntohl(tcph.seq) + 1); + goto out; + } - /* If only reply is a RST, we can consider ourselves not to - have an established connection: this is a fairly common - problem case, so we can delete the conntrack - immediately. --RR */ - if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) && tcph.rst) { - WRITE_UNLOCK(&tcp_lock); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long)conntrack); - } else { - /* Set ASSURED if we see see valid ack in ESTABLISHED after SYN_RECV */ - if (oldtcpstate == TCP_CONNTRACK_SYN_RECV - && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL - && tcph.ack && !tcph.syn - && tcph.ack_seq == conntrack->proto.tcp.handshake_ack) - set_bit(IPS_ASSURED_BIT, &conntrack->status); + /* Set ASSURED if we see valid ack in ESTABLISHED after SYN_RECV */ + if (oldtcpstate == TCP_CONNTRACK_SYN_RECV + && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL + && tcph.ack && !tcph.syn + && tcph.ack_seq == conntrack->proto.tcp.handshake_ack) + set_bit(IPS_ASSURED_BIT, &conntrack->status); - WRITE_UNLOCK(&tcp_lock); - ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]); - } +out: WRITE_UNLOCK(&tcp_lock); + ip_ct_refresh(conntrack, *tcp_timeouts[newconntrack]); return NF_ACCEPT; } --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-04-18 22:25:25.076018344 -0700 @@ 
-194,6 +194,26 @@ static unsigned int ip_confirm(unsigned return ip_conntrack_confirm(*pskb); } +static unsigned int ip_conntrack_defrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. */ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + return NF_ACCEPT; +} + static unsigned int ip_refrag(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -236,6 +256,14 @@ static unsigned int ip_conntrack_local(u /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ +static struct nf_hook_ops ip_conntrack_defrag_ops = { + .hook = ip_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, +}; + static struct nf_hook_ops ip_conntrack_in_ops = { .hook = ip_conntrack_in, .owner = THIS_MODULE, @@ -244,6 +272,14 @@ static struct nf_hook_ops ip_conntrack_i .priority = NF_IP_PRI_CONNTRACK, }; +static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = { + .hook = ip_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, +}; + static struct nf_hook_ops ip_conntrack_local_out_ops = { .hook = ip_conntrack_local, .owner = THIS_MODULE, @@ -470,10 +506,20 @@ static int init_or_cleanup(int init) if (!proc) goto cleanup_init; proc->owner = THIS_MODULE; + ret = nf_register_hook(&ip_conntrack_defrag_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing defrag hook.\n"); + goto cleanup_proc; + } + ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local_out defrag hook.\n"); + 
goto cleanup_defragops; + } ret = nf_register_hook(&ip_conntrack_in_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing hook.\n"); - goto cleanup_proc; + goto cleanup_defraglocalops; } ret = nf_register_hook(&ip_conntrack_local_out_ops); if (ret < 0) { @@ -511,6 +557,10 @@ static int init_or_cleanup(int init) nf_unregister_hook(&ip_conntrack_local_out_ops); cleanup_inops: nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_defraglocalops: + nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); + cleanup_defragops: + nf_unregister_hook(&ip_conntrack_defrag_ops); cleanup_proc: proc_net_remove("ip_conntrack"); cleanup_init: @@ -591,6 +641,7 @@ EXPORT_SYMBOL(ip_ct_refresh); EXPORT_SYMBOL(ip_ct_find_proto); EXPORT_SYMBOL(__ip_ct_find_proto); EXPORT_SYMBOL(ip_ct_find_helper); +EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_change_expect); EXPORT_SYMBOL(ip_conntrack_unexpect_related); @@ -602,5 +653,6 @@ EXPORT_SYMBOL(ip_conntrack_htable_size); EXPORT_SYMBOL(ip_conntrack_expect_list); EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); +EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put); --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_conntrack_tftp.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_conntrack_tftp.c 2004-04-18 22:25:25.077018192 -0700 @@ -33,8 +33,8 @@ MODULE_PARM_DESC(ports, "port numbers of #endif #if 0 -#define DEBUGP(format, args...) printk(__FILE__ ":" __FUNCTION__ ": " \ - format, ## args) +#define DEBUGP(format, args...) printk("%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) #else #define DEBUGP(format, args...) 
#endif @@ -44,7 +44,7 @@ static int tftp_help(struct sk_buff *skb enum ip_conntrack_info ctinfo) { struct tftphdr tftph; - struct ip_conntrack_expect exp; + struct ip_conntrack_expect *exp; if (skb_copy_bits(skb, skb->nh.iph->ihl * 4 + sizeof(struct udphdr), &tftph, sizeof(tftph)) != 0) @@ -57,19 +57,29 @@ static int tftp_help(struct sk_buff *skb DEBUGP(""); DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - memset(&exp, 0, sizeof(exp)); - exp.tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp.mask.src.ip = 0xffffffff; - exp.mask.dst.ip = 0xffffffff; - exp.mask.dst.u.udp.port = 0xffff; - exp.mask.dst.protonum = 0xffff; - exp.expectfn = NULL; + exp = ip_conntrack_expect_alloc(); + if (exp == NULL) + return NF_ACCEPT; + + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + exp->mask.src.ip = 0xffffffff; + exp->mask.dst.ip = 0xffffffff; + exp->mask.dst.u.udp.port = 0xffff; + exp->mask.dst.protonum = 0xffff; + exp->expectfn = NULL; DEBUGP("expect: "); - DUMP_TUPLE(&exp.tuple); - DUMP_TUPLE(&exp.mask); - ip_conntrack_expect_related(ct, &exp); + DUMP_TUPLE(&exp->tuple); + DUMP_TUPLE(&exp->mask); + ip_conntrack_expect_related(exp, ct); + break; + case TFTP_OPCODE_DATA: + case TFTP_OPCODE_ACK: + DEBUGP("Data/ACK opcode\n"); + break; + case TFTP_OPCODE_ERROR: + DEBUGP("Error opcode\n"); break; default: DEBUGP("Unknown opcode\n"); --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_nat_core.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_nat_core.c 2004-04-18 22:25:25.077018192 -0700 @@ -1016,6 +1016,10 @@ int __init ip_nat_init(void) /* FIXME: Man, this is a hack. 
*/ IP_NF_ASSERT(ip_conntrack_destroyed == NULL); ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + /* Initialize fake conntrack so that NAT will skip it */ + ip_conntrack_untracked.nat.info.initialized |= + (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); return 0; } --- linux-2.6.6-rc1/net/ipv4/netfilter/ip_nat_tftp.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ip_nat_tftp.c 2004-04-18 22:25:25.078018040 -0700 @@ -47,8 +47,8 @@ MODULE_PARM_DESC(ports, "port numbers of #endif #if 0 -#define DEBUGP(format, args...) printk(__FILE__ ":" __FUNCTION__ ": " \ - format, ## args) +#define DEBUGP(format, args...) printk("%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) #else #define DEBUGP(format, args...) #endif --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/net/ipv4/netfilter/iptable_raw.c 2004-04-18 22:25:25.082017432 -0700 @@ -0,0 +1,149 @@ +/* + * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . + * + * Copyright (C) 2003 Jozsef Kadlecsik + */ +#include +#include + +#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. 
*/ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[2]; + struct ipt_error term; +} initial_table __initdata += { { "raw", RAW_VALID_HOOKS, 3, + sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_raw = { + .name = "raw", + .table = &initial_table.repl, + .valid_hooks = RAW_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE +}; + +/* The work comes in here from netfilter.c. 
*/ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); +} + +/* 'raw' is the very first table. */ +static struct nf_hook_ops ipt_ops[] = { + { + .hook = ipt_hook, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_RAW + }, + { + .hook = ipt_hook, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_RAW + }, +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ipt_register_table(&packet_raw); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_raw); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_raw); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); --- linux-2.6.6-rc1/net/ipv4/netfilter/ipt_conntrack.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ipt_conntrack.c 2004-04-18 22:25:25.081017584 -0700 @@ -35,11 +35,13 @@ match(const struct sk_buff *skb, #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) - if (ct) - statebit = IPT_CONNTRACK_STATE_BIT(ctinfo); - else - statebit = IPT_CONNTRACK_STATE_INVALID; - + if (skb->nfct == &ip_conntrack_untracked.infos[IP_CT_NEW]) + statebit = IPT_CONNTRACK_STATE_UNTRACKED; + else if (ct) + statebit = IPT_CONNTRACK_STATE_BIT(ctinfo); + else + statebit = IPT_CONNTRACK_STATE_INVALID; + if(sinfo->flags & IPT_CONNTRACK_STATE) { if (ct) { if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != --- 
linux-2.6.6-rc1/net/ipv4/netfilter/ipt_LOG.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ipt_LOG.c 2004-04-18 22:25:25.079017888 -0700 @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -26,6 +27,10 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + #if 0 #define DEBUGP printk #else @@ -324,28 +329,25 @@ static void dump_packet(const struct ipt /* maxlen = 230+ 91 + 230 + 252 = 803 */ } -static unsigned int -ipt_log_target(struct sk_buff **pskb, +static void +ipt_log_packet(unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) + const struct ipt_log_info *loginfo, + const char *level_string, + const char *prefix) { - const struct ipt_log_info *loginfo = targinfo; - char level_string[4] = "< >"; - - level_string[1] = '0' + (loginfo->level % 8); spin_lock_bh(&log_lock); printk(level_string); printk("%sIN=%s OUT=%s ", - loginfo->prefix, + prefix == NULL ? loginfo->prefix : prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER - if ((*pskb)->nf_bridge) { - struct net_device *physindev = (*pskb)->nf_bridge->physindev; - struct net_device *physoutdev = (*pskb)->nf_bridge->physoutdev; + if (skb->nf_bridge) { + struct net_device *physindev = skb->nf_bridge->physindev; + struct net_device *physoutdev = skb->nf_bridge->physoutdev; if (physindev && in != physindev) printk("PHYSIN=%s ", physindev->name); @@ -357,25 +359,56 @@ ipt_log_target(struct sk_buff **pskb, if (in && !out) { /* MAC logging for input chain only. 
*/ printk("MAC="); - if ((*pskb)->dev && (*pskb)->dev->hard_header_len - && (*pskb)->mac.raw != (void*)(*pskb)->nh.iph) { + if (skb->dev && skb->dev->hard_header_len + && skb->mac.raw != (void*)skb->nh.iph) { int i; - unsigned char *p = (*pskb)->mac.raw; - for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + unsigned char *p = skb->mac.raw; + for (i = 0; i < skb->dev->hard_header_len; i++,p++) printk("%02x%c", *p, - i==(*pskb)->dev->hard_header_len - 1 + i==skb->dev->hard_header_len - 1 ? ' ':':'); } else printk(" "); } - dump_packet(loginfo, *pskb, 0); + dump_packet(loginfo, skb, 0); printk("\n"); spin_unlock_bh(&log_lock); +} + +static unsigned int +ipt_log_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); return IPT_CONTINUE; } +static void +ipt_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ipt_log_info loginfo = { + .level = 0, + .logflags = IPT_LOG_MASK, + .prefix = "" + }; + + ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); +} + static int ipt_log_checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo, @@ -413,11 +446,18 @@ static struct ipt_target ipt_log_reg = { static int __init init(void) { - return ipt_register_target(&ipt_log_reg); + if (ipt_register_target(&ipt_log_reg)) + return -EINVAL; + if (nflog) + nf_log_register(PF_INET, &ipt_logfn); + + return 0; } static void __exit fini(void) { + if (nflog) + nf_log_unregister(PF_INET, &ipt_logfn); ipt_unregister_target(&ipt_log_reg); } --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/net/ipv4/netfilter/ipt_NOTRACK.c 2004-04-18 
22:25:25.079017888 -0700 @@ -0,0 +1,75 @@ +/* This is a module which is used for setting up fake conntracks + * on packets so that they are not seen by the conntrack/NAT code. + */ +#include +#include + +#include +#include + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + /* Previously seen (loopback)? Ignore. */ + if ((*pskb)->nfct != NULL) + return IPT_CONTINUE; + + /* Attach fake conntrack entry. + If there is a real ct entry corresponding to this packet, + it'll hang around till timing out. We don't deal with it + for performance reasons. JK */ + (*pskb)->nfct = &ip_conntrack_untracked.infos[IP_CT_NEW]; + nf_conntrack_get((*pskb)->nfct); + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != 0) { + printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n", + targinfosize); + return 0; + } + + if (strcmp(tablename, "raw") != 0) { + printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_notrack_reg = { + .name = "NOTRACK", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_notrack_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_notrack_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); --- linux-2.6.6-rc1/net/ipv4/netfilter/ipt_state.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ipt_state.c 2004-04-18 22:25:25.081017584 -0700 @@ -30,7 +30,9 @@ match(const struct sk_buff *skb, enum ip_conntrack_info ctinfo; unsigned int statebit; - if (!ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) + if (skb->nfct 
== &ip_conntrack_untracked.infos[IP_CT_NEW]) + statebit = IPT_STATE_UNTRACKED; + else if (!ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) statebit = IPT_STATE_INVALID; else statebit = IPT_STATE_BIT(ctinfo); --- linux-2.6.6-rc1/net/ipv4/netfilter/ipt_ULOG.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv4/netfilter/ipt_ULOG.c 2004-04-18 22:25:25.081017584 -0700 @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -64,13 +65,13 @@ MODULE_DESCRIPTION("iptables userspace l #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ #if 0 -#define DEBUGP(format, args...) printk(__FILE__ ":" __FUNCTION__ ":" \ - format, ## args) +#define DEBUGP(format, args...) printk("%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) #else #define DEBUGP(format, args...) #endif -#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format, ## args); } while (0) +#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) static unsigned int nlbufsiz = 4096; MODULE_PARM(nlbufsiz, "i"); @@ -80,6 +81,10 @@ static unsigned int flushtimeout = 10 * MODULE_PARM(flushtimeout, "i"); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + /* global data structures */ typedef struct { @@ -157,17 +162,17 @@ struct sk_buff *ulog_alloc_skb(unsigned return skb; } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, void *userinfo) +static void ipt_ulog_packet(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ipt_ulog_info *loginfo, + const char *prefix) { ulog_buff_t *ub; ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) 
targinfo; /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -176,8 +181,8 @@ static unsigned int ipt_ulog_target(stru /* calculate the size of the skb needed */ if ((loginfo->copy_range == 0) || - (loginfo->copy_range > (*pskb)->len)) { - copy_len = (*pskb)->len; + (loginfo->copy_range > skb->len)) { + copy_len = skb->len; } else { copy_len = loginfo->copy_range; } @@ -214,19 +219,21 @@ static unsigned int ipt_ulog_target(stru /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = (*pskb)->stamp.tv_sec; - pm->timestamp_usec = (*pskb)->stamp.tv_usec; - pm->mark = (*pskb)->nfmark; + pm->timestamp_sec = skb->stamp.tv_sec; + pm->timestamp_usec = skb->stamp.tv_usec; + pm->mark = skb->nfmark; pm->hook = hooknum; - if (loginfo->prefix[0] != '\0') + if (prefix != NULL) + strncpy(pm->prefix, prefix, sizeof(pm->prefix)); + else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); else *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 - && (*pskb)->mac.raw != (void *) (*pskb)->nh.iph + && skb->mac.raw != (void *) skb->nh.iph && in->hard_header_len <= ULOG_MAC_LEN) { - memcpy(pm->mac, (*pskb)->mac.raw, in->hard_header_len); + memcpy(pm->mac, skb->mac.raw, in->hard_header_len); pm->mac_len = in->hard_header_len; } else pm->mac_len = 0; @@ -241,8 +248,8 @@ static unsigned int ipt_ulog_target(stru else pm->outdev_name[0] = '\0'; - /* copy_len <= (*pskb)->len, so can't fail. */ - if (skb_copy_bits(*pskb, 0, pm->payload, copy_len) < 0) + /* copy_len <= skb->len, so can't fail. 
*/ + if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) BUG(); /* check if we are building multi-part messages */ @@ -266,8 +273,7 @@ static unsigned int ipt_ulog_target(stru UNLOCK_BH(&ulog_lock); - return IPT_CONTINUE; - + return; nlmsg_failure: PRINTR("ipt_ULOG: error during NLMSG_PUT\n"); @@ -276,8 +282,35 @@ alloc_failure: PRINTR("ipt_ULOG: Error building netlink message\n"); UNLOCK_BH(&ulog_lock); +} + +static unsigned int ipt_ulog_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - return IPT_CONTINUE; + ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + + return IPT_CONTINUE; +} + +static void ipt_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ipt_ulog_info loginfo = { + .nl_group = ULOG_DEFAULT_NLGROUP, + .copy_range = 0, + .qthreshold = ULOG_DEFAULT_QTHRESHOLD, + .prefix = "" + }; + + ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } static int ipt_ulog_checkentry(const char *tablename, @@ -341,7 +374,9 @@ static int __init init(void) sock_release(nflognl->sk_socket); return -EINVAL; } - + if (nflog) + nf_log_register(PF_INET, &ipt_logfn); + return 0; } @@ -352,6 +387,8 @@ static void __exit fini(void) DEBUGP("ipt_ULOG: cleanup_module\n"); + if (nflog) + nf_log_unregister(PF_INET, &ipt_logfn); ipt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); --- linux-2.6.6-rc1/net/ipv4/netfilter/Kconfig 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv4/netfilter/Kconfig 2004-04-18 22:25:25.069019408 -0700 @@ -579,5 +579,29 @@ config IP_NF_COMPAT_IPFWADM To compile it as a module, choose M here. If unsure, say N. 
+config IP_NF_TARGET_NOTRACK + tristate 'NOTRACK target support' + depends on IP_NF_RAW + help + The NOTRACK target allows a select rule to specify + which packets *not* to enter the conntrack/NAT + subsystem with all the consequences (no ICMP error tracking, + no protocol helpers for the selected packets). + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config IP_NF_RAW + tristate 'raw table support (required for NOTRACK/TRACE)' + depends on IP_NF_IPTABLES + help + This option adds a `raw' table to iptables. This table is the very + first in the netfilter framework and hooks in at the PREROUTING + and OUTPUT chains. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + help + endmenu --- linux-2.6.6-rc1/net/ipv4/netfilter/Makefile 2003-09-08 13:58:59.000000000 -0700 +++ 25/net/ipv4/netfilter/Makefile 2004-04-18 22:25:25.070019256 -0700 @@ -38,6 +38,7 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_table obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o @@ -81,6 +82,7 @@ obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o +obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o --- linux-2.6.6-rc1/net/ipv4/udp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv4/udp.c 2004-04-18 22:25:25.083017280 -0700 @@ -975,6 +975,7 @@ static int udp_encap_rcv(struct sock * s /* Must be an IKE packet.. 
pass it through */ return 1; + decaps: /* At this point we are sure that this is an ESPinUDP packet, * so we need to remove 'len' bytes from the packet (the UDP * header and optional ESP marker bytes) and then modify the @@ -1002,6 +1003,20 @@ static int udp_encap_rcv(struct sock * s /* and let the caller know to send this into the ESP processor... */ return -1; + case UDP_ENCAP_ESPINUDP_NON_IKE: + /* Check if this is a keepalive packet. If so, eat it. */ + if (len == 1 && udpdata[0] == 0xff) { + return 0; + } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && + udpdata32[0] == 0 && udpdata32[1] == 0) { + + /* ESP Packet with Non-IKE marker */ + len = sizeof(struct udphdr) + 2 * sizeof(u32); + goto decaps; + } else + /* Must be an IKE packet.. pass it through */ + return 1; + default: if (net_ratelimit()) printk(KERN_INFO "udp_encap_rcv(): Unhandled UDP encap type: %u\n", --- linux-2.6.6-rc1/net/ipv6/addrconf.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv6/addrconf.c 2004-04-18 22:25:25.086016824 -0700 @@ -2553,7 +2553,89 @@ rtattr_failure: return -1; } -static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); + if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = 128; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifmca->idev->dev->ifindex; + RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); + ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.tstamp = 
(__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm)); + if (pid) nlh->nlmsg_flags |= NLM_F_MULTI; + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = 128; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifaca->aca_idev->dev->ifindex; + RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); + ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +enum addr_type_t +{ + UNICAST_ADDR, + MULTICAST_ADDR, + ANYCAST_ADDR, +}; + +static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, + enum addr_type_t type) { int idx, ip_idx; int s_idx, s_ip_idx; @@ -2561,7 +2643,9 @@ static int inet6_dump_ifaddr(struct sk_b struct net_device *dev; struct inet6_dev *idev = NULL; struct inet6_ifaddr *ifa; - + 
struct ifmcaddr6 *ifmca; + struct ifacaddr6 *ifaca; + s_idx = cb->args[0]; s_ip_idx = ip_idx = cb->args[1]; read_lock(&dev_base_lock); @@ -2575,28 +2659,58 @@ static int inet6_dump_ifaddr(struct sk_b if ((idev = in6_dev_get(dev)) == NULL) continue; read_lock_bh(&idev->lock); - /* unicast address */ - for (ifa = idev->addr_list; ifa; - ifa = ifa->if_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) - goto done; - } - /* temp addr */ + switch (type) { + case UNICAST_ADDR: + /* unicast address */ + for (ifa = idev->addr_list; ifa; + ifa = ifa->if_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if ((err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) + goto done; + } + /* temp addr */ #ifdef CONFIG_IPV6_PRIVACY - for (ifa = idev->tempaddr_list; ifa; - ifa = ifa->tmp_next, ip_idx++) { - if (ip_idx < s_ip_idx) - continue; - if ((err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) - goto done; - } + for (ifa = idev->tempaddr_list; ifa; + ifa = ifa->tmp_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if ((err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0) + goto done; + } #endif + break; + case MULTICAST_ADDR: + /* multicast address */ + for (ifmca = idev->mc_list; ifmca; + ifmca = ifmca->next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if ((err = inet6_fill_ifmcaddr(skb, ifmca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0) + goto done; + } + break; + case ANYCAST_ADDR: + /* anycast address */ + for (ifaca = idev->ac_list; ifaca; + ifaca = ifaca->aca_next, ip_idx++) { + if (ip_idx < s_ip_idx) + continue; + if ((err = inet6_fill_ifacaddr(skb, ifaca, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0) + goto done; + } + break; + default: 
+ break; + } read_unlock_bh(&idev->lock); in6_dev_put(idev); } @@ -2611,6 +2725,25 @@ done: return skb->len; } +static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + enum addr_type_t type = UNICAST_ADDR; + return inet6_dump_addr(skb, cb, type); +} + +static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + enum addr_type_t type = MULTICAST_ADDR; + return inet6_dump_addr(skb, cb, type); +} + + +static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) +{ + enum addr_type_t type = ANYCAST_ADDR; + return inet6_dump_addr(skb, cb, type); +} + static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; @@ -2835,6 +2968,8 @@ static struct rtnetlink_link inet6_rtnet [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, }, [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, }, [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, }, + [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, }, + [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, }, [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, }, [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, --- linux-2.6.6-rc1/net/ipv6/af_inet6.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/af_inet6.c 2004-04-18 22:25:25.087016672 -0700 @@ -474,13 +474,7 @@ int inet6_ioctl(struct socket *sock, uns switch(cmd) { case SIOCGSTAMP: - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - err = copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)); - if (err) - return -EFAULT; - return 0; + return sock_get_timestamp(sk, (struct timeval *)arg); case SIOCADDRT: case SIOCDELRT: --- linux-2.6.6-rc1/net/ipv6/ah6.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/ipv6/ah6.c 2004-04-18 22:25:25.088016520 -0700 @@ -92,8 +92,8 @@ static int ipv6_clear_mutable_options(st *nh_offset = offset; offset += ipv6_optlen(exthdr); 
if (!zero_out_mutable_opts(exthdr)) { - if (net_ratelimit()) - printk(KERN_WARNING "overrun hopopts\n"); + LIMIT_NETDEBUG( + printk(KERN_WARNING "overrun hopopts\n")); return 0; } nexthdr = exthdr->nexthdr; @@ -112,8 +112,8 @@ static int ipv6_clear_mutable_options(st *nh_offset = offset; offset += ipv6_optlen(exthdr); if (!zero_out_mutable_opts(exthdr)) { - if (net_ratelimit()) - printk(KERN_WARNING "overrun destopt\n"); + LIMIT_NETDEBUG( + printk(KERN_WARNING "overrun destopt\n")); return 0; } nexthdr = exthdr->nexthdr; @@ -130,8 +130,8 @@ static int ipv6_clear_mutable_options(st exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); nextnexthdr = exthdr->nexthdr; if (!zero_out_mutable_opts(exthdr)) { - if (net_ratelimit()) - printk(KERN_WARNING "overrun destopt\n"); + LIMIT_NETDEBUG( + printk(KERN_WARNING "overrun destopt\n")); return 0; } } @@ -162,7 +162,7 @@ int ah6_output(struct sk_buff *skb) } spin_lock_bh(&x->lock); - err = xfrm_check_output(x, skb, AF_INET); + err = xfrm_check_output(x, skb, AF_INET6); if (err) goto error; @@ -322,8 +322,8 @@ int ah6_input(struct xfrm_state *x, stru skb_push(skb, skb->data - skb->nh.raw); ahp->icv(ahp, skb, ah->auth_data); if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { - if (net_ratelimit()) - printk(KERN_WARNING "ipsec ah authentication error\n"); + LIMIT_NETDEBUG( + printk(KERN_WARNING "ipsec ah authentication error\n")); x->stats.integrity_failed++; goto free_out; } @@ -368,9 +368,9 @@ void ah6_err(struct sk_buff *skb, struct if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" + NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/" "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", - ntohl(ah->spi), NIP6(iph->daddr)); + ntohl(ah->spi), NIP6(iph->daddr))); xfrm_state_put(x); } --- linux-2.6.6-rc1/net/ipv6/datagram.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv6/datagram.c 2004-04-18 22:25:25.088016520 -0700 @@ -427,8 +427,8 @@ int datagram_send_ctl(struct msghdr *msg break; default: - if 
(net_ratelimit()) - printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type)); err = -EINVAL; break; }; --- linux-2.6.6-rc1/net/ipv6/esp6.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/esp6.c 2004-04-18 22:25:25.089016368 -0700 @@ -278,9 +278,8 @@ int esp6_input(struct xfrm_state *x, str padlen = nexthdr[0]; if (padlen+2 >= elen) { - if (net_ratelimit()) { - printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen); - } + LIMIT_NETDEBUG( + printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen)); ret = -EINVAL; goto out; } --- linux-2.6.6-rc1/net/ipv6/exthdrs.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv6/exthdrs.c 2004-04-18 22:25:25.090016216 -0700 @@ -159,6 +159,7 @@ static int ipv6_destopt_rcv(struct sk_bu if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { + IP6_INC_STATS_BH(Ip6InHdrErrors); kfree_skb(skb); return -1; } @@ -171,6 +172,7 @@ static int ipv6_destopt_rcv(struct sk_bu return 1; } + IP6_INC_STATS_BH(Ip6InHdrErrors); return -1; } @@ -234,6 +236,7 @@ static int ipv6_rthdr_rcv(struct sk_buff if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) || skb->pkt_type != PACKET_HOST) { + IP6_INC_STATS_BH(Ip6InAddrErrors); kfree_skb(skb); return -1; } @@ -249,11 +252,13 @@ looped_back: } if (hdr->type != IPV6_SRCRT_TYPE_0) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } if (hdr->hdrlen & 0x01) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); return -1; } @@ -266,6 +271,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw); return -1; } @@ -276,8 +282,11 @@ looped_back: if 
(skb_cloned(skb)) { struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); kfree_skb(skb); - if (skb2 == NULL) + /* the copy is a forwarded packet */ + if (skb2 == NULL) { + IP6_INC_STATS_BH(Ip6OutDiscards); return -1; + } *skbp = skb = skb2; opt = (struct inet6_skb_parm *)skb2->cb; hdr = (struct ipv6_rt_hdr *) skb2->h.raw; @@ -293,6 +302,7 @@ looped_back: addr += i - 1; if (ipv6_addr_is_multicast(addr)) { + IP6_INC_STATS_BH(Ip6InAddrErrors); kfree_skb(skb); return -1; } @@ -309,6 +319,7 @@ looped_back: } if (skb->dst->dev->flags&IFF_LOOPBACK) { if (skb->nh.ipv6h->hop_limit <= 1) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); kfree_skb(skb); @@ -410,8 +421,8 @@ static int ipv6_hop_ra(struct sk_buff *s ((struct inet6_skb_parm*)skb->cb)->ra = optoff; return 1; } - if (net_ratelimit()) - printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1])); kfree_skb(skb); return 0; } @@ -423,17 +434,20 @@ static int ipv6_hop_jumbo(struct sk_buff u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { - if (net_ratelimit()) - printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1])); + IP6_INC_STATS_BH(Ip6InHdrErrors); goto drop; } pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2)); if (pkt_len <= IPV6_MAXPLEN) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (skb->nh.ipv6h->payload_len) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } --- linux-2.6.6-rc1/net/ipv6/icmp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/icmp.c 2004-04-18 22:25:25.091016064 -0700 @@ -329,8 +329,8 @@ void icmpv6_send(struct sk_buff *skb, in * for now we 
don't know that. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - if (net_ratelimit()) - printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n")); return; } @@ -338,8 +338,8 @@ void icmpv6_send(struct sk_buff *skb, in * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - if (net_ratelimit()) - printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n")); return; } @@ -385,8 +385,8 @@ void icmpv6_send(struct sk_buff *skb, in len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr)); if (len < 0) { - if (net_ratelimit()) - printk(KERN_DEBUG "icmp: len problem\n"); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "icmp: len problem\n")); goto out_dst_release; } @@ -570,17 +570,17 @@ static int icmpv6_rcv(struct sk_buff **p skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) { - if (net_ratelimit()) - printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ICMPv6 hw checksum failed\n")); skb->ip_summed = CHECKSUM_NONE; } } if (skb->ip_summed == CHECKSUM_NONE) { if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb_checksum(skb, 0, skb->len, 0))) { - if (net_ratelimit()) + LIMIT_NETDEBUG( printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", - NIP6(*saddr), NIP6(*daddr)); + NIP6(*saddr), NIP6(*daddr))); goto discard_it; } } @@ -646,11 +646,12 @@ static int icmpv6_rcv(struct sk_buff **p break; case ICMPV6_MGM_REDUCTION: + case ICMPV6_MLD2_REPORT: break; default: - if (net_ratelimit()) - printk(KERN_DEBUG "icmpv6: msg of unknown type\n"); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "icmpv6: msg of unknown type\n")); /* informational */ if (type & 
ICMPV6_INFOMSG_MASK) --- linux-2.6.6-rc1/net/ipv6/ip6_input.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/ip6_input.c 2004-04-18 22:25:25.091016064 -0700 @@ -79,8 +79,10 @@ int ipv6_rcv(struct sk_buff *skb, struct if (skb->len < sizeof(struct ipv6hdr)) goto err; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) { + IP6_INC_STATS_BH(Ip6InHdrErrors); goto drop; + } hdr = skb->nh.ipv6h; @@ -94,8 +96,10 @@ int ipv6_rcv(struct sk_buff *skb, struct if (pkt_len + sizeof(struct ipv6hdr) > skb->len) goto truncated; if (pkt_len + sizeof(struct ipv6hdr) < skb->len) { - if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))) + if (__pskb_trim(skb, pkt_len + sizeof(struct ipv6hdr))){ + IP6_INC_STATS_BH(Ip6InHdrErrors); goto drop; + } hdr = skb->nh.ipv6h; if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; @@ -206,6 +210,7 @@ resubmit: return 0; discard: + IP6_INC_STATS_BH(Ip6InDiscards); rcu_read_unlock(); kfree_skb(skb); return 0; --- linux-2.6.6-rc1/net/ipv6/ip6_output.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv6/ip6_output.c 2004-04-18 22:25:25.093015760 -0700 @@ -87,6 +87,7 @@ static inline int ip6_output_finish(stru } else if (dst->neighbour) return dst->neighbour->output(skb); + IP6_INC_STATS_BH(Ip6OutNoRoutes); kfree_skb(skb); return -EINVAL; @@ -131,6 +132,7 @@ int ip6_output2(struct sk_buff *skb) ip6_dev_loopback_xmit); if (skb->nh.ipv6h->hop_limit == 0) { + IP6_INC_STATS(Ip6OutDiscards); kfree_skb(skb); return 0; } @@ -167,8 +169,9 @@ int ip6_route_me_harder(struct sk_buff * dst = ip6_route_output(skb->sk, &fl); if (dst->error) { - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); + IP6_INC_STATS(Ip6OutNoRoutes); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n")); dst_release(dst); return -EINVAL; } @@ -224,8 +227,10 @@ int ip6_xmit(struct sock *sk, struct sk_ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); 
kfree_skb(skb); skb = skb2; - if (skb == NULL) + if (skb == NULL) { + IP6_INC_STATS(Ip6OutDiscards); return -ENOBUFS; + } if (sk) skb_set_owner_w(skb, sk); } @@ -265,6 +270,7 @@ int ip6_xmit(struct sock *sk, struct sk_ printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP6_INC_STATS(Ip6FragFails); kfree_skb(skb); return -EMSGSIZE; } @@ -345,8 +351,10 @@ int ip6_forward(struct sk_buff *skb) if (ipv6_devconf.forwarding == 0) goto error; - if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) + if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { + IP6_INC_STATS(Ip6InDiscards); goto drop; + } skb->ip_summed = CHECKSUM_NONE; @@ -382,8 +390,10 @@ int ip6_forward(struct sk_buff *skb) return -ETIMEDOUT; } - if (!xfrm6_route_forward(skb)) + if (!xfrm6_route_forward(skb)) { + IP6_INC_STATS(Ip6InDiscards); goto drop; + } /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. 
@@ -420,12 +430,15 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev); IP6_INC_STATS_BH(Ip6InTooBigErrors); + IP6_INC_STATS_BH(Ip6FragFails); kfree_skb(skb); return -EMSGSIZE; } - if (skb_cow(skb, dst->dev->hard_header_len)) + if (skb_cow(skb, dst->dev->hard_header_len)) { + IP6_INC_STATS(Ip6OutDiscards); goto drop; + } hdr = skb->nh.ipv6h; @@ -648,6 +661,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n")); + IP6_INC_STATS(Ip6FragFails); err = -ENOMEM; goto fail; } @@ -1062,6 +1076,7 @@ int ip6_push_pending_frames(struct sock ipv6_addr_copy(&hdr->daddr, final_dst); skb->dst = dst_clone(&rt->u.dst); + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output); if (err) { if (err > 0) @@ -1092,8 +1107,10 @@ void ip6_flush_pending_frames(struct soc struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; - while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) + while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { + IP6_INC_STATS(Ip6OutDiscards); kfree_skb(skb); + } inet->cork.flags &= ~IPCORK_OPT; --- linux-2.6.6-rc1/net/ipv6/mcast.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/mcast.c 2004-04-18 22:25:25.094015608 -0700 @@ -1317,6 +1317,7 @@ static void mld_sendpack(struct sk_buff struct inet6_dev *idev = in6_dev_get(skb->dev); int err; + IP6_INC_STATS(Ip6OutRequests); payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h - sizeof(struct ipv6hdr); mldlen = skb->tail - skb->h.raw; @@ -1326,8 +1327,12 @@ static void mld_sendpack(struct sk_buff IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0)); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev, dev_queue_xmit); - if (!err) + if (!err) { ICMP6_INC_STATS(idev,Icmp6OutMsgs); + 
IP6_INC_STATS(Ip6OutMcastPkts); + } else + IP6_INC_STATS(Ip6OutDiscards); + if (likely(idev != NULL)) in6_dev_put(idev); } @@ -1608,6 +1613,7 @@ static void igmp6_send(struct in6_addr * IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; + IP6_INC_STATS(Ip6OutRequests); snd_addr = addr; if (type == ICMPV6_MGM_REDUCTION) { snd_addr = &all_routers; @@ -1620,8 +1626,10 @@ static void igmp6_send(struct in6_addr * skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err); - if (skb == NULL) + if (skb == NULL) { + IP6_INC_STATS(Ip6OutDiscards); return; + } skb_reserve(skb, LL_RESERVED_SPACE(dev)); if (dev->hard_header) { @@ -1664,13 +1672,16 @@ static void igmp6_send(struct in6_addr * else ICMP6_INC_STATS(idev, Icmp6OutGroupMembResponses); ICMP6_INC_STATS(idev, Icmp6OutMsgs); - } + IP6_INC_STATS(Ip6OutMcastPkts); + } else + IP6_INC_STATS(Ip6OutDiscards); if (likely(idev != NULL)) in6_dev_put(idev); return; out: + IP6_INC_STATS(Ip6OutDiscards); kfree_skb(skb); } --- linux-2.6.6-rc1/net/ipv6/ndisc.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/ndisc.c 2004-04-18 22:25:25.096015304 -0700 @@ -452,6 +452,7 @@ static void ndisc_send_na(struct net_dev skb->dst = dst; idev = in6_dev_get(dst->dev); + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, Icmp6OutNeighborAdvertisements); @@ -535,6 +536,7 @@ void ndisc_send_ns(struct net_device *de /* send it! */ skb->dst = dst; idev = in6_dev_get(dst->dev); + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, Icmp6OutNeighborSolicits); @@ -607,6 +609,7 @@ void ndisc_send_rs(struct net_device *de /* send it! 
*/ skb->dst = dst; idev = in6_dev_get(dst->dev); + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, Icmp6OutRouterSolicits); @@ -761,7 +764,7 @@ static void ndisc_recv_ns(struct sk_buff if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { - if (skb->stamp.tv_sec != 0 && + if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && skb->pkt_type != PACKET_HOST && inc != 0 && idev->nd_parms->proxy_delay != 0) { @@ -1332,6 +1335,7 @@ void ndisc_send_redirect(struct sk_buff buff->dst = dst; idev = in6_dev_get(dst->dev); + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { ICMP6_INC_STATS(idev, Icmp6OutRedirects); @@ -1403,6 +1407,10 @@ static int ndisc_netdev_event(struct not neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0); break; + case NETDEV_DOWN: + neigh_ifdown(&nd_tbl, dev); + fib6_run_gc(0); + break; default: break; } --- /dev/null 2003-09-15 06:40:47.000000000 -0700 +++ 25/net/ipv6/netfilter/ip6table_raw.c 2004-04-18 22:25:25.098015000 -0700 @@ -0,0 +1,154 @@ +/* + * IPv6 raw table, a port of the IPv4 raw table to IPv6 + * + * Copyright (C) 2003 Jozsef Kadlecsik + */ +#include +#include + +#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) + +#if 0 +#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* Standard entry. 
*/ +struct ip6t_standard +{ + struct ip6t_entry entry; + struct ip6t_standard_target target; +}; + +struct ip6t_error_target +{ + struct ip6t_entry_target target; + char errorname[IP6T_FUNCTION_MAXNAMELEN]; +}; + +struct ip6t_error +{ + struct ip6t_entry entry; + struct ip6t_error_target target; +}; + +static struct +{ + struct ip6t_replace repl; + struct ip6t_standard entries[2]; + struct ip6t_error term; +} initial_table __initdata += { { "raw", RAW_VALID_HOOKS, 3, + sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), + { [NF_IP6_PRE_ROUTING] 0, + [NF_IP6_LOCAL_OUT] sizeof(struct ip6t_standard) }, + { [NF_IP6_PRE_ROUTING] 0, + [NF_IP6_LOCAL_OUT] sizeof(struct ip6t_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_standard), + 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_standard), + 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + }, + /* ERROR */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_error), + 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, + { } }, + "ERROR" + } + } +}; + +static struct ip6t_table packet_raw = { + .name = "raw", + .table = &initial_table.repl, + .valid_hooks = RAW_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE +}; + +/* The work comes in here from netfilter.c. 
*/ +static unsigned int +ip6t_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); +} + +static struct nf_hook_ops ip6t_ops[] = { + { + .hook = ip6t_hook, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST + }, + { + .hook = ip6t_hook, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_FIRST + }, +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ip6t_register_table(&packet_raw); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ip6t_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ip6t_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ip6t_ops[0]); + cleanup_table: + ip6t_unregister_table(&packet_raw); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ip6t_ops[i]); + + ip6t_unregister_table(&packet_raw); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); --- linux-2.6.6-rc1/net/ipv6/netfilter/ip6t_LOG.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv6/netfilter/ip6t_LOG.c 2004-04-18 22:25:25.098015000 -0700 @@ -18,12 +18,17 @@ #include #include #include +#include #include MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + struct in_device; #include #include @@ -265,40 +270,38 @@ static void dump_packet(const struct ip6 } } -static unsigned int -ip6t_log_target(struct sk_buff **pskb, - unsigned int hooknum, +static void +ip6t_log_packet(unsigned int hooknum, + const struct sk_buff *skb, const 
struct net_device *in, const struct net_device *out, - const void *targinfo, - void *userinfo) + const struct ip6t_log_info *loginfo, + const char *level_string, + const char *prefix) { - struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h; - const struct ip6t_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct ipv6hdr *ipv6h = skb->nh.ipv6h; - level_string[1] = '0' + (loginfo->level % 8); spin_lock_bh(&log_lock); printk(level_string); printk("%sIN=%s OUT=%s ", - loginfo->prefix, + prefix == NULL ? loginfo->prefix : prefix, in ? in->name : "", out ? out->name : ""); if (in && !out) { /* MAC logging for input chain only. */ printk("MAC="); - if ((*pskb)->dev && (*pskb)->dev->hard_header_len && (*pskb)->mac.raw != (void*)ipv6h) { - if ((*pskb)->dev->type != ARPHRD_SIT){ + if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) { + if (skb->dev->type != ARPHRD_SIT){ int i; - unsigned char *p = (*pskb)->mac.raw; - for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + unsigned char *p = skb->mac.raw; + for (i = 0; i < skb->dev->hard_header_len; i++,p++) printk("%02x%c", *p, - i==(*pskb)->dev->hard_header_len - 1 + i==skb->dev->hard_header_len - 1 ? ' ':':'); } else { int i; - unsigned char *p = (*pskb)->mac.raw; - if ( p - (ETH_ALEN*2+2) > (*pskb)->head ){ + unsigned char *p = skb->mac.raw; + if ( p - (ETH_ALEN*2+2) > skb->head ){ p -= (ETH_ALEN+2); for (i = 0; i < (ETH_ALEN); i++,p++) printk("%02x%s", *p, @@ -309,10 +312,10 @@ ip6t_log_target(struct sk_buff **pskb, i == ETH_ALEN-1 ? ' ' : ':'); } - if (((*pskb)->dev->addr_len == 4) && - (*pskb)->dev->hard_header_len > 20){ + if ((skb->dev->addr_len == 4) && + skb->dev->hard_header_len > 20){ printk("TUNNEL="); - p = (*pskb)->mac.raw + 12; + p = skb->mac.raw + 12; for (i = 0; i < 4; i++,p++) printk("%3d%s", *p, i == 3 ? 
"->" : "."); @@ -328,10 +331,41 @@ ip6t_log_target(struct sk_buff **pskb, dump_packet(loginfo, ipv6h, 1); printk("\n"); spin_unlock_bh(&log_lock); +} + +static unsigned int +ip6t_log_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); return IP6T_CONTINUE; } +static void +ip6t_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ip6t_log_info loginfo = { + .level = 0, + .logflags = IP6T_LOG_MASK, + .prefix = "" + }; + + ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); +} + static int ip6t_log_checkentry(const char *tablename, const struct ip6t_entry *e, void *targinfo, @@ -360,20 +394,27 @@ static int ip6t_log_checkentry(const cha return 1; } -static struct ip6t_target ip6t_log_reg -= { { NULL, NULL }, "LOG", ip6t_log_target, ip6t_log_checkentry, NULL, - THIS_MODULE }; +static struct ip6t_target ip6t_log_reg = { + .name = "LOG", + .target = ip6t_log_target, + .checkentry = ip6t_log_checkentry, + .me = THIS_MODULE, +}; static int __init init(void) { if (ip6t_register_target(&ip6t_log_reg)) return -EINVAL; + if (nflog) + nf_log_register(PF_INET6, &ip6t_logfn); return 0; } static void __exit fini(void) { + if (nflog) + nf_log_unregister(PF_INET6, &ip6t_logfn); ip6t_unregister_target(&ip6t_log_reg); } --- linux-2.6.6-rc1/net/ipv6/netfilter/Kconfig 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipv6/netfilter/Kconfig 2004-04-18 22:25:25.096015304 -0700 @@ -218,5 +218,17 @@ config IP6_NF_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. 
#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES +config IP6_NF_RAW + tristate 'raw table support (required for TRACE)' + depends on IP6_NF_IPTABLES + help + This option adds a `raw' table to ip6tables. This table is the very + first in the netfilter framework and hooks in at the PREROUTING + and OUTPUT chains. + + If you want to compile it as a module, say M here and read + <file:Documentation/modules.txt>. If unsure, say `N'. + help + endmenu --- linux-2.6.6-rc1/net/ipv6/netfilter/Makefile 2003-06-14 12:18:23.000000000 -0700 +++ 25/net/ipv6/netfilter/Makefile 2004-04-18 22:25:25.096015304 -0700 @@ -21,4 +21,5 @@ obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_ obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o +obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o --- linux-2.6.6-rc1/net/ipv6/raw.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/raw.c 2004-04-18 22:25:25.099014848 -0700 @@ -328,7 +328,8 @@ int rawv6_rcv(struct sock *sk, struct sk if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, skb->len, inet->num, skb->csum)) { - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "raw v6 hw csum failure.\n")); + LIMIT_NETDEBUG( + printk(KERN_DEBUG "raw v6 hw csum failure.\n")); skb->ip_summed = CHECKSUM_NONE; } } @@ -526,6 +527,7 @@ static int rawv6_send_hdrinc(struct sock if (err) goto error_fault; + IP6_INC_STATS(Ip6OutRequests); err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) --- linux-2.6.6-rc1/net/ipv6/reassembly.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/ipv6/reassembly.c 2004-04-18 22:25:25.100014696 -0700 @@ -426,6 +426,7 @@ static void ip6_frag_queue(struct frag_q ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw); return; } 
@@ -452,6 +453,7 @@ static void ip6_frag_queue(struct frag_q /* RFC2460 says always send parameter problem in * this case. -DaveM */ + IP6_INC_STATS_BH(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); return; @@ -570,6 +572,7 @@ static void ip6_frag_queue(struct frag_q return; err: + IP6_INC_STATS(Ip6ReasmFails); kfree_skb(skb); } @@ -694,10 +697,12 @@ static int ipv6_frag_rcv(struct sk_buff /* Jumbo payload inhibits frag. header */ if (hdr->payload_len==0) { + IP6_INC_STATS(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) { + IP6_INC_STATS(Ip6InHdrErrors); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); return -1; } --- linux-2.6.6-rc1/net/ipv6/tcp_ipv6.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/ipv6/tcp_ipv6.c 2004-04-18 22:25:25.101014544 -0700 @@ -1425,7 +1425,7 @@ static int tcp_v6_checksum_init(struct s if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "hw tcp v6 csum failed\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n")); } if (skb->len <= 76) { if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, --- linux-2.6.6-rc1/net/ipv6/udp.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/ipv6/udp.c 2004-04-18 22:25:25.102014392 -0700 @@ -634,8 +634,8 @@ static int udpv6_rcv(struct sk_buff **ps /* RFC 2460 section 8.1 says that we SHOULD log this error. Well, it is reasonable. 
*/ - if (net_ratelimit()) - printk(KERN_INFO "IPv6: udp checksum is 0\n"); + LIMIT_NETDEBUG( + printk(KERN_INFO "IPv6: udp checksum is 0\n")); goto discard; } @@ -650,7 +650,7 @@ static int udpv6_rcv(struct sk_buff **ps if (skb->ip_summed==CHECKSUM_HW) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) { - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v6 hw csum failure.\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v6 hw csum failure.\n")); skb->ip_summed = CHECKSUM_NONE; } } @@ -913,6 +913,7 @@ do_udp_sendmsg: if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); + opt->tot_len = sizeof(*opt); err = datagram_send_ctl(msg, fl, opt, &hlimit); if (err < 0) { @@ -970,7 +971,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); err = -EINVAL; goto out; } --- linux-2.6.6-rc1/net/ipx/af_ipx.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/ipx/af_ipx.c 2004-04-18 22:25:25.104014088 -0700 @@ -1797,7 +1797,8 @@ static int ipx_recvmsg(struct kiocb *ioc copied); if (rc) goto out_free; - sk->sk_stamp = skb->stamp; + if (skb->stamp.tv_sec) + sk->sk_stamp = skb->stamp; msg->msg_namelen = sizeof(*sipx); @@ -1870,15 +1871,8 @@ static int ipx_ioctl(struct socket *sock break; case SIOCGSTAMP: rc = -EINVAL; - if (sk) { - rc = -ENOENT; - if (!sk->sk_stamp.tv_sec) - break; - rc = -EFAULT; - if (!copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval))) - rc = 0; - } + if (sk) + rc = sock_get_timestamp(sk, (struct timeval *)arg); break; case SIOCGIFDSTADDR: case SIOCSIFDSTADDR: --- linux-2.6.6-rc1/net/irda/af_irda.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/irda/af_irda.c 2004-04-18 22:25:25.106013784 -0700 @@ -1796,14 +1796,8 @@ static int irda_ioctl(struct socket *soc } case SIOCGSTAMP: - if (sk 
!= NULL) { - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - if (copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval))) - return -EFAULT; - return 0; - } + if (sk != NULL) + return sock_get_timestamp(sk, (struct timeval *)arg); return -EINVAL; case SIOCGIFADDR: --- linux-2.6.6-rc1/net/irda/irlan/irlan_client.c 2003-08-22 19:23:42.000000000 -0700 +++ 25/net/irda/irlan/irlan_client.c 2004-04-18 22:25:25.107013632 -0700 @@ -343,6 +343,52 @@ void irlan_client_reconnect_data_channel irttp_data_request(self->client.tsap_ctrl, skb); } + +/* + * Function print_ret_code (code) + * + * Print return code of request to peer IrLAN layer. + * + */ +static void print_ret_code(__u8 code) +{ + switch(code) { + case 0: + printk(KERN_INFO "Success\n"); + break; + case 1: + WARNING("IrLAN: Insufficient resources\n"); + break; + case 2: + WARNING("IrLAN: Invalid command format\n"); + break; + case 3: + WARNING("IrLAN: Command not supported\n"); + break; + case 4: + WARNING("IrLAN: Parameter not supported\n"); + break; + case 5: + WARNING("IrLAN: Value not supported\n"); + break; + case 6: + WARNING("IrLAN: Not open\n"); + break; + case 7: + WARNING("IrLAN: Authentication required\n"); + break; + case 8: + WARNING("IrLAN: Invalid password\n"); + break; + case 9: + WARNING("IrLAN: Protocol error\n"); + break; + case 255: + WARNING("IrLAN: Asynchronous status\n"); + break; + } +} + /* * Function irlan_client_parse_response (self, skb) * --- linux-2.6.6-rc1/net/irda/irlan/irlan_common.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/irda/irlan/irlan_common.c 2004-04-18 22:25:25.109013328 -0700 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -75,14 +76,14 @@ static int eth; /* Use "eth" or "irlan static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */ #ifdef CONFIG_PROC_FS -static char *irlan_access[] = { +static const char *irlan_access[] = { "UNKNOWN", "DIRECT", "PEER", "HOSTED" }; -static char *irlan_media[] = { +static const char 
*irlan_media[] = { "UNKNOWN", "802.3", "802.5" @@ -115,12 +116,12 @@ void irlan_close_tsaps(struct irlan_cb * * Initialize IrLAN layer * */ -int __init irlan_init(void) +static int __init irlan_init(void) { struct irlan_cb *new; __u16 hints; - IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); + IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); #ifdef CONFIG_PROC_FS { struct proc_dir_entry *proc; @@ -156,7 +157,7 @@ int __init irlan_init(void) return 0; } -void __exit irlan_cleanup(void) +static void __exit irlan_cleanup(void) { struct irlan_cb *self, *next; @@ -191,9 +192,7 @@ struct irlan_cb *irlan_open(__u32 saddr, IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); /* Create network device with irlan */ - dev = alloc_netdev(sizeof(*self), - eth ? "eth%d" : "irlan%d", - irlan_eth_setup); + dev = alloc_irlandev(eth ? "eth%d" : "irlan%d"); if (!dev) return NULL; @@ -209,6 +208,19 @@ struct irlan_cb *irlan_open(__u32 saddr, /* Provider access can only be PEER, DIRECT, or HOSTED */ self->provider.access_type = access; + if (access == ACCESS_DIRECT) { + /* + * Since we are emulating an IrLAN server we will have to + * give ourselves an ethernet address! 
+ */ + dev->dev_addr[0] = 0x40; + dev->dev_addr[1] = 0x00; + dev->dev_addr[2] = 0x00; + dev->dev_addr[3] = 0x00; + get_random_bytes(dev->dev_addr+4, 1); + get_random_bytes(dev->dev_addr+5, 1); + } + self->media = MEDIA_802_3; self->disconnect_reason = LM_USER_REQUEST; init_timer(&self->watchdog_timer); @@ -242,16 +254,14 @@ struct irlan_cb *irlan_open(__u32 saddr, */ static void __irlan_close(struct irlan_cb *self) { - struct sk_buff *skb; - IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); ASSERT_RTNL(); ASSERT(self != NULL, return;); ASSERT(self->magic == IRLAN_MAGIC, return;); - del_timer(&self->watchdog_timer); - del_timer(&self->client.kick_timer); + del_timer_sync(&self->watchdog_timer); + del_timer_sync(&self->client.kick_timer); /* Close all open connections and remove TSAPs */ irlan_close_tsaps(self); @@ -260,8 +270,7 @@ static void __irlan_close(struct irlan_c iriap_close(self->client.iriap); /* Remove frames queued on the control channel */ - while ((skb = skb_dequeue(&self->client.txq))) - dev_kfree_skb(skb); + skb_queue_purge(&self->client.txq); /* Unregister and free self via destructor */ unregister_netdevice(self->dev); @@ -303,7 +312,7 @@ void irlan_connect_indication(void *inst self->max_sdu_size = max_sdu_size; self->max_header_size = max_header_size; - IRDA_DEBUG(0, "IrLAN, We are now connected!\n"); + IRDA_DEBUG(0, "%s: We are now connected!\n", __FUNCTION__); del_timer(&self->watchdog_timer); @@ -345,7 +354,7 @@ void irlan_connect_confirm(void *instanc /* TODO: we could set the MTU depending on the max_sdu_size */ - IRDA_DEBUG(2, "IrLAN, We are now connected!\n"); + IRDA_DEBUG(0, "%s: We are now connected!\n", __FUNCTION__); del_timer(&self->watchdog_timer); /* @@ -451,7 +460,7 @@ void irlan_open_data_tsap(struct irlan_c notify.udata_indication = irlan_eth_receive; notify.connect_indication = irlan_connect_indication; notify.connect_confirm = irlan_connect_confirm; - /*notify.flow_indication = irlan_eth_flow_indication;*/ + notify.flow_indication = 
irlan_eth_flow_indication; notify.disconnect_indication = irlan_disconnect_indication; notify.instance = self; strlcpy(notify.name, "IrLAN data", sizeof(notify.name)); @@ -1168,51 +1177,6 @@ static int irlan_seq_open(struct inode * } #endif -/* - * Function print_ret_code (code) - * - * Print return code of request to peer IrLAN layer. - * - */ -void print_ret_code(__u8 code) -{ - switch(code) { - case 0: - printk(KERN_INFO "Success\n"); - break; - case 1: - WARNING("IrLAN: Insufficient resources\n"); - break; - case 2: - WARNING("IrLAN: Invalid command format\n"); - break; - case 3: - WARNING("IrLAN: Command not supported\n"); - break; - case 4: - WARNING("IrLAN: Parameter not supported\n"); - break; - case 5: - WARNING("IrLAN: Value not supported\n"); - break; - case 6: - WARNING("IrLAN: Not open\n"); - break; - case 7: - WARNING("IrLAN: Authentication required\n"); - break; - case 8: - WARNING("IrLAN: Invalid password\n"); - break; - case 9: - WARNING("IrLAN: Protocol error\n"); - break; - case 255: - WARNING("IrLAN: Asynchronous status\n"); - break; - } -} - MODULE_AUTHOR("Dag Brattli "); MODULE_DESCRIPTION("The Linux IrDA LAN protocol"); MODULE_LICENSE("GPL"); @@ -1222,19 +1186,6 @@ MODULE_PARM_DESC(eth, "Name devices ethX MODULE_PARM(access, "i"); MODULE_PARM_DESC(access, "Access type DIRECT=1, PEER=2, HOSTED=3"); -/* - * Function init_module (void) - * - * Initialize the IrLAN module, this function is called by the - * modprobe(1) program. 
- */ module_init(irlan_init); - -/* - * Function cleanup_module (void) - * - * Remove the IrLAN module, this function is called by the rmmod(1) - * program - */ module_exit(irlan_cleanup); --- linux-2.6.6-rc1/net/irda/irlan/irlan_eth.c 2004-03-10 20:41:31.000000000 -0800 +++ 25/net/irda/irlan/irlan_eth.c 2004-04-18 22:25:25.110013176 -0700 @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -41,20 +40,20 @@ #include #include +static int irlan_eth_open(struct net_device *dev); +static int irlan_eth_close(struct net_device *dev); +static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); +static void irlan_eth_set_multicast_list( struct net_device *dev); +static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); + /* - * Function irlan_eth_init (dev) + * Function irlan_eth_setup (dev) * * The network device initialization function. * */ -void irlan_eth_setup(struct net_device *dev) +static void irlan_eth_setup(struct net_device *dev) { - struct irlan_cb *self; - - IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); - - self = (struct irlan_cb *) dev->priv; - dev->open = irlan_eth_open; dev->stop = irlan_eth_close; dev->hard_start_xmit = irlan_eth_xmit; @@ -71,20 +70,30 @@ void irlan_eth_setup(struct net_device * * Queueing here as well can introduce some strange latency * problems, which we will avoid by setting the queue size to 0. */ - dev->tx_queue_len = 0; + /* + * The bugs in IrTTP and IrLAN that created this latency issue + * have now been fixed, and we can propagate flow control properly + * to the network layer. However, this requires a minimal queue of + * packets for the device. + * Without flow control, the Tx Queue is 14 (ttp) + 0 (dev) = 14 + * With flow control, the Tx Queue is 7 (ttp) + 4 (dev) = 11 + * See irlan_eth_flow_indication()... + * Note : this number was randomly selected and would need to + * be adjusted. 
+ * Jean II */ + dev->tx_queue_len = 4; +} - if (self->provider.access_type == ACCESS_DIRECT) { - /* - * Since we are emulating an IrLAN sever we will have to - * give ourself an ethernet address! - */ - dev->dev_addr[0] = 0x40; - dev->dev_addr[1] = 0x00; - dev->dev_addr[2] = 0x00; - dev->dev_addr[3] = 0x00; - get_random_bytes(dev->dev_addr+4, 1); - get_random_bytes(dev->dev_addr+5, 1); - } +/* + * Function alloc_irlandev + * + * Allocate network device and control block + * + */ +struct net_device *alloc_irlandev(const char *name) +{ + return alloc_netdev(sizeof(struct irlan_cb), name, + irlan_eth_setup); } /* @@ -93,18 +102,12 @@ void irlan_eth_setup(struct net_device * * Network device has been opened by user * */ -int irlan_eth_open(struct net_device *dev) +static int irlan_eth_open(struct net_device *dev) { - struct irlan_cb *self; + struct irlan_cb *self = netdev_priv(dev); IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); - ASSERT(dev != NULL, return -1;); - - self = (struct irlan_cb *) dev->priv; - - ASSERT(self != NULL, return -1;); - /* Ready to play! */ netif_stop_queue(dev); /* Wait until data link is ready */ @@ -112,10 +115,10 @@ int irlan_eth_open(struct net_device *de self->disconnect_reason = 0; irlan_client_wakeup(self, self->saddr, self->daddr); - /* Make sure we have a hardware address before we return, so DHCP clients gets happy */ - interruptible_sleep_on(&self->open_wait); - - return 0; + /* Make sure we have a hardware address before we return, + so DHCP clients gets happy */ + return wait_event_interruptible(self->open_wait, + !self->tsap_data->connected); } /* @@ -126,10 +129,9 @@ int irlan_eth_open(struct net_device *de * close timer, so that the instance will be removed if we are unable * to discover the remote device after the disconnect. 
*/ -int irlan_eth_close(struct net_device *dev) +static int irlan_eth_close(struct net_device *dev) { - struct irlan_cb *self = (struct irlan_cb *) dev->priv; - struct sk_buff *skb; + struct irlan_cb *self = netdev_priv(dev); IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); @@ -143,8 +145,7 @@ int irlan_eth_close(struct net_device *d irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL); /* Remove frames queued on the control channel */ - while ((skb = skb_dequeue(&self->client.txq))) - dev_kfree_skb(skb); + skb_queue_purge(&self->client.txq); self->client.tx_busy = 0; @@ -157,16 +158,11 @@ int irlan_eth_close(struct net_device *d * Transmits ethernet frames over IrDA link. * */ -int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev) +static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev) { - struct irlan_cb *self; + struct irlan_cb *self = netdev_priv(dev); int ret; - self = (struct irlan_cb *) dev->priv; - - ASSERT(self != NULL, return 0;); - ASSERT(self->magic == IRLAN_MAGIC, return 0;); - /* skb headroom large enough to contain all IrDA-headers? */ if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) { struct sk_buff *new_skb = @@ -220,9 +216,7 @@ int irlan_eth_xmit(struct sk_buff *skb, */ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) { - struct irlan_cb *self; - - self = (struct irlan_cb *) instance; + struct irlan_cb *self = instance; if (skb == NULL) { ++self->stats.rx_dropped; @@ -251,6 +245,14 @@ int irlan_eth_receive(void *instance, vo * * Do flow control between IP/Ethernet and IrLAN/IrTTP. This is done by * controlling the queue stop/start. + * + * The IrDA link layer has the advantage of having flow control, and + * IrTTP now properly handles that. Flow controlling the higher layers + * prevents us from dropping Tx packets in here (up to 15% for a TCP socket, + * more for a UDP socket). + * Also, this allows us to reduce the overall transmit queue, which means + * less latency in case of mixed traffic. 
+ * Jean II */ void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) { @@ -266,37 +268,25 @@ void irlan_eth_flow_indication(void *ins ASSERT(dev != NULL, return;); + IRDA_DEBUG(0, "%s() : flow %s ; running %d\n", __FUNCTION__, + flow == FLOW_STOP ? "FLOW_STOP" : "FLOW_START", + netif_running(dev)); + switch (flow) { case FLOW_STOP: + /* IrTTP is full, stop higher layers */ netif_stop_queue(dev); break; case FLOW_START: default: /* Tell upper layers that its time to transmit frames again */ /* Schedule network layer */ - netif_start_queue(dev); + netif_wake_queue(dev); break; } } /* - * Function irlan_eth_rebuild_header (buff, dev, dest, skb) - * - * If we don't want to use ARP. Currently not used!! - * - */ -void irlan_eth_rebuild_header(void *buff, struct net_device *dev, - unsigned long dest, struct sk_buff *skb) -{ - struct ethhdr *eth = (struct ethhdr *) buff; - - memcpy(eth->h_source, dev->dev_addr, dev->addr_len); - memcpy(eth->h_dest, dev->dev_addr, dev->addr_len); - - /* return 0; */ -} - -/* * Function irlan_etc_send_gratuitous_arp (dev) * * Send gratuitous ARP to announce that we have changed @@ -336,17 +326,12 @@ void irlan_eth_send_gratuitous_arp(struc * */ #define HW_MAX_ADDRS 4 /* Must query to get it! 
*/ -void irlan_eth_set_multicast_list(struct net_device *dev) +static void irlan_eth_set_multicast_list(struct net_device *dev) { - struct irlan_cb *self; - - self = dev->priv; + struct irlan_cb *self = netdev_priv(dev); IRDA_DEBUG(2, "%s()\n", __FUNCTION__ ); - ASSERT(self != NULL, return;); - ASSERT(self->magic == IRLAN_MAGIC, return;); - /* Check if data channel has been connected yet */ if (self->client.state != IRLAN_DATA) { IRDA_DEBUG(1, "%s(), delaying!\n", __FUNCTION__ ); @@ -388,12 +373,9 @@ void irlan_eth_set_multicast_list(struct * Get the current statistics for this device * */ -struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) +static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) { - struct irlan_cb *self = (struct irlan_cb *) dev->priv; - - ASSERT(self != NULL, return NULL;); - ASSERT(self->magic == IRLAN_MAGIC, return NULL;); + struct irlan_cb *self = netdev_priv(dev); return &self->stats; } --- linux-2.6.6-rc1/net/irda/irlan/irlan_filter.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/irda/irlan/irlan_filter.c 2004-04-18 22:25:25.111013024 -0700 @@ -29,12 +29,12 @@ #include /* - * Function handle_filter_request (self, skb) + * Function irlan_filter_request (self, skb) * * Handle filter request from client peer device * */ -void handle_filter_request(struct irlan_cb *self, struct sk_buff *skb) +void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb) { ASSERT(self != NULL, return;); ASSERT(self->magic == IRLAN_MAGIC, return;); --- linux-2.6.6-rc1/net/irda/irlan/irlan_provider.c 2003-06-14 12:18:07.000000000 -0700 +++ 25/net/irda/irlan/irlan_provider.c 2004-04-18 22:25:25.111013024 -0700 @@ -358,7 +358,7 @@ void irlan_provider_send_reply(struct ir 12); break; case CMD_FILTER_OPERATION: - handle_filter_request(self, skb); + irlan_filter_request(self, skb); break; default: IRDA_DEBUG(2, "%s(), Unknown command!\n", __FUNCTION__ ); --- linux-2.6.6-rc1/net/irda/irlap_event.c 2004-02-03 
20:42:39.000000000 -0800 +++ 25/net/irda/irlap_event.c 2004-04-18 22:25:25.113012720 -0700 @@ -2236,6 +2236,14 @@ static int irlap_state_sclose(struct irl irlap_disconnect_indication(self, LAP_DISC_INDICATION); break; case RECV_DM_RSP: + /* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames + * shall take us down into default NDM state, like DM_RSP + */ + case RECV_RR_RSP: + case RECV_RNR_RSP: + case RECV_REJ_RSP: + case RECV_SREJ_RSP: + case RECV_I_RSP: /* Always switch state before calling upper layers */ irlap_next_state(self, LAP_NDM); @@ -2253,6 +2261,17 @@ static int irlap_state_sclose(struct irl irlap_disconnect_indication(self, LAP_DISC_INDICATION); break; default: + /* IrLAP-1.1 p.82: in SCLOSE, basically any received frame + * with pf=1 shall restart the wd-timer and resend the rd:rsp + */ + if (info != NULL && info->pf) { + del_timer(&self->wd_timer); + irlap_wait_min_turn_around(self, &self->qos_tx); + irlap_send_rd_frame(self); + irlap_start_wd_timer(self, self->wd_timeout); + break; /* stay in SCLOSE */ + } + IRDA_DEBUG(1, "%s(), Unknown event %d, (%s)\n", __FUNCTION__, event, irlap_event[event]); --- linux-2.6.6-rc1/net/irda/Kconfig 2003-10-08 15:07:10.000000000 -0700 +++ 25/net/irda/Kconfig 2004-04-18 22:25:25.104014088 -0700 @@ -2,11 +2,9 @@ # IrDA protocol configuration # -menu "IrDA (infrared) support" +menuconfig IRDA depends on NET - -config IRDA - tristate "IrDA subsystem support" + tristate "IrDA (infrared) subsystem support" ---help--- Say Y here if you want to build support for the IrDA (TM) protocols. 
The Infrared Data Associations (tm) specifies standards for wireless @@ -95,5 +93,3 @@ config IRDA_DEBUG source "drivers/net/irda/Kconfig" -endmenu - --- linux-2.6.6-rc1/net/Kconfig 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/Kconfig 2004-04-18 22:25:30.866138112 -0700 @@ -129,59 +129,6 @@ config IPV6 source "net/ipv6/Kconfig" -config DECNET - tristate "DECnet Support" - ---help--- - The DECnet networking protocol was used in many products made by - Digital (now Compaq). It provides reliable stream and sequenced - packet communications over which run a variety of services similar - to those which run over TCP/IP. - - To find some tools to use with the kernel layer support, please - look at Patrick Caulfield's web site: - . - - More detailed documentation is available in - . - - Be sure to say Y to "/proc file system support" and "Sysctl support" - below when using DECnet, since you will need sysctl support to aid - in configuration at run time. - - The DECnet code is also available as a module ( = code which can be - inserted in and removed from the running kernel whenever you want). - The module is called decnet. - -source "net/decnet/Kconfig" - -config BRIDGE - tristate "802.1d Ethernet Bridging" - ---help--- - If you say Y here, then your Linux box will be able to act as an - Ethernet bridge, which means that the different Ethernet segments it - is connected to will appear as one Ethernet to the participants. - Several such bridges can work together to create even larger - networks of Ethernets using the IEEE 802.1 spanning tree algorithm. - As this is a standard, Linux bridges will cooperate properly with - other third party bridge products. - - In order to use the Ethernet bridge, you'll need the bridge - configuration tools; see - for location. Please read the Bridge mini-HOWTO for more - information. - - If you enable iptables support along with the bridge support then you - turn your bridge into a bridging IP firewall. 
- iptables will then see the IP packets being bridged, so you need to - take this into account when setting up your firewall rules. - Enabling arptables support when bridging will let arptables see - bridged ARP traffic in the arptables FORWARD chain. - - To compile this code as a module, choose M here: the module - will be called bridge. - - If unsure, say N. - menuconfig NETFILTER bool "Network packet filtering (replaces ipchains)" ---help--- @@ -345,9 +292,62 @@ config ATM_BR2684_IPFILTER large number of IP-only vcc's. Do not enable this unless you are sure you know what you are doing. +config BRIDGE + tristate "802.1d Ethernet Bridging" + ---help--- + If you say Y here, then your Linux box will be able to act as an + Ethernet bridge, which means that the different Ethernet segments it + is connected to will appear as one Ethernet to the participants. + Several such bridges can work together to create even larger + networks of Ethernets using the IEEE 802.1 spanning tree algorithm. + As this is a standard, Linux bridges will cooperate properly with + other third party bridge products. + + In order to use the Ethernet bridge, you'll need the bridge + configuration tools; see + for location. Please read the Bridge mini-HOWTO for more + information. + + If you enable iptables support along with the bridge support then you + turn your bridge into a bridging IP firewall. + iptables will then see the IP packets being bridged, so you need to + take this into account when setting up your firewall rules. + Enabling arptables support when bridging will let arptables see + bridged ARP traffic in the arptables FORWARD chain. + + To compile this code as a module, choose M here: the module + will be called bridge. + + If unsure, say N. + config VLAN_8021Q tristate "802.1Q VLAN Support" +config DECNET + tristate "DECnet Support" + ---help--- + The DECnet networking protocol was used in many products made by + Digital (now Compaq). 
It provides reliable stream and sequenced + packet communications over which run a variety of services similar + to those which run over TCP/IP. + + To find some tools to use with the kernel layer support, please + look at Patrick Caulfield's web site: + . + + More detailed documentation is available in + . + + Be sure to say Y to "/proc file system support" and "Sysctl support" + below when using DECnet, since you will need sysctl support to aid + in configuration at run time. + + The DECnet code is also available as a module ( = code which can be + inserted in and removed from the running kernel whenever you want). + The module is called decnet. + +source "net/decnet/Kconfig" + source "net/llc/Kconfig" config IPX @@ -650,28 +650,28 @@ endmenu endmenu -source "drivers/net/Kconfig" - -source "net/ax25/Kconfig" - -source "net/irda/Kconfig" - -source "net/bluetooth/Kconfig" +config KGDBOE + def_bool X86 && KGDB config NETPOLL - def_bool NETCONSOLE + def_bool NETCONSOLE || KGDBOE config NETPOLL_RX - bool "Netpoll support for trapping incoming packets" - default n - depends on NETPOLL + def_bool KGDBOE config NETPOLL_TRAP - bool "Netpoll traffic trapping" - default n - depends on NETPOLL + def_bool KGDBOE config NET_POLL_CONTROLLER def_bool NETPOLL +source "net/ax25/Kconfig" + +source "net/irda/Kconfig" + +source "net/bluetooth/Kconfig" + +source "drivers/net/Kconfig" + endmenu + --- linux-2.6.6-rc1/net/key/af_key.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/key/af_key.c 2004-04-18 22:25:25.115012416 -0700 @@ -2636,7 +2636,7 @@ static int pfkey_send_new_mapping(struct addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); - addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; + addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; if (x->props.family == AF_INET) { --- linux-2.6.6-rc1/net/netrom/af_netrom.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/netrom/af_netrom.c 
2004-04-18 22:25:25.116012264 -0700 @@ -1200,17 +1200,11 @@ static int nr_ioctl(struct socket *sock, } case SIOCGSTAMP: - if (sk != NULL) { - if (!sk->sk_stamp.tv_sec) { - release_sock(sk); - return -ENOENT; - } - ret = copy_to_user((void *)arg, &sk->sk_stamp, sizeof(struct timeval)) ? -EFAULT : 0; - release_sock(sk); - return ret; - } + ret = -EINVAL; + if (sk != NULL) + ret = sock_get_timestamp(sk, (struct timeval *)arg); release_sock(sk); - return -EINVAL; + return ret; case SIOCGIFADDR: case SIOCSIFADDR: --- linux-2.6.6-rc1/net/packet/af_packet.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/packet/af_packet.c 2004-04-18 22:25:25.117012112 -0700 @@ -625,6 +625,10 @@ static int tpacket_rcv(struct sk_buff *s h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; + if (skb->stamp.tv_sec == 0) { + do_gettimeofday(&skb->stamp); + sock_enable_timestamp(sk); + } h->tp_sec = skb->stamp.tv_sec; h->tp_usec = skb->stamp.tv_usec; @@ -1461,13 +1465,8 @@ static int packet_ioctl(struct socket *s return put_user(amount, (int *)arg); } case SIOCGSTAMP: - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - if (copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval))) - return -EFAULT; - break; - + return sock_get_timestamp(sk, (struct timeval *)arg); + #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: --- linux-2.6.6-rc1/net/rose/af_rose.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/rose/af_rose.c 2004-04-18 22:25:25.118011960 -0700 @@ -1269,12 +1269,8 @@ static int rose_ioctl(struct socket *soc } case SIOCGSTAMP: - if (sk != NULL) { - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - return copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)) ? 
-EFAULT : 0; - } + if (sk != NULL) + return sock_get_timestamp(sk, (struct timeval *)arg); return -EINVAL; case SIOCGIFADDR: --- linux-2.6.6-rc1/net/rxrpc/transport.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/rxrpc/transport.c 2004-04-18 22:25:25.119011808 -0700 @@ -341,6 +341,11 @@ static int rxrpc_incoming_msg(struct rxr msg->trans = trans; msg->state = RXRPC_MSG_RECEIVED; msg->stamp = pkt->stamp; + if (msg->stamp.tv_sec == 0) { + do_gettimeofday(&msg->stamp); + if (pkt->sk) + sock_enable_timestamp(pkt->sk); + } msg->seq = ntohl(msg->hdr.seq); /* attach the packet */ --- linux-2.6.6-rc1/net/sched/sch_dsmark.c 2004-03-10 20:41:32.000000000 -0800 +++ 25/net/sched/sch_dsmark.c 2004-04-18 22:25:25.120011656 -0700 @@ -326,7 +326,8 @@ int dsmark_init(struct Qdisc *sch,struct __u16 tmp; DPRINTK("dsmark_init(sch %p,[qdisc %p],opt %p)\n",sch,p,opt); - if (rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || + if (!opt || + rtattr_parse(tb,TCA_DSMARK_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0 || !tb[TCA_DSMARK_INDICES-1] || RTA_PAYLOAD(tb[TCA_DSMARK_INDICES-1]) < sizeof(__u16)) return -EINVAL; --- linux-2.6.6-rc1/net/sctp/associola.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/sctp/associola.c 2004-04-18 22:25:25.121011504 -0700 @@ -1,5 +1,5 @@ /* SCTP kernel reference Implementation - * (C) Copyright IBM Corp. 2001, 2003 + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. @@ -276,7 +276,7 @@ struct sctp_association *sctp_associatio asoc->need_ecne = 0; - asoc->eyecatcher = SCTP_ASSOC_EYECATCHER; + asoc->assoc_id = (sctp_assoc_t)-1; /* Assume that peer would support both address types unless we are * told otherwise. @@ -360,8 +360,6 @@ void sctp_association_free(struct sctp_a sctp_transport_free(transport); } - asoc->eyecatcher = 0; - /* Free any cached ASCONF_ACK chunk. 
*/ if (asoc->addip_last_asconf_ack) sctp_chunk_free(asoc->addip_last_asconf_ack); @@ -381,6 +379,12 @@ static void sctp_association_destroy(str sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); + if ((int)asoc->assoc_id != -1) { + spin_lock_bh(&sctp_assocs_id_lock); + idr_remove(&sctp_assocs_id, (int)asoc->assoc_id); + spin_unlock_bh(&sctp_assocs_id_lock); + } + if (asoc->base.malloced) { kfree(asoc); SCTP_DBG_OBJCNT_DEC(assoc); @@ -856,26 +860,6 @@ out: return transport; } -/* Is this a live association structure. */ -int sctp_assoc_valid(struct sock *sk, struct sctp_association *asoc) -{ - - /* First, verify that this is a kernel address. */ - if (!sctp_is_valid_kaddr((unsigned long) asoc)) - return 0; - - /* Verify that this _is_ an sctp_association - * data structure and if so, that the socket matches. - */ - if (SCTP_ASSOC_EYECATCHER != asoc->eyecatcher) - return 0; - if (asoc->base.sk != sk) - return 0; - - /* The association is valid. */ - return 1; -} - /* Do delayed input processing. This is scheduled by sctp_rcv(). */ static void sctp_assoc_bh_rcv(struct sctp_association *asoc) { @@ -891,6 +875,7 @@ static void sctp_assoc_bh_rcv(struct sct sk = asoc->base.sk; inqueue = &asoc->base.inqueue; + sctp_association_hold(asoc); while (NULL != (chunk = sctp_inq_pop(inqueue))) { state = asoc->state; subtype = chunk->chunk_hdr->type; @@ -913,14 +898,14 @@ static void sctp_assoc_bh_rcv(struct sct /* Check to see if the association is freed in response to * the incoming chunk. If so, get out of the while loop. */ - if (!sctp_assoc_valid(sk, asoc)) + if (asoc->base.dead) break; /* If there is an error on chunk, discard this packet. */ if (error && chunk) chunk->pdiscard = 1; } - + sctp_association_put(asoc); } /* This routine moves an association from its old sk to a new sk. 
*/ --- linux-2.6.6-rc1/net/sctp/input.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/sctp/input.c 2004-04-18 22:25:25.122011352 -0700 @@ -175,6 +175,12 @@ int sctp_rcv(struct sk_buff *skb) rcvr = asoc ? &asoc->base : &ep->base; sk = rcvr->sk; + /* SCTP seems to always need a timestamp right now (FIXME) */ + if (skb->stamp.tv_sec == 0) { + do_gettimeofday(&skb->stamp); + sock_enable_timestamp(sk); + } + if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family)) goto discard_release; --- linux-2.6.6-rc1/net/sctp/ipv6.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/net/sctp/ipv6.c 2004-04-18 22:25:25.122011352 -0700 @@ -1,7 +1,7 @@ /* SCTP kernel reference Implementation + * (C) Copyright IBM Corp. 2002, 2004 * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll - * Copyright (c) 2002-2003 International Business Machines, Corp. * Copyright (c) 2002-2003 Intel Corp. * * This file is part of the SCTP kernel reference Implementation @@ -698,7 +698,7 @@ static void sctp_inet6_event_msgname(str union sctp_addr *addr; struct sctp_association *asoc; - asoc = event->sndrcvinfo.sinfo_assoc_id; + asoc = event->asoc; sctp_inet6_msgname(msgname, addrlen); sin6 = (struct sockaddr_in6 *)msgname; sin6->sin6_port = htons(asoc->peer.port); --- linux-2.6.6-rc1/net/sctp/objcnt.c 2003-06-14 12:17:57.000000000 -0700 +++ 25/net/sctp/objcnt.c 2004-04-18 22:25:25.123011200 -0700 @@ -1,5 +1,5 @@ /* SCTP kernel reference Implementation - * Copyright (c) 2001 International Business Machines Corp. + * (C) Copyright IBM Corp. 2001, 2004 * * This file is part of the SCTP kernel reference Implementation * @@ -134,7 +134,7 @@ void sctp_dbg_objcnt_init(void) /* Cleanup the objcount entry in the proc filesystem. 
*/ void sctp_dbg_objcnt_exit(void) { - remove_proc_entry("sctp_dbg_objcount", proc_net_sctp); + remove_proc_entry("sctp_dbg_objcnt", proc_net_sctp); } --- linux-2.6.6-rc1/net/sctp/protocol.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/sctp/protocol.c 2004-04-18 22:25:25.124011048 -0700 @@ -64,6 +64,9 @@ struct sctp_globals sctp_globals; struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics); +struct idr sctp_assocs_id; +spinlock_t sctp_assocs_id_lock = SPIN_LOCK_UNLOCKED; + /* This is the global socket data structure used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. @@ -721,7 +724,7 @@ static void sctp_inet_event_msgname(stru if (msgname) { struct sctp_association *asoc; - asoc = event->sndrcvinfo.sinfo_assoc_id; + asoc = event->asoc; sctp_inet_msgname(msgname, addr_len); sin = (struct sockaddr_in *)msgname; sinfrom = &asoc->peer.primary_addr.v4; @@ -1049,6 +1052,9 @@ __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; + /* Initialize handle used for association ids. */ + idr_init(&sctp_assocs_id); + /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. */ --- linux-2.6.6-rc1/net/sctp/sm_make_chunk.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/sctp/sm_make_chunk.c 2004-04-18 22:25:25.126010744 -0700 @@ -1,5 +1,5 @@ /* SCTP kernel reference Implementation - * (C) Copyright IBM Corp. 2001, 2003 + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2002 Intel Corp. @@ -1817,10 +1817,23 @@ int sctp_process_init(struct sctp_associ /* Allocate storage for the negotiated streams if it is not a temporary * association. 
*/ if (!asoc->temp) { + sctp_assoc_t assoc_id; + asoc->ssnmap = sctp_ssnmap_new(asoc->c.sinit_max_instreams, asoc->c.sinit_num_ostreams, gfp); if (!asoc->ssnmap) - goto nomem_ssnmap; + goto clean_up; + + do { + if (unlikely(!idr_pre_get(&sctp_assocs_id, gfp))) + goto clean_up; + spin_lock_bh(&sctp_assocs_id_lock); + assoc_id = (sctp_assoc_t)idr_get_new(&sctp_assocs_id, + (void *)asoc); + spin_unlock_bh(&sctp_assocs_id_lock); + } while (unlikely((int)assoc_id == -1)); + + asoc->assoc_id = assoc_id; } /* ADDIP Section 4.1 ASCONF Chunk Procedures @@ -1836,7 +1849,6 @@ int sctp_process_init(struct sctp_associ asoc->peer.addip_serial = asoc->peer.i.initial_tsn - 1; return 1; -nomem_ssnmap: clean_up: /* Release the transport structures. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { --- linux-2.6.6-rc1/net/sctp/socket.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/sctp/socket.c 2004-04-18 22:25:25.129010288 -0700 @@ -135,8 +135,14 @@ struct sctp_association *sctp_id2assoc(s } /* Otherwise this is a UDP-style socket. */ - asoc = (struct sctp_association *)id; - if (!sctp_assoc_valid(sk, asoc)) + if (!id || (id == (sctp_assoc_t)-1)) + return NULL; + + spin_lock_bh(&sctp_assocs_id_lock); + asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); + spin_unlock_bh(&sctp_assocs_id_lock); + + if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) return NULL; return asoc; @@ -1010,7 +1016,7 @@ SCTP_STATIC int sctp_sendmsg(struct kioc struct list_head *pos; int msg_flags = msg->msg_flags; - SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %u)\n", + SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n", sk, msg, msg_len); err = 0; @@ -1066,7 +1072,7 @@ SCTP_STATIC int sctp_sendmsg(struct kioc associd = sinfo->sinfo_assoc_id; } - SCTP_DEBUG_PRINTK("msg_len: %u, sinfo_flags: 0x%x\n", + SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", msg_len, sinfo_flags); /* MSG_EOF or MSG_ABORT cannot be set on a TCP-style socket. 
*/ @@ -1432,7 +1438,7 @@ SCTP_STATIC int sctp_recvmsg(struct kioc int err = 0; int skb_len; - SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %d, %s: %d, %s: " + SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: " "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, "len", len, "knoblauch", noblock, "flags", flags, "addr_len", addr_len); @@ -1498,8 +1504,7 @@ SCTP_STATIC int sctp_recvmsg(struct kioc * rwnd by that amount. If all the data in the skb is read, * rwnd is updated when the event is freed. */ - sctp_assoc_rwnd_increase(event->sndrcvinfo.sinfo_assoc_id, - copied); + sctp_assoc_rwnd_increase(event->asoc, copied); goto out; } else if ((event->msg_flags & MSG_NOTIFICATION) || (event->msg_flags & MSG_EOR)) @@ -4233,7 +4238,7 @@ static int sctp_wait_for_sndbuf(struct s long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%u\n", + SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n", asoc, (long)(*timeo_p), msg_len); /* Increment the association's refcnt. */ @@ -4477,7 +4482,7 @@ static void sctp_sock_migrate(struct soc */ sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); - if (event->sndrcvinfo.sinfo_assoc_id == assoc) { + if (event->asoc == assoc) { __skb_unlink(skb, skb->list); __skb_queue_tail(&newsk->sk_receive_queue, skb); } @@ -4506,7 +4511,7 @@ static void sctp_sock_migrate(struct soc */ sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); - if (event->sndrcvinfo.sinfo_assoc_id == assoc) { + if (event->asoc == assoc) { __skb_unlink(skb, skb->list); __skb_queue_tail(queue, skb); } --- linux-2.6.6-rc1/net/sctp/ulpevent.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/sctp/ulpevent.c 2004-04-18 22:25:25.131009984 -0700 @@ -1,7 +1,7 @@ /* SCTP kernel reference Implementation + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. 
- * Copyright (c) 2001 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll @@ -590,8 +590,7 @@ struct sctp_ulpevent *sctp_ulpevent_make struct sctp_chunk *chunk, int gfp) { - struct sctp_ulpevent *event; - struct sctp_sndrcvinfo *info; + struct sctp_ulpevent *event = NULL; struct sk_buff *skb; size_t padding, len; @@ -624,101 +623,21 @@ struct sctp_ulpevent *sctp_ulpevent_make /* Initialize event with flags 0. */ sctp_ulpevent_init(event, 0); - event->iif = sctp_chunk_iif(chunk); - sctp_ulpevent_receive_data(event, asoc); - info = (struct sctp_sndrcvinfo *) &event->sndrcvinfo; - - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_stream: 16 bits (unsigned integer) - * - * For recvmsg() the SCTP stack places the message's stream number in - * this value. - */ - info->sinfo_stream = ntohs(chunk->subh.data_hdr->stream); - - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_ssn: 16 bits (unsigned integer) - * - * For recvmsg() this value contains the stream sequence number that - * the remote endpoint placed in the DATA chunk. For fragmented - * messages this is the same number for all deliveries of the message - * (if more than one recvmsg() is needed to read the message). - */ - info->sinfo_ssn = ntohs(chunk->subh.data_hdr->ssn); - - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_ppid: 32 bits (unsigned integer) - * - * In recvmsg() this value is - * the same information that was passed by the upper layer in the peer - * application. Please note that byte order issues are NOT accounted - * for and this information is passed opaquely by the SCTP stack from - * one end to the other. 
- */ - info->sinfo_ppid = chunk->subh.data_hdr->ppid; - - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_flags: 16 bits (unsigned integer) - * - * This field may contain any of the following flags and is composed of - * a bitwise OR of these values. - * - * recvmsg() flags: - * - * MSG_UNORDERED - This flag is present when the message was sent - * non-ordered. - */ + event->stream = ntohs(chunk->subh.data_hdr->stream); + event->ssn = ntohs(chunk->subh.data_hdr->ssn); + event->ppid = chunk->subh.data_hdr->ppid; if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) { - info->sinfo_flags |= MSG_UNORDERED; - - /* sinfo_cumtsn: 32 bit (unsigned integer) - * - * This field will hold the current cumulative TSN as - * known by the underlying SCTP layer. Note this field is - * ignored when sending and only valid for a receive - * operation when sinfo_flags are set to MSG_UNORDERED. - */ - info->sinfo_cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); + event->flags |= MSG_UNORDERED; + event->cumtsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); } - - /* Note: For reassembly, we need to have the fragmentation bits. - * For now, merge these into the msg_flags, since those bit - * possitions are not used. - */ + event->tsn = ntohl(chunk->subh.data_hdr->tsn); event->msg_flags |= chunk->chunk_hdr->flags; - - /* With 04 draft, tsn moves into sndrcvinfo. */ - info->sinfo_tsn = ntohl(chunk->subh.data_hdr->tsn); - - /* Context is not used on receive. */ - info->sinfo_context = 0; - - /* Sockets API Extensions for SCTP - * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) - * - * sinfo_assoc_id: sizeof (sctp_assoc_t) - * - * The association handle field, sinfo_assoc_id, holds the identifier - * for the association announced in the COMMUNICATION_UP notification. - * All notifications for a given association have the same identifier. - * Ignored for TCP-style sockets. 
- */ - info->sinfo_assoc_id = sctp_assoc2id(asoc); - - return event; + event->iif = sctp_chunk_iif(chunk); fail: - return NULL; + return event; } /* Create a partial delivery related event. @@ -797,11 +716,77 @@ __u16 sctp_ulpevent_get_notification_typ void sctp_ulpevent_read_sndrcvinfo(const struct sctp_ulpevent *event, struct msghdr *msghdr) { - if (!sctp_ulpevent_is_notification(event)) { - put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, - sizeof(struct sctp_sndrcvinfo), - (void *) &event->sndrcvinfo); - } + struct sctp_sndrcvinfo sinfo; + + if (sctp_ulpevent_is_notification(event)) + return; + + /* Sockets API Extensions for SCTP + * Section 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV) + * + * sinfo_stream: 16 bits (unsigned integer) + * + * For recvmsg() the SCTP stack places the message's stream number in + * this value. + */ + sinfo.sinfo_stream = event->stream; + /* sinfo_ssn: 16 bits (unsigned integer) + * + * For recvmsg() this value contains the stream sequence number that + * the remote endpoint placed in the DATA chunk. For fragmented + * messages this is the same number for all deliveries of the message + * (if more than one recvmsg() is needed to read the message). + */ + sinfo.sinfo_ssn = event->ssn; + /* sinfo_ppid: 32 bits (unsigned integer) + * + * In recvmsg() this value is + * the same information that was passed by the upper layer in the peer + * application. Please note that byte order issues are NOT accounted + * for and this information is passed opaquely by the SCTP stack from + * one end to the other. + */ + sinfo.sinfo_ppid = event->ppid; + /* sinfo_flags: 16 bits (unsigned integer) + * + * This field may contain any of the following flags and is composed of + * a bitwise OR of these values. + * + * recvmsg() flags: + * + * MSG_UNORDERED - This flag is present when the message was sent + * non-ordered. 
+ */ + sinfo.sinfo_flags = event->flags; + /* sinfo_tsn: 32 bit (unsigned integer) + * + * For the receiving side, this field holds a TSN that was + * assigned to one of the SCTP Data Chunks. + */ + sinfo.sinfo_tsn = event->tsn; + /* sinfo_cumtsn: 32 bit (unsigned integer) + * + * This field will hold the current cumulative TSN as + * known by the underlying SCTP layer. Note this field is + * ignored when sending and only valid for a receive + * operation when sinfo_flags are set to MSG_UNORDERED. + */ + sinfo.sinfo_cumtsn = event->cumtsn; + /* sinfo_assoc_id: sizeof (sctp_assoc_t) + * + * The association handle field, sinfo_assoc_id, holds the identifier + * for the association announced in the COMMUNICATION_UP notification. + * All notifications for a given association have the same identifier. + * Ignored for one-to-one style sockets. + */ + sinfo.sinfo_assoc_id = sctp_assoc2id(event->asoc); + + /* These fields are not used while receiving. */ + sinfo.sinfo_context = 0; + sinfo.sinfo_timetolive = 0; + + put_cmsg(msghdr, IPPROTO_SCTP, SCTP_SNDRCV, + sizeof(struct sctp_sndrcvinfo), (void *)&sinfo); } /* Stub skb destructor. */ @@ -831,14 +816,14 @@ static inline void sctp_ulpevent_set_own sctp_association_hold((struct sctp_association *)asoc); skb = sctp_event2skb(event); skb->sk = asoc->base.sk; - event->sndrcvinfo.sinfo_assoc_id = sctp_assoc2id(asoc); + event->asoc = (struct sctp_association *)asoc; skb->destructor = sctp_stub_rfree; } /* A simple destructor to give up the reference to the association. 
*/ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) { - sctp_association_put(event->sndrcvinfo.sinfo_assoc_id); + sctp_association_put(event->asoc); } /* Do accounting for bytes received and hold a reference to the association @@ -880,8 +865,7 @@ static void sctp_ulpevent_release_data(s */ skb = sctp_event2skb(event); - sctp_assoc_rwnd_increase(event->sndrcvinfo.sinfo_assoc_id, - skb_headlen(skb)); + sctp_assoc_rwnd_increase(event->asoc, skb_headlen(skb)); /* Don't forget the fragments. */ for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) { --- linux-2.6.6-rc1/net/sctp/ulpqueue.c 2003-07-27 12:14:40.000000000 -0700 +++ 25/net/sctp/ulpqueue.c 2004-04-18 22:25:25.133009680 -0700 @@ -1,7 +1,7 @@ /* SCTP kernel reference Implementation + * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. - * Copyright (c) 2001-2003 International Business Machines, Corp. * Copyright (c) 2001 Intel Corp. * Copyright (c) 2001 Nokia, Inc. * Copyright (c) 2001 La Monte H.P. Yarroll @@ -251,7 +251,7 @@ static inline void sctp_ulpq_store_reasm struct sctp_ulpevent *cevent; __u32 tsn, ctsn; - tsn = event->sndrcvinfo.sinfo_tsn; + tsn = event->tsn; /* See if it belongs at the end. */ pos = skb_peek_tail(&ulpq->reasm); @@ -262,7 +262,7 @@ static inline void sctp_ulpq_store_reasm /* Short circuit just dropping it at the end. */ cevent = sctp_skb2event(pos); - ctsn = cevent->sndrcvinfo.sinfo_tsn; + ctsn = cevent->tsn; if (TSN_lt(ctsn, tsn)) { __skb_queue_tail(&ulpq->reasm, sctp_event2skb(event)); return; @@ -271,7 +271,7 @@ static inline void sctp_ulpq_store_reasm /* Find the right place in this list. We store them by TSN. 
*/ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); - ctsn = cevent->sndrcvinfo.sinfo_tsn; + ctsn = cevent->tsn; if (TSN_lt(tsn, ctsn)) break; @@ -368,7 +368,7 @@ static inline struct sctp_ulpevent *sctp */ skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); - ctsn = cevent->sndrcvinfo.sinfo_tsn; + ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: @@ -425,7 +425,7 @@ static inline struct sctp_ulpevent *sctp skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); - ctsn = cevent->sndrcvinfo.sinfo_tsn; + ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_MIDDLE_FRAG: @@ -486,7 +486,7 @@ static inline struct sctp_ulpevent *sctp /* Do not even bother unless this is the next tsn to * be delivered. */ - ctsn = event->sndrcvinfo.sinfo_tsn; + ctsn = event->tsn; ctsnap = sctp_tsnmap_get_ctsn(&ulpq->asoc->peer.tsn_map); if (TSN_lte(ctsn, ctsnap)) retval = sctp_ulpq_retrieve_partial(ulpq); @@ -517,7 +517,7 @@ static inline struct sctp_ulpevent *sctp skb_queue_walk(&ulpq->reasm, pos) { cevent = sctp_skb2event(pos); - ctsn = cevent->sndrcvinfo.sinfo_tsn; + ctsn = cevent->tsn; switch (cevent->msg_flags & SCTP_DATA_FRAG_MASK) { case SCTP_DATA_FIRST_FRAG: @@ -563,15 +563,15 @@ static inline void sctp_ulpq_retrieve_or __u16 sid, csid; __u16 ssn, cssn; - sid = event->sndrcvinfo.sinfo_stream; - ssn = event->sndrcvinfo.sinfo_ssn; + sid = event->stream; + ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; - csid = cevent->sndrcvinfo.sinfo_stream; - cssn = cevent->sndrcvinfo.sinfo_ssn; + csid = cevent->stream; + cssn = cevent->ssn; /* Have we gone too far? 
*/ if (csid > sid) @@ -609,12 +609,12 @@ static inline void sctp_ulpq_store_order return; } - sid = event->sndrcvinfo.sinfo_stream; - ssn = event->sndrcvinfo.sinfo_ssn; + sid = event->stream; + ssn = event->ssn; cevent = (struct sctp_ulpevent *) pos->cb; - csid = cevent->sndrcvinfo.sinfo_stream; - cssn = cevent->sndrcvinfo.sinfo_ssn; + csid = cevent->stream; + cssn = cevent->ssn; if (sid > csid) { __skb_queue_tail(&ulpq->lobby, sctp_event2skb(event)); return; @@ -630,8 +630,8 @@ static inline void sctp_ulpq_store_order */ skb_queue_walk(&ulpq->lobby, pos) { cevent = (struct sctp_ulpevent *) pos->cb; - csid = cevent->sndrcvinfo.sinfo_stream; - cssn = cevent->sndrcvinfo.sinfo_ssn; + csid = cevent->stream; + cssn = cevent->ssn; if (csid > sid) break; @@ -656,8 +656,8 @@ static inline struct sctp_ulpevent *sctp return event; /* Note: The stream ID must be verified before this routine. */ - sid = event->sndrcvinfo.sinfo_stream; - ssn = event->sndrcvinfo.sinfo_ssn; + sid = event->stream; + ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; /* Is this the expected SSN for this stream ID? 
*/ @@ -694,7 +694,7 @@ static __u16 sctp_ulpq_renege_order(stru while ((skb = __skb_dequeue_tail(&ulpq->lobby))) { freed += skb_headlen(skb); event = sctp_skb2event(skb); - tsn = event->sndrcvinfo.sinfo_tsn; + tsn = event->tsn; sctp_ulpevent_free(event); sctp_tsnmap_renege(tsnmap, tsn); @@ -720,7 +720,7 @@ static __u16 sctp_ulpq_renege_frags(stru while ((skb = __skb_dequeue_tail(&ulpq->reasm))) { freed += skb_headlen(skb); event = sctp_skb2event(skb); - tsn = event->sndrcvinfo.sinfo_tsn; + tsn = event->tsn; sctp_ulpevent_free(event); sctp_tsnmap_renege(tsnmap, tsn); --- linux-2.6.6-rc1/net/socket.c 2004-03-10 20:41:32.000000000 -0800 +++ 25/net/socket.c 2004-04-18 22:25:32.705858432 -0700 @@ -308,9 +308,9 @@ static void init_once(void * foo, kmem_c static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), - 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, - init_once, NULL); + sizeof(struct socket_alloc), + 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + init_once, NULL); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; --- linux-2.6.6-rc1/net/sunrpc/auth_gss/svcauth_gss.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/net/sunrpc/auth_gss/svcauth_gss.c 2004-04-18 22:25:25.134009528 -0700 @@ -895,6 +895,7 @@ svcauth_gss_accept(struct svc_rqst *rqst svc_putu32(resv, rpc_success); goto complete; case RPC_GSS_PROC_DATA: + *authp = rpc_autherr_badcred; rqstp->rq_client = find_gss_auth_domain(rsci->mechctx, gc->gc_svc); if (rqstp->rq_client == NULL) --- linux-2.6.6-rc1/net/sunrpc/svcsock.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/net/sunrpc/svcsock.c 2004-04-18 22:25:25.135009376 -0700 @@ -591,6 +591,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } + if (skb->stamp.tv_sec == 0) { + skb->stamp.tv_sec = xtime.tv_sec; + skb->stamp.tv_usec = xtime.tv_nsec * 1000; + /* Don't enable netstamp, sunrpc doesn't + need that much accuracy 
*/ + } svsk->sk_sk->sk_stamp = skb->stamp; set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ --- linux-2.6.6-rc1/net/unix/af_unix.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/net/unix/af_unix.c 2004-04-18 22:25:25.136009224 -0700 @@ -82,8 +82,6 @@ * with BSD names. */ -#undef unix /* KBUILD_MODNAME */ - #include #include #include --- linux-2.6.6-rc1/net/wanrouter/af_wanpipe.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/wanrouter/af_wanpipe.c 2004-04-18 22:25:25.138008920 -0700 @@ -1765,13 +1765,7 @@ static int wanpipe_ioctl(struct socket * switch(cmd) { case SIOCGSTAMP: - if (!sk->sk_stamp.tv_sec) - return -ENOENT; - err = -EFAULT; - if (!copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval))) - err = 0; - return err; + return sock_get_timestamp(sk, (struct timeval *)arg); case SIOC_WANPIPE_CHECK_TX: --- linux-2.6.6-rc1/net/x25/af_x25.c 2004-02-17 20:48:46.000000000 -0800 +++ 25/net/x25/af_x25.c 2004-04-18 22:25:25.139008768 -0700 @@ -1206,14 +1206,10 @@ static int x25_ioctl(struct socket *sock } case SIOCGSTAMP: - if (sk) { - rc = -ENOENT; - if (!sk->sk_stamp.tv_sec) - break; - rc = copy_to_user((void *)arg, &sk->sk_stamp, - sizeof(struct timeval)) ? 
-EFAULT : 0; - } rc = -EINVAL; + if (sk) + rc = sock_get_timestamp(sk, + (struct timeval *)arg); break; case SIOCGIFADDR: case SIOCSIFADDR: --- linux-2.6.6-rc1/scripts/modpost.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/scripts/modpost.c 2004-04-18 22:25:25.140008616 -0700 @@ -487,6 +487,7 @@ add_header(struct buffer *b) buf_printf(b, "\n"); buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n"); buf_printf(b, "\n"); + buf_printf(b, "#undef unix\n"); /* We have a module called "unix" */ buf_printf(b, "struct module __this_module\n"); buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n"); buf_printf(b, " .name = __stringify(KBUILD_MODNAME),\n"); --- linux-2.6.6-rc1/security/capability.c 2004-02-03 20:42:39.000000000 -0800 +++ 25/security/capability.c 2004-04-18 22:25:51.782958272 -0700 @@ -35,7 +35,7 @@ static struct security_operations capabi .netlink_send = cap_netlink_send, .netlink_recv = cap_netlink_recv, - .bprm_compute_creds = cap_bprm_compute_creds, + .bprm_apply_creds = cap_bprm_apply_creds, .bprm_set_security = cap_bprm_set_security, .bprm_secureexec = cap_bprm_secureexec, --- linux-2.6.6-rc1/security/commoncap.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/security/commoncap.c 2004-04-18 22:25:52.173898840 -0700 @@ -115,13 +115,15 @@ int cap_bprm_set_security (struct linux_ return 0; } -/* Copied from fs/exec.c */ static inline int must_not_trace_exec (struct task_struct *p) { - return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP); + return ((p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP)) + || atomic_read(&p->fs->count) > 1 + || atomic_read(&p->files->count) > 1 + || atomic_read(&p->sighand->count) > 1; } -void cap_bprm_compute_creds (struct linux_binprm *bprm) +void cap_bprm_apply_creds (struct linux_binprm *bprm) { /* Derived from fs/exec.c:compute_creds. 
*/ kernel_cap_t new_permitted, working; @@ -132,18 +134,26 @@ void cap_bprm_compute_creds (struct linu new_permitted = cap_combine (new_permitted, working); task_lock(current); + + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { + current->mm->dumpable = 0; + + if (must_not_trace_exec(current) && !capable(CAP_SETUID)) { + bprm->e_uid = current->uid; + bprm->e_gid = current->gid; + } + } + + current->suid = current->euid = current->fsuid = bprm->e_uid; + current->sgid = current->egid = current->fsgid = bprm->e_gid; + if (!cap_issubset (new_permitted, current->cap_permitted)) { current->mm->dumpable = 0; - if (must_not_trace_exec (current) - || atomic_read (&current->fs->count) > 1 - || atomic_read (&current->files->count) > 1 - || atomic_read (&current->sighand->count) > 1) { - if (!capable (CAP_SETPCAP)) { - new_permitted = cap_intersect (new_permitted, - current-> - cap_permitted); - } + if (must_not_trace_exec (current) && !capable (CAP_SETPCAP)) { + new_permitted = cap_intersect (new_permitted, + current-> + cap_permitted); } } @@ -315,7 +325,7 @@ int cap_vm_enough_memory(long pages) vm_acct_memory(pages); - /* + /* * Sometimes we want to use more memory than we have */ if (sysctl_overcommit_memory == 1) @@ -377,7 +387,7 @@ EXPORT_SYMBOL(cap_capget); EXPORT_SYMBOL(cap_capset_check); EXPORT_SYMBOL(cap_capset_set); EXPORT_SYMBOL(cap_bprm_set_security); -EXPORT_SYMBOL(cap_bprm_compute_creds); +EXPORT_SYMBOL(cap_bprm_apply_creds); EXPORT_SYMBOL(cap_bprm_secureexec); EXPORT_SYMBOL(cap_inode_setxattr); EXPORT_SYMBOL(cap_inode_removexattr); --- linux-2.6.6-rc1/security/dummy.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/security/dummy.c 2004-04-18 22:25:52.174898688 -0700 @@ -26,6 +26,8 @@ #include #include #include +#include +#include static int dummy_ptrace (struct task_struct *parent, struct task_struct *child) { @@ -116,7 +118,7 @@ static int dummy_vm_enough_memory(long p vm_acct_memory(pages); - /* + /* * Sometimes we want to use more memory than we have */ if
(sysctl_overcommit_memory == 1) @@ -169,9 +171,30 @@ static void dummy_bprm_free_security (st return; } -static void dummy_bprm_compute_creds (struct linux_binprm *bprm) +static inline int must_not_trace_exec (struct task_struct *p) { - return; + return ((p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP)) + || atomic_read(&p->fs->count) > 1 + || atomic_read(&p->files->count) > 1 + || atomic_read(&p->sighand->count) > 1; +} + +static void dummy_bprm_apply_creds (struct linux_binprm *bprm) +{ + task_lock(current); + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { + current->mm->dumpable = 0; + + if (must_not_trace_exec(current) && !capable(CAP_SETUID)) { + bprm->e_uid = current->uid; + bprm->e_gid = current->gid; + } + } + + current->suid = current->euid = current->fsuid = bprm->e_uid; + current->sgid = current->egid = current->fsgid = bprm->e_gid; + + task_unlock(current); } static int dummy_bprm_set_security (struct linux_binprm *bprm) @@ -887,7 +910,7 @@ void security_fixup_ops (struct security set_to_dummy_if_null(ops, vm_enough_memory); set_to_dummy_if_null(ops, bprm_alloc_security); set_to_dummy_if_null(ops, bprm_free_security); - set_to_dummy_if_null(ops, bprm_compute_creds); + set_to_dummy_if_null(ops, bprm_apply_creds); set_to_dummy_if_null(ops, bprm_set_security); set_to_dummy_if_null(ops, bprm_check_security); set_to_dummy_if_null(ops, bprm_secureexec); --- linux-2.6.6-rc1/security/root_plug.c 2003-06-14 12:18:35.000000000 -0700 +++ 25/security/root_plug.c 2004-04-18 22:25:52.008923920 -0700 @@ -90,7 +90,7 @@ static struct security_operations rootpl .capset_set = cap_capset_set, .capable = cap_capable, - .bprm_compute_creds = cap_bprm_compute_creds, + .bprm_apply_creds = cap_bprm_apply_creds, .bprm_set_security = cap_bprm_set_security, .task_post_setuid = cap_task_post_setuid, --- linux-2.6.6-rc1/security/selinux/hooks.c 2004-04-14 23:14:49.000000000 -0700 +++ 25/security/selinux/hooks.c 2004-04-18 22:25:51.788957360 -0700 @@ -753,6 
+753,7 @@ static int inode_doinit_with_dentry(stru inode->i_ino); goto out; } + BUG_ON(inode != dentry->d_inode); len = INITCONTEXTLEN; context = kmalloc(len, GFP_KERNEL); @@ -1745,7 +1746,7 @@ static inline void flush_unauthorized_fi spin_unlock(&files->file_lock); } -static void selinux_bprm_compute_creds(struct linux_binprm *bprm) +static void selinux_bprm_apply_creds(struct linux_binprm *bprm) { struct task_security_struct *tsec, *psec; struct bprm_security_struct *bsec; @@ -1755,7 +1756,7 @@ static void selinux_bprm_compute_creds(s struct rlimit *rlim, *initrlim; int rc, i; - secondary_ops->bprm_compute_creds(bprm); + secondary_ops->bprm_apply_creds(bprm); tsec = current->security; @@ -2560,7 +2561,7 @@ static int selinux_task_setrlimit(unsign /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit - upon context transitions. See selinux_bprm_compute_creds. */ + upon context transitions. See selinux_bprm_apply_creds. 
*/ if (old_rlim->rlim_max != new_rlim->rlim_max) return task_has_perm(current, current, PROCESS__SETRLIMIT); @@ -3971,7 +3972,7 @@ struct security_operations selinux_ops = .bprm_alloc_security = selinux_bprm_alloc_security, .bprm_free_security = selinux_bprm_free_security, - .bprm_compute_creds = selinux_bprm_compute_creds, + .bprm_apply_creds = selinux_bprm_apply_creds, .bprm_set_security = selinux_bprm_set_security, .bprm_check_security = selinux_bprm_check_security, .bprm_secureexec = selinux_bprm_secureexec, --- linux-2.6.6-rc1/sound/core/Kconfig 2004-04-03 20:39:14.000000000 -0800 +++ 25/sound/core/Kconfig 2004-04-18 22:25:25.140008616 -0700 @@ -1,7 +1,7 @@ # ALSA soundcard-configuration config SND_BIT32_EMUL tristate "Emulation for 32-bit applications" - depends on SND && (SPARC64 || PPC64 || X86_64 && IA32_EMULATION) + depends on SND && SND_PCM && (SPARC64 || PPC64 || X86_64 && IA32_EMULATION) config SND_TIMER tristate --- linux-2.6.6-rc1/sound/oss/dmasound/dmasound_atari.c 2003-09-08 13:58:59.000000000 -0700 +++ 25/sound/oss/dmasound/dmasound_atari.c 2004-04-18 22:26:02.385346464 -0700 @@ -22,7 +22,6 @@ #include #include -#include #include #include #include --- linux-2.6.6-rc1/sound/pci/au88x0/au8820.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/sound/pci/au88x0/au8820.c 2004-04-18 22:25:28.365518264 -0700 @@ -1,7 +1,7 @@ #include "au8820.h" #include "au88x0.h" static struct pci_device_id snd_vortex_ids[] = { - {PCI_VENDOR_ID_AUREAL, PCI_DEVICE_ID_AUREAL_VORTEX, + {PCI_VENDOR_ID_AUREAL, PCI_DEVICE_ID_AUREAL_VORTEX_1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0,}, {0,} }; --- linux-2.6.6-rc1/sound/pci/au88x0/au8830.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/sound/pci/au88x0/au8830.c 2004-04-18 22:25:28.365518264 -0700 @@ -1,7 +1,7 @@ #include "au8830.h" #include "au88x0.h" static struct pci_device_id snd_vortex_ids[] = { - {PCI_VENDOR_ID_AUREAL, PCI_DEVICE_ID_AUREAL_VORTEX2, + {PCI_VENDOR_ID_AUREAL, PCI_DEVICE_ID_AUREAL_VORTEX_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0,}, {0,} 
}; --- linux-2.6.6-rc1/sound/pci/au88x0/au88x0.h 2004-04-03 20:39:14.000000000 -0800 +++ 25/sound/pci/au88x0/au88x0.h 2004-04-18 22:25:28.366518112 -0700 @@ -80,8 +80,8 @@ #define VORTEX_IS_QUAD(x) ((x->codec == NULL) ? 0 : (x->codec->ext_id|0x80)) /* Check if chip has bug. */ #define IS_BAD_CHIP(x) (\ - (x->rev < 3 && x->device == PCI_DEVICE_ID_AUREAL_VORTEX) || \ - (x->rev < 0xfe && x->device == PCI_DEVICE_ID_AUREAL_VORTEX2) || \ + (x->rev < 3 && x->device == PCI_DEVICE_ID_AUREAL_VORTEX_1) || \ + (x->rev < 0xfe && x->device == PCI_DEVICE_ID_AUREAL_VORTEX_2) || \ (x->rev < 0xfe && x->device == PCI_DEVICE_ID_AUREAL_ADVANTAGE)) --- linux-2.6.6-rc1/sound/pci/intel8x0.c 2004-04-03 20:39:14.000000000 -0800 +++ 25/sound/pci/intel8x0.c 2004-04-18 22:26:01.983407568 -0700 @@ -445,6 +445,7 @@ struct _snd_intel8x0 { #ifdef CONFIG_PM int in_suspend; + u32 pci_state[64 / sizeof(u32)]; #endif }; @@ -2223,11 +2224,13 @@ static int snd_intel8x0_suspend(struct p { intel8x0_t *chip = snd_magic_cast(intel8x0_t, pci_get_drvdata(dev), return -ENXIO); intel8x0_suspend(chip); + pci_save_state(dev, chip->pci_state); return 0; } static int snd_intel8x0_resume(struct pci_dev *dev) { intel8x0_t *chip = snd_magic_cast(intel8x0_t, pci_get_drvdata(dev), return -ENXIO); + pci_restore_state(dev, chip->pci_state); intel8x0_resume(chip); return 0; }