Name: Later primitive
Author: Rusty Russell
Section: Misc
Status: Tested on 2.5.15
D: This patch implements the wait_for_later() call needed for safe
D: module unload and hotplug CPU.  It is a minimal subset of RCU which
D: works with a preemptible kernel.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/include/linux/later.h working-2.5.15-rcu/include/linux/later.h
--- linux-2.5.15/include/linux/later.h	Thu Jan  1 10:00:00 1970
+++ working-2.5.15-rcu/include/linux/later.h	Wed May 22 19:40:26 2002
@@ -0,0 +1,60 @@
+#ifndef __LINUX_LATER_H
+#define __LINUX_LATER_H
+/* Do something after system has calmed down.
+   (c) 2002 Rusty Russell, IBM Corporation.
+*/
+#include
+#include
+#include
+#include
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+struct later
+{
+	struct later *next;
+	void (*func)(void *data);
+	void *data;
+};
+
+extern void noone_running(void);
+extern atomic_t runcounts[2];
+extern int which_runcount;
+
+/* We flip between two run counters. */
+static inline atomic_t *runcount(struct task_struct *task)
+{
+	return &runcounts[!(task->flags & PF_RUNCOUNT)];
+}
+
+/* Decrement counter on entering voluntary schedule. */
+static inline void put_runcount(void)
+{
+	/* If we hit zero and it's not the active list... */
+	if (atomic_dec_and_test(runcount(current))
+	    && (current->flags & PF_RUNCOUNT) != which_runcount)
+		noone_running();
+}
+
+/* Increment counter on leaving voluntary schedule. */
+static inline void get_runcount(struct task_struct *task)
+{
+	/* Sets PF_RUNCOUNT, or not */
+	task->flags = ((task->flags & ~PF_RUNCOUNT) | which_runcount);
+	atomic_inc(runcount(task));
+}
+
+/* Queues future request: may sleep on UP if func sleeps... */
+void do_later(struct later *head, void (*func)(void *data), void *data);
+
+/* Wait until it's later. */
+void wait_for_later(void);
+
+#else /* !SMP, !PREEMPT */
+struct later { };
+#define do_later(head, func, data) (func(data))
+static inline void put_runcount(void) { }
+static inline void get_runcount(struct task_struct *task) { }
+
+extern inline void wait_for_later(void) { }
+#endif
+#endif /* __LINUX_LATER_H */
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/include/linux/sched.h working-2.5.15-rcu/include/linux/sched.h
--- linux-2.5.15/include/linux/sched.h	Fri May 17 14:00:05 2002
+++ working-2.5.15-rcu/include/linux/sched.h	Wed May 22 16:47:36 2002
@@ -370,6 +370,7 @@
 #define PF_FREE_PAGES	0x00002000	/* per process page freeing */
 #define PF_FLUSHER	0x00004000	/* responsible for disk writeback */
 #define PF_RADIX_TREE	0x00008000	/* debug: performing radix tree alloc */
+#define PF_RUNCOUNT	0x00010000	/* which run counter to use */
 
 /*
  * Ptrace flags
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/kernel/Makefile working-2.5.15-rcu/kernel/Makefile
--- linux-2.5.15/kernel/Makefile	Mon Apr 15 11:47:51 2002
+++ working-2.5.15-rcu/kernel/Makefile	Wed May 15 20:25:59 2002
@@ -21,6 +21,11 @@
 obj-$(CONFIG_MODULES) += ksyms.o
 obj-$(CONFIG_PM) += pm.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+ifeq ($(CONFIG_PREEMPT),y)
+obj-y += later.o
+else
+obj-$(CONFIG_SMP) += later.o
+endif
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra , the -fno-omit-frame-pointer is
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/kernel/fork.c working-2.5.15-rcu/kernel/fork.c
--- linux-2.5.15/kernel/fork.c	Mon May 13 12:00:40 2002
+++ working-2.5.15-rcu/kernel/fork.c	Thu May 16 14:37:09 2002
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <linux/later.h>
 #include
 #include
@@ -740,6 +741,7 @@
 	p->swappable = 1;
 	p->exit_signal = clone_flags & CSIGNAL;
 	p->pdeath_signal = 0;
+	get_runcount(p);
 
 	/*
 	 * Share the timeslice between parent and child, thus the
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/kernel/later.c working-2.5.15-rcu/kernel/later.c
--- linux-2.5.15/kernel/later.c	Thu Jan  1 10:00:00 1970
+++ working-2.5.15-rcu/kernel/later.c	Wed May 22 19:40:54 2002
@@ -0,0 +1,149 @@
+/* Do something after system has calmed down.
+
+   Copyright (C) 2002 Rusty Russell, IBM Corporation
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+#include
+#include
+#include
+
+static spinlock_t later_lock = SPIN_LOCK_UNLOCKED;
+
+/* We track how many tasks are running or preempted.  At opportune
+   times, we switch all new counting over to the alternate counter,
+   and when the old counter hits zero, we know that everyone has
+   voluntarily scheduled. */
+
+/* One for the boot cpu idle thread. */
+atomic_t runcounts[2] = { ATOMIC_INIT(0), ATOMIC_INIT(1) };
+int which_runcount; /* = 0, means runcount[1] active. */
+
+/* The two lists of pending requests. */
+static struct later *later_list[2]; /* = { NULL, NULL } */
+
+static inline int active_count(void)
+{
+	if (which_runcount == PF_RUNCOUNT)
+		return 0;
+	else
+		return 1;
+}
+
+/* We're done: process batch. */
+void noone_running(void)
+{
+	struct later *list;
+
+	/* Remove inactive list for processing. */
+	spin_lock_irq(&later_lock);
+	list = later_list[!active_count()];
+	later_list[!active_count()] = NULL;
+	spin_unlock_irq(&later_lock);
+
+	/* Callback usually frees the entry, so be careful */
+	while (list) {
+		struct later *next = list->next;
+		list->func(list->data);
+		list = next;
+	}
+}
+
+/* Queues future request: if nothing happening, switch queues. */
+void do_later(struct later *head, void (*func)(void *data), void *data)
+{
+	unsigned long flags;
+
+	head->func = func;
+	head->data = data;
+
+	spin_lock_irqsave(&later_lock, flags);
+	/* Add to list */
+	head->next = later_list[active_count()];
+	later_list[active_count()] = head;
+
+	/* If other list is empty, switch them. */
+	if (later_list[!active_count()] == NULL) {
+		/* Beware: which_runcount being read without lock by sched.c */
+		wmb();
+		which_runcount ^= PF_RUNCOUNT;
+	}
+	spin_unlock_irqrestore(&later_lock, flags);
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme(void *completion)
+{
+	complete(completion);
+}
+
+/* Wait until it's later. */
+void wait_for_later(void)
+{
+	DECLARE_COMPLETION(completion);
+	struct later later;
+
+	/* Queue it and wait... */
+	do_later(&later, wakeme, &completion);
+	wait_for_completion(&completion);
+}
+
+#if 0
+#include <linux/proc_fs.h>
+
+static int write_wait(struct file *file, const char *buffer,
+		      unsigned long count, void *data)
+{
+	wait_for_later();
+	return count;
+}
+
+static int read_wait(char *page, char **start, off_t off,
+		     int count, int *eof, void *data)
+{
+	char *p = page;
+	int len;
+
+	spin_lock_irq(&later_lock);
+	p += sprintf(p, "ACTIVE: %u (%p)\nOTHER: %u (%p)\n",
+		     atomic_read(&runcounts[active_count()]),
+		     later_list[active_count()],
+		     atomic_read(&runcounts[!active_count()]),
+		     later_list[!active_count()]);
+	spin_unlock_irq(&later_lock);
+
+	len = (p - page) - off;
+	if (len < 0)
+		len = 0;
+
+	*eof = (len <= count) ? 1 : 0;
+	*start = page + off;
+
+	return len;
+}
+
+static int __init create_wait_proc(void)
+{
+	struct proc_dir_entry *e;
+
+	e = create_proc_entry("wait_for_later", 0644, NULL);
+	e->write_proc = &write_wait;
+	e->read_proc = &read_wait;
+	return 0;
+}
+
+__initcall(create_wait_proc);
+#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.15/kernel/sched.c working-2.5.15-rcu/kernel/sched.c
--- linux-2.5.15/kernel/sched.c	Wed May  1 15:09:29 2002
+++ working-2.5.15-rcu/kernel/sched.c	Thu May 16 15:58:21 2002
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <linux/later.h>
 
 /*
  * Priority of a process goes from 0 to 139. The 0-99
@@ -753,13 +754,16 @@
 	runqueue_t *rq;
 	prio_array_t *array;
 	list_t *queue;
-	int idx;
+	int idx, preempt;
 
 	if (unlikely(in_interrupt()))
 		BUG();
 #if CONFIG_DEBUG_HIGHMEM
 	check_highmem_ptes();
 #endif
+	preempt = (preempt_get_count() & PREEMPT_ACTIVE);
+	if (!preempt)
+		put_runcount();
 need_resched:
 	preempt_disable();
 	prev = current;
@@ -773,7 +779,7 @@
 	 * if entering from preempt_schedule, off a kernel preemption,
 	 * go straight to picking the next task.
 	 */
-	if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
+	if (unlikely(preempt))
 		goto pick_next_task;
 
 	switch (prev->state) {
@@ -842,6 +848,9 @@
 	preempt_enable_no_resched();
 	if (test_thread_flag(TIF_NEED_RESCHED))
 		goto need_resched;
+
+	if (!preempt)
+		get_runcount(current);
 	return;
 }
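
For reference, and not part of the patch: a minimal sketch of how a subsystem
might use the primitive for the safe-teardown case described at the top.  Only
struct later, do_later() and wait_for_later() come from linux/later.h; struct
foo, foo_list, foo_lock and the foo_* functions are illustrative names, and
the sketch assumes readers walk foo_list without taking foo_lock.

/* Usage sketch only: everything named foo_* is hypothetical. */
#include <linux/later.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo
{
	struct foo *next;
	struct later later;	/* handle handed to do_later() */
};

static struct foo *foo_list;
static spinlock_t foo_lock = SPIN_LOCK_UNLOCKED;

/* do_later() callback: by the time this runs, every task that was running
   when the element was unlinked has voluntarily scheduled, so no lockless
   reader can still hold a pointer to it. */
static void free_foo(void *data)
{
	kfree(data);
}

/* Writers unlink under the lock, then defer the kfree(). */
static void foo_del(struct foo *victim)
{
	struct foo **i;

	spin_lock(&foo_lock);
	for (i = &foo_list; *i; i = &(*i)->next) {
		if (*i == victim) {
			*i = victim->next;
			break;
		}
	}
	spin_unlock(&foo_lock);

	do_later(&victim->later, free_foo, victim);
}

/* Synchronous variant for module unload: once nothing points at our code
   any more, block until no task can still be executing inside it. */
static void foo_cleanup(void)
{
	/* e.g. unregister foo from whatever tables reference it, then: */
	wait_for_later();
}

The mechanics mirror RCU generations: do_later() queues the request on the
active list and, if no earlier batch is still pending, flips which_runcount so
subsequent get_runcount() calls charge the other counter; once every task
charged to the old counter has gone through a voluntary schedule() (i.e.
put_runcount() drops a non-active counter to zero), noone_running() runs that
batch's callbacks.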