Name: Read-Copy-Update Patch v0.9
Author: Rusty Russell
Section: Misc
Status: Seems to work

D: This patch implements a variation on Read Copy Update for Linux
D: (see http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf).
D: This version only has an effect on SMP, and is really useful for
D: lock-free module unloading.  Note that there are three variations on
D: this patch: mine, IBM's, and Andrea Arcangeli's, but all now use
D: the same interface.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre6/include/linux/rcupdate.h working-2.5.3-pre6-rcu/include/linux/rcupdate.h
--- linux-2.5.3-pre6/include/linux/rcupdate.h	Thu Jan  1 10:00:00 1970
+++ working-2.5.3-pre6-rcu/include/linux/rcupdate.h	Tue Jan 29 17:50:31 2002
@@ -0,0 +1,43 @@
+#ifndef _LINUX_RCUPDATE_H
+#define _LINUX_RCUPDATE_H
+/* Read-Copy-Update For Linux. (c) 2001 Rusty Russell. */
+#include <linux/config.h>
+#include <linux/cache.h>
+#include <asm/atomic.h>
+
+#ifdef CONFIG_SMP
+struct rcu_head
+{
+        struct rcu_head *next;
+        void (*func)(void *data);
+        void *data;
+};
+
+/* Count of pending requests: for optimization in schedule() */
+extern atomic_t rcu_pending;
+static inline int is_rcu_pending(void)
+{
+        return atomic_read(&rcu_pending) != 0;
+}
+
+/* Wait for every CPU to have moved on.  Sleeps. */
+void synchronize_kernel(void);
+
+/* Queues future request: may sleep on UP if func sleeps... */
+void call_rcu(struct rcu_head *head, void (*func)(void *data), void *data);
+
+#else /* !SMP */
+
+#define is_rcu_pending() 0
+
+static inline void synchronize_kernel(void)
+{
+}
+
+/* Call immediately for UP */
+#define call_rcu(head, func, data) (func(data))
+#endif /*CONFIG_SMP*/
+
+/* Called by schedule() when batch reference count drops to zero. */
+void rcu_batch_done(void);
+#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre6/kernel/Makefile working-2.5.3-pre6-rcu/kernel/Makefile
--- linux-2.5.3-pre6/kernel/Makefile	Tue Jan 29 09:17:09 2002
+++ working-2.5.3-pre6-rcu/kernel/Makefile	Tue Jan 29 18:42:36 2002
@@ -10,7 +10,7 @@
 O_TARGET := kernel.o
 
 export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o \
-        printk.o
+        printk.o rcupdate.o
 
 obj-y     = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
             module.o exit.o itimer.o info.o time.o softirq.o resource.o \
@@ -20,6 +20,7 @@
 obj-$(CONFIG_UID16)     += uid16.o
 obj-$(CONFIG_MODULES)   += ksyms.o
 obj-$(CONFIG_PM)        += pm.o
+obj-$(CONFIG_SMP)       += rcupdate.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra , the -fno-omit-frame-pointer is
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre6/kernel/rcupdate.c working-2.5.3-pre6-rcu/kernel/rcupdate.c
--- linux-2.5.3-pre6/kernel/rcupdate.c	Thu Jan  1 10:00:00 1970
+++ working-2.5.3-pre6-rcu/kernel/rcupdate.c	Tue Jan 29 17:50:31 2002
@@ -0,0 +1,116 @@
+/* Read-Copy-Update For Linux.
+   Copyright (c) International Business Machines Corp.
+   Copyright (c) Dipankar Sarma (dipankar@in.ibm.com)
+   Copyright (c) Paul McKenney (Paul.McKenney@us.ibm.com)
+   Copyright (C) 2001 Rusty Russell.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+   Concept stolen from original Read Copy Update paper:
+
+   http://www.rdrop.com/users/paulmck/rclock/intro/rclock_intro.html and
+   http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+
+   Or the OLS paper:
+
+   http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf
+*/
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/cache.h>
+#include <linux/module.h>
+
+/* Count of pending requests: for optimization in schedule() */
+atomic_t rcu_pending = ATOMIC_INIT(0);
+
+/* Two batches per CPU: one (pending) is an internal queue of waiting
+   requests, being prepended to as new requests come in.  The other
+   (rcu_waiting) is waiting for completion of schedule()s on all CPUs. */
+struct rcu_batch
+{
+        /* Two sets of queues: one queueing, one waiting quiescent finish */
+        int queueing;
+        /* Three queues: hard interrupt, soft interrupt, neither */
+        struct rcu_head *head[2][3];
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+static struct rcu_batch rcu_batch[NR_CPUS];
+
+void call_rcu(struct rcu_head *head, void (*func)(void *data), void *data)
+{
+        unsigned cpu = smp_processor_id();
+        unsigned state;
+        struct rcu_head **headp;
+
+        head->func = func;
+        head->data = data;
+        if (in_interrupt()) {
+                if (in_irq()) state = 2;
+                else state = 1;
+        } else state = 0;
+
+        /* Figure out which queue we're on. */
+        headp = &rcu_batch[cpu].head[rcu_batch[cpu].queueing][state];
+
+        atomic_inc(&rcu_pending);
+        /* Prepend to this CPU's list: no locks needed. */
+        head->next = *headp;
+        *headp = head;
+}
+
+/* Calls every callback in the waiting rcu batch. */
+void rcu_batch_done(void)
+{
+        struct rcu_head *i, *next;
+        struct rcu_batch *mybatch;
+        unsigned int q;
+
+        mybatch = &rcu_batch[smp_processor_id()];
+        /* Call callbacks: probably delete themselves, may schedule. */
+        for (q = 0; q < 3; q++) {
+                for (i = mybatch->head[!mybatch->queueing][q]; i; i = next) {
+                        next = i->next;
+                        i->func(i->data);
+                        atomic_dec(&rcu_pending);
+                }
+                mybatch->head[!mybatch->queueing][q] = NULL;
+        }
+
+        /* Start queueing on this batch. */
+        mybatch->queueing = !mybatch->queueing;
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+        complete(completion);
+}
+
+void synchronize_kernel(void)
+{
+        struct rcu_head rcu;
+        DECLARE_COMPLETION(completion);
+
+        /* Will wake me after RCU finished */
+        call_rcu(&rcu, wakeme_after_rcu, &completion);
+
+        /* Wait for it */
+        wait_for_completion(&completion);
+}
+
+EXPORT_SYMBOL(synchronize_kernel);
+EXPORT_SYMBOL(call_rcu);
--- linux-2.5.3-pre6/kernel/sched.c	Tue Jan 29 09:17:09 2002
+++ working-2.5.3-pre6-hotcpu/kernel/sched.c	Wed Jan 30 16:13:27 2002
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <linux/rcupdate.h>
 
 #define BITMAP_SIZE ((((MAX_PRIO+7)/8)+sizeof(long)-1)/sizeof(long))
 
@@ -44,6 +45,7 @@
         unsigned long nr_running, nr_switches, expired_timestamp;
         task_t *curr, *idle;
         prio_array_t *active, *expired, arrays[2];
+        int ring_count, finished_count;
         int prev_nr_running[NR_CPUS];
 } ____cacheline_aligned;
 
@@ -54,6 +56,8 @@
 #define task_rq(p)              cpu_rq((p)->cpu)
 #define cpu_curr(cpu)           (cpu_rq(cpu)->curr)
 #define rt_task(p)              ((p)->policy != SCHED_OTHER)
+#define ring_count(cpu)         (cpu_rq(cpu)->ring_count)
+#define finished_count(cpu)     (cpu_rq(cpu)->finished_count)
 
 static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags)
@@ -624,6 +630,32 @@
 
 void scheduling_functions_start_here(void) { }
 
+static inline void handle_rcu(void)
+{
+        int i, count, diff = 1;
+
+        /* Look for previous non-idle CPU */
+        for (i = smp_processor_id()-1; i != smp_processor_id(); i--, diff++) {
+                if (i < 0) i = smp_num_cpus - 1;
+                if (cpu_curr(i) != cpu_rq(i)->idle)
+                        break;
+        }
+
+        count = ring_count(i) + diff;
+        /* Do we have to update ring count? */
+        if (count != ring_count(smp_processor_id())) {
+                /* Do subtraction to avoid int wrap corner case */
+                if (count - finished_count(smp_processor_id()) >= 0) {
+                        /* Avoid reentry: temporarily set finished_count
+                           far in the future */
+                        finished_count(smp_processor_id()) = count + INT_MAX;
+                        rcu_batch_done();
+                        finished_count(smp_processor_id()) = count + smp_num_cpus;
+                }
+                ring_count(smp_processor_id()) = count;
+        }
+}
+
 /*
  * 'schedule()' is the main scheduler function.
  */
@@ -638,6 +670,9 @@
         if (unlikely(in_interrupt()))
                 BUG();
         release_kernel_lock(prev, smp_processor_id());
+        if (unlikely(is_rcu_pending()))
+                handle_rcu();
+
         spin_lock_irq(&rq->lock);
         switch (prev->state) {
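
For illustration only, and not part of the patch: below is a minimal sketch of how a
subsystem might use the interface above.  The foo structure and every function name in
the sketch are invented; only struct rcu_head, call_rcu() and synchronize_kernel() come
from the patch.  Readers of global_foo take no lock at all; all they have to do is avoid
blocking while they hold a reference, since a grace period is over once every CPU has
been through schedule().

/* Sketch only: "foo", "foo_free", "foo_update" and "foo_unload" are made-up
 * names.  The RCU calls are the interface added by this patch. */
#include <linux/slab.h>
#include <linux/rcupdate.h>
#include <asm/system.h>         /* wmb() */

struct foo {
        int value;
        struct rcu_head rcu;    /* embedded so the old copy can be queued */
};

static struct foo *global_foo;  /* readers dereference this without locks */

static void foo_free(void *data)
{
        /* Runs only after every CPU has passed through schedule(), so no
         * reader can still be looking at the old copy. */
        kfree(data);
}

/* Read, copy, update: publish a new copy, defer freeing the old one. */
static void foo_update(int value)
{
        struct foo *new, *old;

        new = kmalloc(sizeof(*new), GFP_KERNEL);
        if (!new)
                return;
        new->value = value;

        old = global_foo;
        wmb();                  /* new contents visible before the new pointer */
        global_foo = new;       /* readers now see either copy; both are valid */

        if (old)
                call_rcu(&old->rcu, foo_free, old);  /* freed after grace period */
}

/* Blocking form, as for lock-free module unloading: unpublish the object,
 * wait for readers to drain, then it is safe to free (or to unload). */
static void foo_unload(void)
{
        struct foo *old = global_foo;

        global_foo = NULL;
        synchronize_kernel();   /* sleeps until every CPU has scheduled */
        kfree(old);
}

Note that foo_update() never waits for readers, which is what keeps the update side
cheap; synchronize_kernel() is simply the sleeping wrapper the patch builds from
call_rcu() and a completion, for callers (such as module unload) that can afford to
block.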