Name: Reference Count Simplification
Author: Roman Zippel, Rusty Russell
Status: Experimental
D: This patch vastly simplifies the try_module_get code, by optimistically
D: incrementing the module count, then correcting that if necessary in
D: a slow path.
D:
D: Fixes by Rusty:
D: o Restore rmmod --wait.
D: o Restore --force so it works when refcount != 0.
D: o Change spin_lock_irqsave to spin_lock in module unload.
D: o Rename module_get_sync to try_module_get_slow.
D: o Use modlist lock instead of unload lock.
D: o Add extra module refcount check on failed init for buggy refcounters.
D: o Fix memory barriers: two needed on UP, and full mb() needed.

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .25762-linux-2.6.0/include/linux/module.h .25762-linux-2.6.0.updated/include/linux/module.h
--- .25762-linux-2.6.0/include/linux/module.h	2003-09-22 10:26:13.000000000 +1000
+++ .25762-linux-2.6.0.updated/include/linux/module.h	2003-12-23 16:11:22.000000000 +1100
@@ -299,16 +299,18 @@ static inline void __module_get(struct m
 	}
 }
 
+extern int try_module_get_slow(struct module *module);
+
 static inline int try_module_get(struct module *module)
 {
 	int ret = 1;
 
 	if (module) {
 		unsigned int cpu = get_cpu();
-		if (likely(module_is_live(module)))
-			local_inc(&module->ref[cpu].count);
-		else
-			ret = 0;
+		local_inc(&module->ref[cpu].count);
+		smp_mb(); /* Inc must happen before live flag tested */
+		if (unlikely(!module_is_live(module)))
+			ret = try_module_get_slow(module);
 		put_cpu();
 	}
 	return ret;

diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .25762-linux-2.6.0/kernel/module.c .25762-linux-2.6.0.updated/kernel/module.c
--- .25762-linux-2.6.0/kernel/module.c	2003-11-24 15:42:33.000000000 +1100
+++ .25762-linux-2.6.0.updated/kernel/module.c	2003-12-23 16:28:21.000000000 +1100
@@ -53,7 +53,7 @@
 #define symbol_is(literal, string) \
 	(strcmp(MODULE_SYMBOL_PREFIX literal, (string)) == 0)
 
-/* Protects module list */
+/* Protects module list and module structure internals */
 static spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED;
 
 /* List of modules, protected by module_mutex AND modlist_lock */
@@ -457,158 +457,6 @@ static void module_unload_free(struct mo
 	}
 }
 
-#ifdef CONFIG_SMP
-/* Thread to stop each CPU in user context. */
-enum stopref_state {
-	STOPREF_WAIT,
-	STOPREF_PREPARE,
-	STOPREF_DISABLE_IRQ,
-	STOPREF_EXIT,
-};
-
-static enum stopref_state stopref_state;
-static unsigned int stopref_num_threads;
-static atomic_t stopref_thread_ack;
-
-static int stopref(void *cpu)
-{
-	int irqs_disabled = 0;
-	int prepared = 0;
-
-	sprintf(current->comm, "kmodule%lu\n", (unsigned long)cpu);
-
-	/* Highest priority we can manage, and move to right CPU. */
-#if 0 /* FIXME */
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-	setscheduler(current->pid, SCHED_FIFO, &param);
-#endif
-	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
-
-	/* Ack: we are alive */
-	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-	atomic_inc(&stopref_thread_ack);
-
-	/* Simple state machine */
-	while (stopref_state != STOPREF_EXIT) {
-		if (stopref_state == STOPREF_DISABLE_IRQ && !irqs_disabled) {
-			local_irq_disable();
-			irqs_disabled = 1;
-			/* Ack: irqs disabled. */
-			mb(); /* Must read state first. */
-			atomic_inc(&stopref_thread_ack);
-		} else if (stopref_state == STOPREF_PREPARE && !prepared) {
-			/* Everyone is in place, hold CPU. */
-			preempt_disable();
-			prepared = 1;
-			mb(); /* Must read state first. */
-			atomic_inc(&stopref_thread_ack);
-		}
-		if (irqs_disabled || prepared)
-			cpu_relax();
-		else
-			yield();
-	}
-
-	/* Ack: we are exiting. */
-	mb(); /* Must read state first. */
-	atomic_inc(&stopref_thread_ack);
-
-	if (irqs_disabled)
-		local_irq_enable();
-	if (prepared)
-		preempt_enable();
-
-	return 0;
-}
-
-/* Change the thread state */
-static void stopref_set_state(enum stopref_state state, int sleep)
-{
-	atomic_set(&stopref_thread_ack, 0);
-	wmb();
-	stopref_state = state;
-	while (atomic_read(&stopref_thread_ack) != stopref_num_threads) {
-		if (sleep)
-			yield();
-		else
-			cpu_relax();
-	}
-}
-
-/* Stop the machine. Disables irqs. */
-static int stop_refcounts(void)
-{
-	unsigned int i, cpu;
-	cpumask_t old_allowed;
-	int ret = 0;
-
-	/* One thread per cpu. We'll do our own. */
-	cpu = smp_processor_id();
-
-	/* FIXME: racy with set_cpus_allowed. */
-	old_allowed = current->cpus_allowed;
-	set_cpus_allowed(current, cpumask_of_cpu(cpu));
-
-	atomic_set(&stopref_thread_ack, 0);
-	stopref_num_threads = 0;
-	stopref_state = STOPREF_WAIT;
-
-	/* No CPUs can come up or down during this. */
-	down(&cpucontrol);
-
-	for (i = 0; i < NR_CPUS; i++) {
-		if (i == cpu || !cpu_online(i))
-			continue;
-		ret = kernel_thread(stopref, (void *)(long)i, CLONE_KERNEL);
-		if (ret < 0)
-			break;
-		stopref_num_threads++;
-	}
-
-	/* Wait for them all to come to life. */
-	while (atomic_read(&stopref_thread_ack) != stopref_num_threads)
-		yield();
-
-	/* If some failed, kill them all. */
-	if (ret < 0) {
-		stopref_set_state(STOPREF_EXIT, 1);
-		up(&cpucontrol);
-		return ret;
-	}
-
-	/* Don't schedule us away at this point, please. */
-	preempt_disable();
-
-	/* Now they are all scheduled, make them hold the CPUs, ready. */
-	stopref_set_state(STOPREF_PREPARE, 0);
-
-	/* Make them disable irqs. */
-	stopref_set_state(STOPREF_DISABLE_IRQ, 0);
-
-	local_irq_disable();
-	return 0;
-}
-
-/* Restart the machine. Re-enables irqs. */
-static void restart_refcounts(void)
-{
-	stopref_set_state(STOPREF_EXIT, 0);
-	local_irq_enable();
-	preempt_enable();
-	up(&cpucontrol);
-}
-#else /* ...!SMP */
-static inline int stop_refcounts(void)
-{
-	local_irq_disable();
-	return 0;
-}
-static inline void restart_refcounts(void)
-{
-	local_irq_enable();
-}
-#endif
-
 unsigned int module_refcount(struct module *mod)
 {
 	unsigned int i, total = 0;
@@ -658,6 +506,22 @@ static void wait_for_zero_refcount(struc
 	down(&module_mutex);
 }
 
+int try_module_get_slow(struct module *module)
+{
+	unsigned long flags;
+	int ret = 1;
+
+	spin_lock_irqsave(&modlist_lock, flags);
+	if (!module_is_live(module)) {
+		local_dec(&module->ref[smp_processor_id()].count);
+		wake_up_process(module->waiter);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&modlist_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(try_module_get_slow);
+
 asmlinkage long
 sys_delete_module(const char __user *name_user, unsigned int flags)
 {
@@ -706,26 +570,26 @@ sys_delete_module(const char __user *nam
 			goto out;
 		}
 	}
-	/* Stop the machine so refcounts can't move: irqs disabled. */
-	DEBUGP("Stopping refcounts...\n");
-	ret = stop_refcounts();
-	if (ret != 0)
-		goto out;
+
+	spin_lock_irq(&modlist_lock);
+	/* Force everyone through the slow path: they'll spin on lock. */
+	mod->state = MODULE_STATE_GOING;
+	mb(); /* Must set state before reading counters. */
 
 	/* If it's not unused, quit unless we are told to block. */
 	if ((flags & O_NONBLOCK) && module_refcount(mod) != 0) {
 		forced = try_force(flags);
 		if (!forced) {
+			/* Back to normal: reset state and release lock. */
+			mod->state = MODULE_STATE_LIVE;
+			spin_unlock_irq(&modlist_lock);
 			ret = -EWOULDBLOCK;
-			restart_refcounts();
 			goto out;
 		}
 	}
 
-	/* Mark it as dying. */
 	mod->waiter = current;
-	mod->state = MODULE_STATE_GOING;
-	restart_refcounts();
+	spin_unlock_irq(&modlist_lock);
 
 	/* Never wait if forced. */
 	if (!forced && module_refcount(mod) != 0)
@@ -734,6 +598,7 @@ sys_delete_module(const char __user *nam
 	/* Final destruction now noone is using it. */
 	mod->exit();
 	free_module(mod);
+	ret = 0;
 
  out:
 	up(&module_mutex);
@@ -1731,25 +1596,26 @@ sys_init_module(void __user *umod,
 
 	/* Start the module */
 	ret = mod->init();
+
+	down(&module_mutex);
 	if (ret < 0) {
 		/* Init routine failed: abort. Try to protect us from
		   buggy refcounters. */
 		mod->state = MODULE_STATE_GOING;
+		mb(); /* Set state before checking reference counts */
 		synchronize_kernel();
-		if (mod->unsafe)
+		if (mod->unsafe || module_refcount(mod) != 0)
 			printk(KERN_ERR "%s: module is now stuck!\n",
 			       mod->name);
 		else {
 			module_put(mod);
-			down(&module_mutex);
 			free_module(mod);
-			up(&module_mutex);
 		}
+		up(&module_mutex);
 		return ret;
 	}
 
 	/* Now it's a first class citizen! */
-	down(&module_mutex);
 	mod->state = MODULE_STATE_LIVE;
 	/* Drop initial reference. */
 	module_put(mod);
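
For anyone who wants to play with the ordering outside the kernel, here is a
minimal user-space sketch of the fast-path/slow-path split, assuming C11
atomics and pthreads.  The seq_cst atomics stand in for local_inc() plus
smp_mb(), the mutex stands in for modlist_lock, and mod_get, mod_get_slow,
mod_put and mod_kill are illustrative names, not the kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <pthread.h>
#include <sched.h>

struct mod {
	atomic_int refcount;	/* stand-in for the per-cpu ref counts */
	atomic_bool live;	/* stand-in for module_is_live() */
	pthread_mutex_t lock;	/* stand-in for modlist_lock */
};

/* Slow path: we already incremented, but the module looked dead.
   Re-check under the lock, and back the increment out if so. */
static bool mod_get_slow(struct mod *m)
{
	bool ret = true;

	pthread_mutex_lock(&m->lock);
	if (!atomic_load(&m->live)) {
		atomic_fetch_sub(&m->refcount, 1);	/* undo optimistic inc */
		ret = false;
	}
	pthread_mutex_unlock(&m->lock);
	return ret;
}

/* Fast path: increment first, then test the flag.  The seq_cst
   ordering of the atomics plays the role of the smp_mb() above. */
static bool mod_get(struct mod *m)
{
	atomic_fetch_add(&m->refcount, 1);
	if (!atomic_load(&m->live))
		return mod_get_slow(m);
	return true;
}

static void mod_put(struct mod *m)
{
	atomic_fetch_sub(&m->refcount, 1);
}

/* Unload side: flip the flag under the lock (forcing racing getters
   through the slow path), then wait for the count to drain.  The real
   patch uses wake_up_process()/wait_for_zero_refcount() instead of
   this polling loop. */
static void mod_kill(struct mod *m)
{
	pthread_mutex_lock(&m->lock);
	atomic_store(&m->live, false);
	pthread_mutex_unlock(&m->lock);
	while (atomic_load(&m->refcount) != 0)
		sched_yield();
}

The invariant this sketch demonstrates is the one the barrier buys: a getter
either increments before the flag flips, so the unloader's refcount read sees
it, or it observes the flag down and takes the slow path, where the decrement
happens under the same lock the unloader holds while changing state.  Either
way no reference is leaked or lost.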