Name: Allow Arbitrary Number of Init and Exit Functions Author: Rusty Russell, Jonathan Corbet Status: Tested on 2.5.73-bk1 One longstanding complaint is that a module can have only one module_init and one module_exit (built-in code, however, can have multiple __initcalls). This means, for example, that it is not possible to write a "module_proc_entry(name, readfn)" macro which can be used like so: module_init(myinitfn); module_exit(myexitfn); module_proc_entry("some/path/foo", read_foo); The reason we don't allow multiple init functions in modules is that if one fails, we don't know what to do. The solution is to pair each init function explicitly with an exit function, hence module_init_exit(), which also takes a priority argument. If the module fails to load, the exit function paired with each init function that has already been called is run, in reverse order. All exit functions are called on module removal, in reverse priority order. For non-modules, the exit argument is discarded, and the initcalls are run in ascending priority order at boot (order within a priority is still controlled by link order, as before). This means we can also get rid of the initcall levels at link time and simply use priorities. This patch also uses kallsyms to print function names when "initcall_debug" is set. 
Index: linux-2.6.10-bk14-Module/scripts/mod/modpost.h =================================================================== --- linux-2.6.10-bk14-Module.orig/scripts/mod/modpost.h 2004-12-28 12:31:11.000000000 +1100 +++ linux-2.6.10-bk14-Module/scripts/mod/modpost.h 2005-01-12 13:30:03.119077432 +1100 @@ -74,8 +74,6 @@ struct symbol *unres; int seen; int skip; - int has_init; - int has_cleanup; struct buffer dev_table_buf; char srcversion[25]; }; Index: linux-2.6.10-bk14-Module/init/main.c =================================================================== --- linux-2.6.10-bk14-Module.orig/init/main.c 2005-01-12 13:09:16.000000000 +1100 +++ linux-2.6.10-bk14-Module/init/main.c 2005-01-12 13:32:43.996620328 +1100 @@ -530,37 +530,66 @@ struct task_struct *child_reaper = &init_task; -extern initcall_t __initcall_start[], __initcall_end[]; +/* Defined by linker magic. */ +extern struct kernel_init __initcall_start[], __initcall_end[]; -static void __init do_initcalls(void) +/* Find first initfunc minimally >= this prio level. 
*/ +static struct kernel_init *find_next_prio(int min_prio_todo) { - initcall_t *call; + struct kernel_init *i, *best; + + best = NULL; + for (i = __initcall_start; i < __initcall_end; i++) { + if (i->prio >= min_prio_todo + && (!best || i->prio < best->prio)) + best = i; + } + return best; +} + +static __init void do_initcall(initcall_t call) +{ + char *msg; int count = preempt_count(); - for (call = __initcall_start; call < __initcall_end; call++) { - char *msg; + if (initcall_debug) { + printk(KERN_DEBUG "Calling initcall 0x%p", *call); + print_fn_descriptor_symbol(": %s()", (unsigned long) *call); + printk("\n"); + } - if (initcall_debug) { - printk(KERN_DEBUG "Calling initcall 0x%p", *call); - print_fn_descriptor_symbol(": %s()", (unsigned long) *call); - printk("\n"); - } + call(); - (*call)(); + msg = NULL; + if (preempt_count() != count) { + msg = "preemption imbalance"; + preempt_count() = count; + } + if (irqs_disabled()) { + msg = "disabled interrupts"; + local_irq_enable(); + } + if (msg) { + printk("error in initcall at 0x%p: " + "returned with %s\n", *call, msg); + } +} - msg = NULL; - if (preempt_count() != count) { - msg = "preemption imbalance"; - preempt_count() = count; - } - if (irqs_disabled()) { - msg = "disabled interrupts"; - local_irq_enable(); - } - if (msg) { - printk("error in initcall at 0x%p: " - "returned with %s\n", *call, msg); +static void __init do_initcalls(void) +{ + struct kernel_init *next, *i; + int min_prio_todo = INT_MIN; + + /* This is O(num calls * num levels), which is OK. */ + while ((next = find_next_prio(min_prio_todo)) != NULL) { + if (initcall_debug) + printk("Doing priority %i initcalls:\n", next->prio); + /* Now call all at that priority. 
*/ + for (i = next; i < __initcall_end; i++) { + if (i->prio == next->prio) + do_initcall(i->init); } + min_prio_todo = next->prio+1; } /* Make sure there is no pending stuff from the initcall sequence */ Index: linux-2.6.10-bk14-Module/include/linux/init.h =================================================================== --- linux-2.6.10-bk14-Module.orig/include/linux/init.h 2005-01-12 13:28:57.629033432 +1100 +++ linux-2.6.10-bk14-Module/include/linux/init.h 2005-01-12 13:30:03.123076824 +1100 @@ -3,6 +3,7 @@ #include #include +#include /* These macros are used to mark some functions or * initialized data (doesn't apply to uninitialized data) @@ -69,35 +70,54 @@ /* Defined in init/main.c */ extern char saved_command_line[]; + +struct module_init_exit +{ + int prio; + initcall_t init; + exitcall_t exit; +}; + +struct kernel_init +{ + int prio; + initcall_t init; +}; #endif #ifndef MODULE #ifndef __ASSEMBLY__ - /* initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined * by link order. * For backwards compatibility, initcall() puts the call in * the device init subsection. */ +/* Suppress unused warning on exitfn, and test type. */ +#define __exitcall(fn) \ +static inline exitcall_t __unique_id(exit_test)(void) { return fn; } + +/* Pair of calls: one called at boot, one at exit (ie. never, when not + a module). Unlike module_init, you can have any number of these. + prio controls ordering: negative runs before module_init, positive after. + Within a priority, link order rules. 
+*/ +#define module_init_exit(prio, initfn, exitfn) \ + __exitcall(exitfn); \ + static struct kernel_init __initcall_##initfn \ + __attribute__((unused,__section__ (".initcall.init"))) \ + = { prio, initfn } + +#define core_initcall(fn) module_init_exit(-5000, fn, NULL) +#define postcore_initcall(fn) module_init_exit(-4000, fn, NULL) +#define arch_initcall(fn) module_init_exit(-3000, fn, NULL) +#define subsys_initcall(fn) module_init_exit(-2000, fn, NULL) +#define fs_initcall(fn) module_init_exit(-1000, fn, NULL) +#define device_initcall(fn) module_init_exit(0, fn, NULL) +#define late_initcall(fn) module_init_exit(1000, fn, NULL) -#define __define_initcall(level,fn) \ - static initcall_t __initcall_##fn __attribute_used__ \ - __attribute__((__section__(".initcall" level ".init"))) = fn - -#define core_initcall(fn) __define_initcall("1",fn) -#define postcore_initcall(fn) __define_initcall("2",fn) -#define arch_initcall(fn) __define_initcall("3",fn) -#define subsys_initcall(fn) __define_initcall("4",fn) -#define fs_initcall(fn) __define_initcall("5",fn) -#define device_initcall(fn) __define_initcall("6",fn) -#define late_initcall(fn) __define_initcall("7",fn) - -#define __initcall(fn) device_initcall(fn) - -#define __exitcall(fn) \ - static exitcall_t __exitcall_##fn __exit_call = fn +#define __initcall(fn) device_initcall(fn) #define console_initcall(fn) \ static initcall_t __initcall_##fn \ @@ -143,7 +163,6 @@ /* Relies on saved_command_line being set */ void __init parse_early_param(void); -#endif /* __ASSEMBLY__ */ /** * module_init() - driver initialization entry point @@ -166,6 +185,7 @@ * There can only be one per module. */ #define module_exit(x) __exitcall(x); +#endif /* __ASSEMBLY__ */ #else /* MODULE */ @@ -180,23 +200,25 @@ #define security_initcall(fn) module_init(fn) -/* These macros create a dummy inline: gcc 2.9x does not count alias - as usage, hence the `unused function' warning when __init functions - are declared static. 
We use the dummy __*_module_inline functions - both to kill the warning and check the type of the init/cleanup - function. */ - -/* Each module must use one module_init(), or one no_module_init */ -#define module_init(initfn) \ - static inline initcall_t __inittest(void) \ - { return initfn; } \ - int __init_module(void) __attribute__((alias(#initfn))); - -/* This is only required if you want to be unloadable. */ -#define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ - { return exitfn; } \ - void __cleanup_module(void) __attribute__((alias(#exitfn))); +/* Each module can have one module_init(). */ +#define module_init(initfn) \ + static struct module_init_exit module_init \ + __attribute__ ((unused,__section__ ("__initexit"))) \ + = { 0, initfn, NULL }; + +/* If you have a module_init, and want to be unloadable, you need this too. */ +#define module_exit(exitfn) \ + static struct module_init_exit module_exit \ + __attribute__ ((unused,__section__ ("__initexit"))) \ + = { 0, NULL, exitfn }; + +/* Pair of calls: one called at init, one at exit. Unlike + module_init/module_exit, you can have any number of these: ordering is + controlled by priority, in ascending order: module_init is 0. */ +#define module_init_exit(prio, initfn, exitfn) \ + static struct module_init_exit __unique_id(mie) \ + __attribute__ ((unused,__section__ ("__initexit"))) \ + = { prio, initfn, exitfn } #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup_null_param(str, unique_id) /* nothing */ Index: linux-2.6.10-bk14-Module/kernel/module.c =================================================================== --- linux-2.6.10-bk14-Module.orig/kernel/module.c 2005-01-12 13:28:32.544846808 +1100 +++ linux-2.6.10-bk14-Module/kernel/module.c 2005-01-12 13:30:03.123076824 +1100 @@ -514,6 +514,20 @@ /* This exists whether we can unload or not */ static void free_module(struct module *mod); +/* Need as many exits as inits to unload. 
*/ +static int can_unload(const struct module *mod) +{ + int i, balance = 0; + + for (i = 0; i < mod->num_ie_pairs; i++) { + if (mod->ie_pairs[i].init) + balance++; + if (mod->ie_pairs[i].exit) + balance--; + } + return balance == 0; +} + static void wait_for_zero_refcount(struct module *mod) { /* Since we might sleep for some time, drop the semaphore first */ @@ -534,6 +548,7 @@ { struct module *mod; char name[MODULE_NAME_LEN]; + int i; int ret, forced = 0; if (!capable(CAP_SYS_MODULE)) @@ -567,9 +582,8 @@ goto out; } - /* If it has an init func, it must have an exit func to unload */ - if ((mod->init != NULL && mod->exit == NULL) - || mod->unsafe) { + /* Init and exit functions must balance to unload. */ + if (!can_unload(mod) || mod->unsafe) { forced = try_force(flags); if (!forced) { /* This module can't be removed */ @@ -592,12 +606,13 @@ if (!forced && module_refcount(mod) != 0) wait_for_zero_refcount(mod); - /* Final destruction now noone is using it. */ - if (mod->exit != NULL) { - up(&module_mutex); - mod->exit(); - down(&module_mutex); - } + /* Final destruction now noone is using it, reverse priority order. */ + up(&module_mutex); + for (i = mod->num_ie_pairs - 1; i >= 0; i--) + if (mod->ie_pairs[i].exit) + mod->ie_pairs[i].exit(); + down(&module_mutex); + module_notify(mod, MODULE_STATE_GONE); free_module(mod); @@ -625,7 +640,7 @@ seq_printf(m, "[unsafe],"); } - if (mod->init != NULL && mod->exit == NULL) { + if (!can_unload(mod)) { /* init/exit unbalanced: cannot be removed */ printed_something = 1; seq_printf(m, "[permanent],"); } @@ -1414,6 +1429,23 @@ printk("ARGS: %s\n", args); } +/* Order init_exit pairs in ascending priority. num is small. 
*/ +static void sort_ie_pairs(struct module_init_exit *ie_pairs, unsigned int num) +{ + unsigned int i, j; + struct module_init_exit tmp; + + for (i = 1; i < num; i++) { + for (j = i; j > 0; j--) { + if (ie_pairs[j-1].prio > ie_pairs[j].prio) { + tmp = ie_pairs[j-1]; + ie_pairs[j-1] = ie_pairs[j]; + ie_pairs[j] = tmp; + } + } + } +} + /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static struct module *load_module(void __user *umod, @@ -1425,7 +1457,7 @@ char *secstrings, *args, *modmagic, *strtab = NULL; unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, exportindex, modindex, obsparmindex, infoindex, gplindex, - crcindex, gplcrcindex, versindex, pcpuindex; + crcindex, gplcrcindex, versindex, pcpuindex, iepairindex; long arglen; struct module *mod; long err = 0; @@ -1513,6 +1545,7 @@ obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); + iepairindex = find_sec(hdr, sechdrs, secstrings, "__initexit"); pcpuindex = find_pcpusec(hdr, sechdrs, secstrings); /* Don't keep modinfo section */ @@ -1686,6 +1719,12 @@ mod->extable = extable = (void *)sechdrs[exindex].sh_addr; sort_extable(extable, extable + mod->num_exentries); + /* Set up init/exit pair table, and sort into prio order. */ + mod->num_ie_pairs = sechdrs[iepairindex].sh_size + / sizeof(struct module_init_exit); + mod->ie_pairs = (void *)sechdrs[iepairindex].sh_addr; + sort_ie_pairs(mod->ie_pairs, mod->num_ie_pairs); + /* Finally, copy percpu area over. 
*/ percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); @@ -1763,7 +1802,7 @@ const char __user *uargs) { struct module *mod; - int ret = 0; + int i, ret = 0; /* Must have permission */ if (!capable(CAP_SYS_MODULE)) @@ -1800,25 +1839,15 @@ module_notify(mod, MODULE_STATE_COMING); /* Start the module */ - if (mod->init != NULL) - ret = mod->init(); - if (ret < 0) { - /* Init routine failed: abort. Try to protect us from - buggy refcounters. */ - mod->state = MODULE_STATE_GOING; - module_notify(mod, MODULE_STATE_GOING); - synchronize_kernel(); - if (mod->unsafe) - printk(KERN_ERR "%s: module is now stuck!\n", - mod->name); - else { - module_put(mod); - module_notify(mod, MODULE_STATE_GONE); - down(&module_mutex); - free_module(mod); - up(&module_mutex); + for (i = 0; i < mod->num_ie_pairs; i++) { + if (mod->ie_pairs[i].init == NULL) + continue; + ret = mod->ie_pairs[i].init(); + if (ret != 0) { + DEBUGP("%s: init/exit pair init=%p failed: %i\n", + mod->name, mod->ie_pairs[i].init, ret); + goto unwind_iepairs; } - return ret; } /* Now it's a first class citizen! */ @@ -1834,6 +1863,26 @@ module_notify(mod, MODULE_STATE_LIVE); return 0; + +unwind_iepairs: + /* Init routine failed: abort. Try to protect us from buggy + refcounters. 
*/ + mod->state = MODULE_STATE_GOING; + module_notify(mod, MODULE_STATE_GOING); + module_put(mod); + synchronize_kernel(); + if (module_refcount(mod)) { + printk(KERN_ERR "%s: module is now stuck!\n", mod->name); + return ret; + } + + while (--i >= 0) + if (mod->ie_pairs[i].exit) + mod->ie_pairs[i].exit(); + down(&module_mutex); + free_module(mod); + up(&module_mutex); + return ret; } static inline int within(unsigned long addr, void *start, unsigned long size) Index: linux-2.6.10-bk14-Module/scripts/mod/modpost.c =================================================================== --- linux-2.6.10-bk14-Module.orig/scripts/mod/modpost.c 2005-01-12 13:28:57.699022792 +1100 +++ linux-2.6.10-bk14-Module/scripts/mod/modpost.c 2005-01-12 13:30:03.124076672 +1100 @@ -382,10 +382,6 @@ add_exported_symbol(symname + strlen(KSYMTAB_PFX), mod, NULL); } - if (strcmp(symname, MODULE_SYMBOL_PREFIX "__init_module") == 0) - mod->has_init = 1; - if (strcmp(symname, MODULE_SYMBOL_PREFIX "__cleanup_module") == 0) - mod->has_cleanup = 1; break; } } @@ -529,19 +525,10 @@ buf_printf(b, "\n"); buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n"); buf_printf(b, "\n"); - buf_printf(b, "extern int __init_module(void);\n"); - buf_printf(b, "extern void __cleanup_module(void);\n"); - buf_printf(b, "\n"); buf_printf(b, "#undef unix\n"); /* We have a module called "unix" */ buf_printf(b, "struct module __this_module\n"); buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n"); buf_printf(b, " .name = __stringify(KBUILD_MODNAME),\n"); - if (mod->has_init) - buf_printf(b, " .init = __init_module,\n"); - if (mod->has_cleanup) - buf_printf(b, "#ifdef CONFIG_MODULE_UNLOAD\n" - " .exit = __cleanup_module,\n" - "#endif\n"); buf_printf(b, "};\n"); } Index: linux-2.6.10-bk14-Module/include/linux/module.h =================================================================== --- linux-2.6.10-bk14-Module.orig/include/linux/module.h 2005-01-12 13:29:46.947535880 +1100 +++ 
linux-2.6.10-bk14-Module/include/linux/module.h 2005-01-12 13:30:03.125076520 +1100 @@ -259,9 +259,6 @@ unsigned int num_exentries; const struct exception_table_entry *extable; - /* Startup function. */ - int (*init)(void); - /* If this is non-NULL, vfree after init() returns */ void *module_init; @@ -283,6 +280,10 @@ /* Am I GPL-compatible */ int license_gplok; + /* Pairs of init/exit functions. */ + struct module_init_exit *ie_pairs; + unsigned int num_ie_pairs; + #ifdef CONFIG_MODULE_UNLOAD /* Reference counts */ struct module_ref ref[NR_CPUS]; @@ -292,9 +293,6 @@ /* Who is waiting for us to be unloaded */ struct task_struct *waiter; - - /* Destruction function. */ - void (*exit)(void); #endif #ifdef CONFIG_KALLSYMS