Name: New Module Loader Base: ia64 support
Author: Rusty Russell
Status: Untested
Depends:
D: This patch provides basic ia64 support for modules.
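The fiddliest thing in this patch is ia64's scattered 22-bit immediate: the addl in slot 0 of each PLT entry keeps its immediate in four separate instruction fields, and apply22() below has to scatter the value accordingly.  Here is a stand-alone user-space sketch of that shuffle with a round-trip check; scatter22() and gather22() are names invented for the demo (field names follow the ia64 manual's imm22 terminology), not functions in the patch.

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

/* Return mask for these bits, e.g. mask(0, 7) => 0xFF. */
static uint64_t mask(unsigned int first, unsigned int last)
{
	return ((1UL << (last - first + 1)) - 1) << first;
}

/* Return bits first to last of val, shifted down. */
static uint64_t bits(uint64_t val, unsigned int first, unsigned int last)
{
	return (val & mask(first, last)) >> first;
}

/* Scatter a 22-bit immediate into the four fields of an addl,
   the same shuffle apply22() performs below. */
static uint64_t scatter22(uint64_t val)
{
	return (bits(val, 0, 6) << 13)		/* imm7b: bits 13..19 */
		| (bits(val, 7, 15) << 27)	/* imm9d: bits 27..35 */
		| (bits(val, 16, 20) << 22)	/* imm5c: bits 22..26 */
		| (bits(val, 21, 21) << 36);	/* sign:  bit  36     */
}

/* Gather it back out, to show the scatter is lossless. */
static uint64_t gather22(uint64_t insn)
{
	return bits(insn, 13, 19)
		| (bits(insn, 27, 35) << 7)
		| (bits(insn, 22, 26) << 16)
		| (bits(insn, 36, 36) << 21);
}

int main(void)
{
	uint64_t val;

	for (val = 0; val < (1UL << 22); val++)
		assert(gather22(scatter22(val)) == val);
	printf("imm22 scatter/gather round-trips\n");
	return 0;
}

plt_matches() compares exactly these four fields (shifted up past the 5-bit bundle template) against slot 0 of an existing PLT entry, which is how duplicate PLTs are avoided.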
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .13409-linux-2.5.43/arch/ia64/kernel/module.c .13409-linux-2.5.43.updated/arch/ia64/kernel/module.c
--- .13409-linux-2.5.43/arch/ia64/kernel/module.c	1970-01-01 10:00:00.000000000 +1000
+++ .13409-linux-2.5.43.updated/arch/ia64/kernel/module.c	2002-10-17 17:35:37.000000000 +1000
@@ -0,0 +1,507 @@
+#include <linux/module.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+
+struct plt_entry
+{
+	/* Three instruction bundles in PLT. */
+	unsigned char plt[16 * 3];
+};
+
+struct got_entry
+{
+	uint64_t got;
+};
+
+struct opd_entry
+{
+	uint64_t function;
+	uint64_t gp;
+};
+
+/* Stolen from modutils 2.4.1 obj_ia64.c by Mike Stephens
+ */
+static struct plt_entry ia64_plt =
+{ {
+	0x0b, 0x80, 0x00, 0x02, 0x00, 0x24,	/* [MMI] addl r16=aa,gp;; */
+	0xf0, 0x00, 0x40, 0x30, 0x20, 0x00,	/*       ld8 r15=[r16] */
+	0x00, 0x00, 0x04, 0x00,			/*       nop.i 0x0;; */
+	0x0b, 0x80, 0x20, 0x1e, 0x18, 0x14,	/* [MMI] ld8 r16=[r15],8;; */
+	0x10, 0x00, 0x3c, 0x30, 0x20, 0xc0,	/*       ld8 gp=[r15] */
+	0x00, 0x09, 0x00, 0x07,			/*       mov b6=r16;; */
+	0x11, 0x00, 0x00, 0x00, 0x01, 0x00,	/* [MIB] nop.m 0x0 */
+	0x00, 0x00, 0x00, 0x02, 0x00, 0x00,	/*       nop.i 0x0 */
+	0x60, 0x00, 0x80, 0x00			/*       br.few b6;; */
+} };
+
+/* Have we already seen one of these relocations? */
+/* FIXME: we could look in other sections, too --RR */
+static int duplicate_reloc(const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		if (rela[i].r_info == rela[num].r_info
+		    && rela[i].r_addend == rela[num].r_addend)
+			return 1;
+	}
+	return 0;
+}
+
+/* Count how many GOT entries we may need */
+static unsigned int count_gots(const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+	   time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_IA64_PCREL21B:
+		case R_IA64_LTOFF22:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int count_plts(const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+	   time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_IA64_PCREL21B:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+/* We need to create an OPD entry for any internal function which is
+   referenced. */
+static unsigned int count_opds(const Elf64_Rela *rela, unsigned int num)
+{
+	unsigned int i, ret = 0;
+
+	/* Sure, this is order(n^2), but it's usually short, and not
+	   time critical */
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_IA64_FPTR64LSB:
+			/* Jumps to static functions sometimes go straight
+			   to their offset.  Of course, that may not be
+			   possible if the jump is from init -> core or
+			   vice versa, so we need to generate an OPD (and
+			   PLT etc) for that. */
+		case R_IA64_PCREL21B:
+			if (!duplicate_reloc(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+static void *alloc_and_zero(unsigned long size)
+{
+	void *ret;
+
+	ret = vmalloc(size);
+	if (!ret) ret = ERR_PTR(-ENOMEM);
+	else memset(ret, 0, size);
+
+	return ret;
+}
+
+void *module_core_alloc(const Elf64_Ehdr *hdr,
+			const Elf64_Shdr *sechdrs,
+			const char *secstrings,
+			struct module *me)
+{
+	unsigned long gots = 0, plts = 0, opds = 3; /* See module_finalize */
+	unsigned int i;
+
+	/* GOT and PLTs can occur in any relocated section... */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		const Elf64_Rela *rels = (void *)hdr + sechdrs[i].sh_offset;
+		unsigned long numrels = sechdrs[i].sh_size/sizeof(Elf64_Rela);
+
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+		gots += count_gots(rels, numrels);
+		opds += count_opds(rels, numrels);
+
+		/* Only need PLTs for non-init sections here. */
+		if (!strstr(secstrings + sechdrs[i].sh_name, ".init"))
+			plts += count_plts(rels, numrels);
+	}
+
+	me->core_size = ALIGN(me->core_size, 16);
+	me->arch.got_offset = me->core_size;
+	me->core_size += gots * sizeof(struct got_entry);
+
+	me->core_size = ALIGN(me->core_size, 16);
+	me->arch.core_plt_offset = me->core_size;
+	me->core_size += plts * sizeof(struct plt_entry);
+
+	me->core_size = ALIGN(me->core_size, 16);
+	me->arch.opd_offset = me->core_size;
+	me->core_size += opds * sizeof(struct opd_entry);
+
+	return alloc_and_zero(me->core_size);
+}
+
+void *module_init_alloc(const Elf64_Ehdr *hdr,
+			const Elf64_Shdr *sechdrs,
+			const char *secstrings,
+			struct module *me)
+{
+	unsigned long plts = 0;
+	unsigned int i;
+
+	/* PLTs can occur in any relocated section... */
+	for (i = 1; i < hdr->e_shnum; i++) {
+		if (sechdrs[i].sh_type != SHT_RELA
+		    || !strstr(secstrings + sechdrs[i].sh_name, ".init"))
+			continue;
+		plts += count_plts((void *)hdr + sechdrs[i].sh_offset,
+				   sechdrs[i].sh_size / sizeof(Elf64_Rela));
+	}
+
+	me->init_size = ALIGN(me->init_size, 16);
+	me->arch.init_plt_offset = me->init_size;
+	me->init_size += plts * sizeof(struct plt_entry);
+
+	return alloc_and_zero(me->init_size);
+}
+
+/* Opaque struct for insns, to protect against derefs. */
+struct insn;
+static void *bundle(const struct insn *insn)
+{
+	return (void *)((uint64_t)insn & ~0x3UL);
+}
+
+static int slot(const struct insn *insn)
+{
+	return (uint64_t)insn & 0x3;
+}
+
+int apply_relocate(Elf64_Shdr *sechdrs,
+		   const char *strtab,
+		   unsigned int symindex,
+		   unsigned int relsec,
+		   struct module *me)
+{
+	printk(KERN_ERR "module %s: Non-ADD RELOCATION unsupported\n",
+	       me->name);
+	return -ENOEXEC;
+}
+
+/* Return mask for these bits, e.g. mask(0, 7) => 0xFF. */
+static inline unsigned long mask(unsigned int first, unsigned int last)
+{
+	return (((1UL << (last-first+1)) - 1) << first);
+}
+
+/* Return bits first to last of val, shifted down */
+static inline unsigned long bits(unsigned long val,
+				 unsigned int first, unsigned int last)
+{
+	return (val & mask(first, last)) >> first;
+}
+
+/* Only apply val where mask has 1 bits. */
+static void apply(struct insn *insn, uint64_t mask, uint64_t val)
+{
+	uint64_t *ptr;
+	unsigned int bitshift;
+
+	/* First 5 bits are template, then 3 x 41-bit instructions. */
+	ptr = bundle(insn) + 5 * slot(insn);
+	bitshift = 5 + slot(insn);
+
+	/* Shift everyone up */
+	val <<= bitshift;
+	mask <<= bitshift;
+
+	/* We use unaligned pointers here. */
+	*ptr = (*ptr & ~mask) | (val & mask);
+}
+
+/* Apply ia64 weird-ass 22-bit immediate relocation */
+static int apply22(struct module *me, struct insn *insn, uint64_t val)
+{
+	if (val + (1 << 21) >= (1 << 22)) {
+		printk(KERN_ERR "%s: Apply22 value %li out of range\n",
+		       me->name, (int64_t)val);
+		return 0;
+	}
+
+	/* I am not making this up! */
+	apply(insn,
+	      mask(13, 19) | mask(22, 26) | mask(27, 35) | mask(36, 36),
+	      (bits(val, 0, 6) << 13)
+	      | (bits(val, 7, 15) << 27)
+	      | (bits(val, 16, 20) << 22)
+	      | (bits(val, 21, 21) << 36));
+	return 1;
+}
+
+/* Get gp-relative GOT entry for this value (gp points to start of GOT).
+   Returns -1 on failure. */
+static uint64_t get_got(struct module *me, uint64_t value)
+{
+	unsigned int i;
+	struct got_entry *got;
+
+	if (value == 0) {
+		printk(KERN_ERR "%s: Zero value in GOT.  Call rusty\n",
+		       me->name);
+		return (uint64_t)-1;
+	}
+
+	got = me->module_core + me->arch.got_offset;
+	for (i = 0; got[i].got; i++)
+		if (got[i].got == value)
+			return i * sizeof(struct got_entry);
+
+	/* Not enough GOT entries? */
+	if ((void *)&got[i] >= me->module_core + me->arch.core_plt_offset)
+		BUG();
+
+	got[i].got = value;
+	return i * sizeof(struct got_entry);
+}
+
+/* Has slot 0 of first bundle been patched to this value? */
+static inline int plt_matches(const struct plt_entry *plt, uint64_t value)
+{
+	uint64_t insn = *(uint64_t *)plt;
+
+	/* See apply22. */
+
+	/* Get slot 0 */
+	insn >>= 5;
+
+	/* Mask out uninteresting bits of insn. */
+	insn &= (mask(13, 19) | mask(22, 26) | mask(27, 35) | mask(36, 36));
+
+	if (((bits(value, 0, 6) << 13)
+	     | (bits(value, 7, 15) << 27)
+	     | (bits(value, 16, 20) << 22)
+	     | (bits(value, 21, 21) << 36))
+	    == insn)
+		return 1;
+	else
+		return 0;
+}
+
+static inline int in_init(const struct module *me, void *addr)
+{
+	return addr >= me->module_init
+		&& addr < me->module_init + me->init_size;
+}
+
+static inline int in_core(const struct module *me, void *addr)
+{
+	return addr >= me->module_core
+		&& addr < me->module_core + me->core_size;
+}
+
+/* Get PC-relative PLT entry for this value.  Returns 0 on failure. */
+static uint64_t get_plt(struct module *me,
+			const struct insn *insn,
+			uint64_t value)
+{
+	struct plt_entry *plt;
+
+	/* Figure out if this is in the init or core sections */
+	if (in_init(me, bundle(insn)))
+		plt = me->module_init + me->arch.init_plt_offset;
+	else if (in_core(me, bundle(insn)))
+		plt = me->module_core + me->arch.core_plt_offset;
+	else BUG();
+
+	/* We store the value in the GOT */
+	value = get_got(me, value);
+	if (value == (uint64_t)-1)
+		return 0;
+
+	/* Look for existing PLT entry. */
+	while (plt->plt[0]) {
+		if (plt_matches(plt, value))
+			return (uint64_t)plt - (uint64_t)bundle(insn);
+		plt++;
+	}
+
+	/* Create new PLT entry, with slot 0 of the first instruction
+	   patched to point to the gp-relative GOT entry */
+	*plt = ia64_plt;
+	if (!apply22(me, (struct insn *)plt, value))
+		return 0;
+#if 0
+	if (!plt_matches(plt, value))
+		BUG();
+#endif
+
+	return (uint64_t)plt - (uint64_t)bundle(insn);
+}
+
+/* Get OPD entry for this function.  Returns 0 on failure. */
+static uint64_t get_opd(struct module *me, uint64_t value)
+{
+	struct opd_entry *opd = me->module_core + me->arch.opd_offset;
+
+	if (!value) {
+		printk(KERN_ERR "%s: zero OPD requested!\n", me->name);
+		return 0;
+	}
+
+	/* Look for existing OPD entry. */
+	while (opd->function) {
+		if (opd->function == value)
+			return (uint64_t)opd;
+		opd++;
+	}
+
+	/* Create new one */
+	opd->function = value;
+	opd->gp = (uint64_t)me->module_core + me->arch.got_offset;
+	return (uint64_t)opd;
+}
+
+static inline int is_internal(const struct module *me, uint64_t value)
+{
+	return in_init(me, (void *)value) || in_core(me, (void *)value);
+}
+
+int apply_relocate_add(Elf64_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf64_Rela *rela = (void *)sechdrs[relsec].sh_offset;
+	Elf64_Sym *sym;
+	void *location;
+	uint64_t value;
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
+		/* This is where to make the change */
+		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_offset
+			+ rela[i].r_offset;
+
+		/* This is the symbol it is referring to */
+		sym = (Elf64_Sym *)sechdrs[symindex].sh_offset
+			+ ELF64_R_SYM(rela[i].r_info);
+		if (!sym->st_value)
+			return -ENOENT;
+
+		/* `Everything is relative'. */
+		value = sym->st_value + rela[i].r_addend;
+
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_IA64_LTOFF22:
+			value = get_got(me, value);
+			if (value == (uint64_t)-1
+			    || !apply22(me, location, value))
+				return -ENOEXEC;
+			break;
+
+		case R_IA64_PCREL21B:
+			/* Beware: this may be a direct jump to a function
+			   in this module.  We have to create an OPD. */
+			if (is_internal(me, value)) {
+				value = get_opd(me, value);
+				if (!value)
+					return -ENOEXEC;
+			}
+			value = get_plt(me, location, value);
+			if (value + (1 << 24) >= (1 << 25) || (value & 0xf)) {
+				printk(KERN_ERR
+				       "%s: PCREL21B plt invalid value %lu\n",
+				       me->name, value);
+				return -ENOEXEC;
+			}
+
+			/* It's actually a bitshifted 25-bit value */
+			value >>= 4;
+			apply(location, mask(13, 32) | mask(36, 36),
+			      (bits(value, 0, 19) << 13)
+			      | (bits(value, 20, 20) << 36));
+			break;
+
+		case R_IA64_DIR32LSB:
+			*((uint32_t *)location) = value;
+			break;
+
+		case R_IA64_SEGREL64LSB:
+			/* My definition of "segment" is a little fuzzy here,
+			   but quoth David Mosberger-Tang:
+
+			   As long as it's used consistently, it doesn't
+			   matter much.  All unwind offsets are 64-bit
+			   offsets anyhow, so you could just use a segment
+			   base of zero (the reason SEGREL relocs are used
+			   is to make it possible to have the unwind tables
+			   be read-only in shared libraries, but since you
+			   need to do relocation anyhow, that's a moot
+			   issue). */
+			/* Fall thru */
+		case R_IA64_DIR64LSB:
+			*((uint64_t *)location) = value;
+			break;
+
+		case R_IA64_FPTR64LSB:
+			/* Create an OPD for internal functions only. */
+			if (is_internal(me, value))
+				value = get_opd(me, value);
+			*((uint64_t *)location) = value;
+			break;
+
+		case R_IA64_GPREL22:
+			value -= (uint64_t)me->module_core
+				+ me->arch.got_offset;
+			if (!apply22(me, location, value))
+				return -ENOEXEC;
+			break;
+
+		default:
+			printk(KERN_ERR "%s: Unknown ADD relocation: %lu\n",
+			       me->name, ELF64_R_TYPE(rela[i].r_info));
+			return -ENOEXEC;
+		}
+	}
+
+	return 0;
+}
+
+/* Free memory returned from module_core_alloc/module_init_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+	vfree(module_region);
+	/* FIXME: If module_region == mod->module_init, trim exception
+	   table entries. */
+}
+
+/* The function pointers should really be OPD pointers! */
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	me->init = (void *)get_opd(me, (uint64_t)me->init);
+#ifdef CONFIG_MODULE_UNLOAD
+	if (me->cleanup)
+		me->cleanup = (void *)get_opd(me, (uint64_t)me->cleanup);
+	if (me->destroy)
+		me->destroy = (void *)get_opd(me, (uint64_t)me->destroy);
+#endif
+	return 0;
+}
diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .13409-linux-2.5.43/include/asm-ia64/module.h .13409-linux-2.5.43.updated/include/asm-ia64/module.h
--- .13409-linux-2.5.43/include/asm-ia64/module.h	2002-08-28 09:29:50.000000000 +1000
+++ .13409-linux-2.5.43.updated/include/asm-ia64/module.h	2002-10-17 17:36:32.000000000 +1000
@@ -1,111 +1,22 @@
 #ifndef _ASM_IA64_MODULE_H
 #define _ASM_IA64_MODULE_H
-/*
- * This file contains the ia64 architecture specific module code.
- *
- * Copyright (C) 2000 Intel Corporation.
- * Copyright (C) 2000 Mike Stephens
- */
-#include <linux/module.h>
-#include <linux/vmalloc.h>
-#include <asm/unwind.h>
-
-#define module_map(x)		vmalloc(x)
-#define module_unmap(x)		ia64_module_unmap(x)
-#define module_arch_init(x)	ia64_module_init(x)
-
-/*
- * This must match in size and layout the data created by
- * modutils/obj/obj-ia64.c
- */
-struct archdata {
-	const char *unw_table;
-	const char *segment_base;
-	const char *unw_start;
-	const char *unw_end;
-	const char *gp;
-};
-
-static inline void
-arch_init_modules (struct module *kmod)
-{
-	static struct archdata archdata;
-	register char *kernel_gp asm ("gp");
-
-	archdata.gp = kernel_gp;
-	kmod->archdata_start = (const char *) &archdata;
-	kmod->archdata_end = (const char *) (&archdata + 1);
-}
-
-/*
- * functions to add/remove a modules unwind info when
- * it is loaded or unloaded.
- */
-static inline int
-ia64_module_init (struct module *mod)
+struct mod_arch_specific
 {
-	struct archdata *archdata;
-
-	if (!mod_member_present(mod, archdata_start) || !mod->archdata_start)
-		return 0;
-	archdata = (struct archdata *)(mod->archdata_start);
-
-	if (archdata->unw_start == 0)
-		return 0;
-
-	/*
-	 * Make sure the unwind pointers are sane.
-	 */
-
-	if (archdata->unw_table) {
-		printk(KERN_ERR "module_arch_init: archdata->unw_table must be zero.\n");
-		return 1;
-	}
-	if (!mod_bound(archdata->gp, 0, mod)) {
-		printk(KERN_ERR "module_arch_init: archdata->gp out of bounds.\n");
-		return 1;
-	}
-	if (!mod_bound(archdata->unw_start, 0, mod)) {
-		printk(KERN_ERR "module_arch_init: archdata->unw_start out of bounds.\n");
-		return 1;
-	}
-	if (!mod_bound(archdata->unw_end, 0, mod)) {
-		printk(KERN_ERR "module_arch_init: archdata->unw_end out of bounds.\n");
-		return 1;
-	}
-	if (!mod_bound(archdata->segment_base, 0, mod)) {
-		printk(KERN_ERR "module_arch_init: archdata->segment_base out of bounds.\n");
-		return 1;
-	}
-
-	/*
-	 * Pointers are reasonable, add the module unwind table
-	 */
-	archdata->unw_table = unw_add_unwind_table(mod->name,
-		(unsigned long) archdata->segment_base,
-		(unsigned long) archdata->gp,
-		archdata->unw_start, archdata->unw_end);
-	return 0;
-}
+	/* End of core == start of GOT. */
+	unsigned long got_offset;
 
-static inline void
-ia64_module_unmap (void * addr)
-{
-	struct module *mod = (struct module *) addr;
-	struct archdata *archdata;
+	/* End of GOT == start of core PLT. */
+	unsigned long core_plt_offset;
 
-	/*
-	 * Before freeing the module memory remove the unwind table entry
-	 */
-	if (mod_member_present(mod, archdata_start) && mod->archdata_start) {
-		archdata = (struct archdata *)(mod->archdata_start);
+	/* End of core PLT == start of OPD. */
+	unsigned long opd_offset;
 
-		if (archdata->unw_table != NULL)
-			unw_remove_unwind_table((void *) archdata->unw_table);
-	}
+	unsigned long init_plt_offset;
+};
 
-	vfree(addr);
-}
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Ehdr Elf64_Ehdr
 
 #endif /* _ASM_IA64_MODULE_H */
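A note on the layout: module_core_alloc() sizes the core as [module sections][GOT][core PLTs][OPDs], each 16-byte aligned, and records the three boundaries in the mod_arch_specific added above.  Here is a rough user-space sketch of that arithmetic, with made-up entry counts standing in for what count_gots()/count_plts()/count_opds() would return:

#include <stdio.h>
#include <stdint.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))

struct got_entry { uint64_t got; };
struct plt_entry { unsigned char plt[16 * 3]; };
struct opd_entry { uint64_t function, gp; };

int main(void)
{
	/* Made-up counts; opds starts at 3 for the init/cleanup/destroy
	   pointers (see module_finalize). */
	uint64_t gots = 10, plts = 7, opds = 3 + 5;
	uint64_t core_size = 12345;	/* the module's own sections */
	uint64_t got_offset, core_plt_offset, opd_offset;

	core_size = ALIGN(core_size, 16);
	got_offset = core_size;
	core_size += gots * sizeof(struct got_entry);

	core_size = ALIGN(core_size, 16);
	core_plt_offset = core_size;
	core_size += plts * sizeof(struct plt_entry);

	core_size = ALIGN(core_size, 16);
	opd_offset = core_size;
	core_size += opds * sizeof(struct opd_entry);

	printf("GOT at %lu, PLTs at %lu, OPDs at %lu, total %lu bytes\n",
	       (unsigned long)got_offset, (unsigned long)core_plt_offset,
	       (unsigned long)opd_offset, (unsigned long)core_size);
	return 0;
}

Since get_opd() sets gp to module_core + got_offset, the offsets get_got() hands back are small gp-relative displacements that fit apply22()'s 22-bit immediate.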
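And on why module_finalize() rewrites me->init and friends: an ia64 function pointer is not a code address but a pointer to a two-word function descriptor holding the entry point and the callee's gp — exactly what struct opd_entry models, and what the PLT stub consumes with its ld8 r16=[r15],8 / ld8 gp=[r15] / br.few b6 sequence.  A portable toy of that convention; call_via_opd() and current_gp are invented for the demo:

#include <stdio.h>
#include <stdint.h>

/* An ia64 function pointer points at one of these, not at code. */
struct opd_entry {
	uint64_t function;	/* code entry point */
	uint64_t gp;		/* global pointer the callee expects */
};

static uint64_t current_gp;	/* stands in for the gp register */

static void hello(void)
{
	printf("hello, running with gp=%#lx\n", (unsigned long)current_gp);
}

/* Emulate an ia64 indirect call: load both descriptor words, set
   gp, then branch. */
static void call_via_opd(const struct opd_entry *opd)
{
	void (*entry)(void) = (void (*)(void))(uintptr_t)opd->function;

	current_gp = opd->gp;
	entry();
}

int main(void)
{
	struct opd_entry opd = {
		.function = (uint64_t)(uintptr_t)hello,
		.gp = 0x1000,	/* would be module_core + got_offset */
	};

	call_via_opd(&opd);
	return 0;
}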