Date: Sun, 19 Nov 2000 20:37:14 +0100 From: Christoph Hellwig To: linux-privs-discuss@sourceforge.net This patch adds support for the extended capability API and filesystem capabilities to Linux 2.4.0. It is not just a forward-port; it also changes a lot of the internal organisation. The changes are mainly the following: kernel/capability.c: separate the code more cleanly into small functions. fs/capability.c: make the vfs_ functions operate only on inodes; everything else in the VFS does so, and it is easy to get a struct inode from most other VFS objects. Add the ext-attr interface to this file. Try it! Christoph -- Always remember that you are unique. Just like everyone else. diff -uNr linux-2.4.0-test11-pre7/Documentation/Configure.help linux/Documentation/Configure.help --- linux-2.4.0-test11-pre7/Documentation/Configure.help Sat Nov 18 13:18:18 2000 +++ linux/Documentation/Configure.help Sat Nov 18 13:38:05 2000 @@ -9844,6 +9844,22 @@ If you don't know what Access Control Lists are, say N. +Filesystem Capabilities +CONFIG_FS_POSIX_CAP + Filesystem capabilities allow you to restrict the capabilities binaries may + obtain and pass on to their children. This allows you to configure a more + secure system where binaries only get the privileges they actually need + (principle of least privilege). Filesystem capabilities are stored + as extended attributes. + + If you plan to use Filesystem Capabilities, say Y here. + + At mount time, specify the "cap" mount option to enable Capabilities on + that filesystem. This implies the "attr" mount option. + Currently, only the ext2 filesystem is supported. + + If you have no idea what this is all about, say N.
+ Quota support CONFIG_QUOTA If you say Y here, you will be able to set per user limits for disk diff -uNr linux-2.4.0-test11-pre7/fs/Config.in linux/fs/Config.in --- linux-2.4.0-test11-pre7/fs/Config.in Sat Nov 18 13:18:19 2000 +++ linux/fs/Config.in Sat Nov 18 13:38:05 2000 @@ -9,6 +9,7 @@ if [ "$CONFIG_FS_EXT_ATTR" = "y" ]; then bool ' Extended user attributes' CONFIG_FS_USER_EXT_ATTR bool ' Access Control Lists' CONFIG_FS_POSIX_ACL + bool ' Filesystem Capabilities' CONFIG_FS_POSIX_CAP fi fi diff -uNr linux-2.4.0-test11-pre7/fs/Makefile linux/fs/Makefile --- linux-2.4.0-test11-pre7/fs/Makefile Sat Nov 18 13:37:49 2000 +++ linux/fs/Makefile Sat Nov 18 13:38:05 2000 @@ -15,7 +15,7 @@ super.o block_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o fifo.o locks.o \ dcache.o inode.o attr.o bad_inode.o file.o iobuf.o dnotify.o \ - filesystems.o ext_attr.o + filesystems.o ext_attr.o capability.o ifeq ($(CONFIG_QUOTA),y) obj-y += dquot.o diff -uNr linux-2.4.0-test11-pre7/fs/capability.c linux/fs/capability.c --- linux-2.4.0-test11-pre7/fs/capability.c Thu Jan 1 01:00:00 1970 +++ linux/fs/capability.c Sat Nov 18 13:38:53 2000 @@ -0,0 +1,257 @@ +/* + * capability.c - vfs capability support and extended attribute representation. + * + * Copyright (c) 2000, Andrew G. 
Morgan (morgan@transmeta.com) + * Copyright (c) 2000, Andreas Gruenbacher (a.gruenbacher@computer.org) + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +#ifdef CONFIG_FS_POSIX_CAP +static int cap_ext_attr_set_inode(struct inode *, cap_kernel_data_t); +static int cap_ext_attr_get_inode(struct inode *, cap_kernel_data_t); +#endif + + +int vfs_getcap(struct inode * inode, cap_kernel_data_t caps) +{ + +#ifdef CONFIG_FS_POSIX_CAP + if (IS_POSIX_CAP(inode)) + return cap_ext_attr_get_inode(inode, caps); +#endif + + cap_clear(caps->inheritable); + cap_clear(caps->permitted); + cap_clear(caps->effective); + + return 0; +} + +int vfs_setcap(struct inode * inode, cap_kernel_data_t caps) +{ + if (!capable(CAP_SETFCAP)) + return -EPERM; + +#ifdef CONFIG_FS_POSIX_CAP + if (IS_POSIX_CAP(inode)) + return cap_ext_attr_set_inode(inode, caps); +#endif + + return -ENOSYS; +} + + + + +#ifdef CONFIG_FS_POSIX_CAP + +/* + * Converts from extended attribute representation to a capability + */ +static int +cap_from_ext_attr(cap_kernel_data_t cap, const void * value, size_t size) +{ + const struct __cap_ext_attr_s * ext_cap = value; + __u32 c_bitflags; + __u32 c_u32; + + if (value == NULL) + return -EINVAL; + + ext_cap = (const struct __cap_ext_attr_s *) value; + + if (size != CAP_EXT_ATTR_SIZE) + return -EINVAL; + + if (ext_cap->c_version != cpu_to_le32(CAP_EXT_ATTR_VERSION)) + return -EINVAL; + + c_bitflags = le32_to_cpu(ext_cap->c_bitflags); + if (c_bitflags & CAP_EXT_ATTR_FLAG_EFFECTIVE) + cap_set_full(cap->effective); + else + cap_clear(cap->effective); + + + c_u32 = le32_to_cpu(ext_cap->c_inheritable); + cap->inheritable = to_cap_t(c_u32); + + c_u32 = le32_to_cpu(ext_cap->c_permitted); + cap->permitted = to_cap_t(c_u32); + + return 0; +} + +/* + * Converts to external attribute representation from kernel internal + */ +static int +cap_to_ext_attr(const cap_kernel_data_t cap, void * value, size_t size) +{ + cap_ext_attr_t 
ext_cap; + __u32 c_bitflags; + __u32 c_u32; + + if (!value) + return -EINVAL; + + if (size != CAP_EXT_ATTR_SIZE) + return -EINVAL; + + ext_cap.c_version = cpu_to_le32(CAP_EXT_ATTR_VERSION); + + c_bitflags = 0; + if (!cap_isclear(cap->effective)) + c_bitflags |= CAP_EXT_ATTR_FLAG_EFFECTIVE; + + ext_cap.c_bitflags = cpu_to_le32(c_bitflags); + + c_u32 = (__u32) cap_t(cap->inheritable); + ext_cap.c_inheritable = cpu_to_le32(c_u32); + + c_u32 = (__u32) cap_t(cap->permitted); + ext_cap.c_permitted = cpu_to_le32(c_u32); + + /* copy ext_cap into value */ + memcpy(value, &ext_cap, size); + + return 0; +} + +/* + * Functions for raw access to setting/getting the extended attributes + * associated with filesystem capabilities. + */ +static int +cap_ext_attr_set(struct inode * inode, const char * name, char * value, size_t size, int flag) +{ + struct __kernel_cap_data_struct cap_temp; + int error; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (!S_ISREG(inode->i_mode) || IS_NOEXEC(inode)) + return -EINVAL; + + /* + * Here, we have to establish that the current process is + * sufficiently capable to write the capabilities that it + * is writing to the extended attribute. Basically, the + * current->permitted set must be a superset of the + * inheritable and permitted sets in this attribute + * definition. 
+ */ + error = cap_from_ext_attr(&cap_temp, value, size); + if (error) + return error; + + if (!cap_issubset(cap_temp.inheritable, current->cap_permitted)) + return -EPERM; + + if (!cap_issubset(cap_temp.permitted, current->cap_permitted)) + return -EPERM; + + /* + * This extended attribute is acceptable, so set it + */ + return ext_attr_set(inode, CAP_EXT_ATTR_NAME, value, size, 0); +} + +/* + * This handler structure is used when the user attempts to set + * capabilities on a file via the general external attribute + * functions (ie. in the case of setting an attribute, we perform + * some sanity and security checks). + */ + +static struct ext_attr_handler cap_handler = { + name: CAP_EXT_ATTR_NAME, + get_inode: ext_attr_get, + set_inode: cap_ext_attr_set, +}; + + +/* + * This function takes a cap_kernel_data_t and writes its contents + * to the "$cap" external attribute associated with the given inode + */ +static int +cap_ext_attr_set_inode(struct inode * inode, cap_kernel_data_t cap) +{ + char value[CAP_EXT_ATTR_SIZE]; + int retval; + + retval = cap_to_ext_attr(cap, value, CAP_EXT_ATTR_SIZE); + if (retval) + return retval; + + retval = cap_ext_attr_set(inode, CAP_EXT_ATTR_NAME, value, CAP_EXT_ATTR_SIZE, 0); + + return retval; +} + +/* + * This function uses the external attribute function ext_attr_get() + * to get the contents of the "$cap" attribute for this inode and + * converts it into a cap_kernel_data_t type for use within the kernel. 
+ */ +static int +cap_ext_attr_get_inode(struct inode * inode, cap_kernel_data_t cap) +{ + char value[CAP_EXT_ATTR_SIZE], *value_p = value; + int retval; + + retval = ext_attr_get(inode, CAP_EXT_ATTR_NAME, + value_p, CAP_EXT_ATTR_SIZE, 0); + if (retval == -ENOATTR || retval == -ENOTSUP) { + cap_clear(cap->effective); + cap_clear(cap->inheritable); + cap_clear(cap->permitted); + retval = 0; + } else if (retval == CAP_EXT_ATTR_SIZE) { + retval = cap_from_ext_attr(cap, value, CAP_EXT_ATTR_SIZE); + } else if (retval >= 0) { + printk(__FUNCTION__ " got bad length back (%d)\n", retval); + retval = -EINVAL; + } + + return retval; +} + + +static int __init +init_posix_cap(void) +{ + register_ext_attr_handler(&cap_handler); + return 0; +} + +static void __exit +exit_posix_cap(void) +{ + unregister_ext_attr_handler(&cap_handler); +} + +module_init(init_posix_cap); +module_exit(exit_posix_cap); +#endif /* CONFIG_FS_POSIX_CAP */ diff -uNr linux-2.4.0-test11-pre7/fs/exec.c linux/fs/exec.c --- linux-2.4.0-test11-pre7/fs/exec.c Sat Nov 18 13:10:21 2000 +++ linux/fs/exec.c Sat Nov 18 13:38:05 2000 @@ -628,10 +628,8 @@ id_change = 1; } - /* We don't have VFS support for capabilities yet */ - cap_clear(bprm->cap_inheritable); - cap_clear(bprm->cap_permitted); - cap_clear(bprm->cap_effective); + if (vfs_getcap(bprm->file->f_dentry->d_inode, &bprm->cap)) + return -EPERM; /* To support inheritance of root-permissions and suid-root * executables under compatibility mode, we raise all three @@ -643,11 +641,11 @@ if (!issecure(SECURE_NOROOT)) { if (bprm->e_uid == 0 || current->uid == 0) { - cap_set_full(bprm->cap_inheritable); - cap_set_full(bprm->cap_permitted); + cap_set_full(bprm->cap.inheritable); + cap_set_full(bprm->cap.permitted); } if (bprm->e_uid == 0) - cap_set_full(bprm->cap_effective); + cap_set_full(bprm->cap.effective); } /* Only if pP' is _not_ a subset of pP, do we consider there @@ -658,8 +656,8 @@ { kernel_cap_t permitted, working; - permitted = 
cap_intersect(bprm->cap_permitted, cap_bset); - working = cap_intersect(bprm->cap_inheritable, + permitted = cap_intersect(bprm->cap.permitted, cap_bset); + working = cap_intersect(bprm->cap.inheritable, current->cap_inheritable); working = cap_combine(permitted, working); if (!cap_issubset(working, current->cap_permitted)) { @@ -705,8 +703,8 @@ { kernel_cap_t new_permitted, working; - new_permitted = cap_intersect(bprm->cap_permitted, cap_bset); - working = cap_intersect(bprm->cap_inheritable, + new_permitted = cap_intersect(bprm->cap.permitted, cap_bset); + working = cap_intersect(bprm->cap.inheritable, current->cap_inheritable); new_permitted = cap_combine(new_permitted, working); @@ -716,7 +714,7 @@ if (current->pid != 1) { current->cap_permitted = new_permitted; current->cap_effective = - cap_intersect(new_permitted, bprm->cap_effective); + cap_intersect(new_permitted, bprm->cap.effective); } /* AUD: Audit candidate if current->cap_effective is set */ diff -uNr linux-2.4.0-test11-pre7/fs/ext2/super.c linux/fs/ext2/super.c --- linux-2.4.0-test11-pre7/fs/ext2/super.c Sat Nov 18 13:18:19 2000 +++ linux/fs/ext2/super.c Sat Nov 18 13:38:05 2000 @@ -173,6 +173,11 @@ *ext_attr_flags &= ~EXT_ATTR_FLAG_POSIX_ACL; #endif +#ifdef CONFIG_FS_POSIX_CAP + else if (!strcmp(value, "nocap")) + *ext_attr_flags &= + ~EXT_ATTR_FLAG_POSIX_CAP; +#endif else { printk("EXT2-fs: Invalid attr option: " "%s\n", value); @@ -428,6 +433,9 @@ #ifdef CONFIG_FS_POSIX_ACL sb->s_ext_attr_flags |= EXT_ATTR_FLAG_POSIX_ACL; #endif +#ifdef CONFIG_FS_POSIX_CAP + sb->s_ext_attr_flags |= EXT_ATTR_FLAG_POSIX_CAP; +#endif #endif if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, &sb->s_ext_attr_flags, &sb->u.ext2_sb.s_mount_opt)) { @@ -707,6 +715,9 @@ ext_attr_flags |= EXT_ATTR_FLAG_EXT_ATTR; #ifdef CONFIG_FS_USER_EXT_ATTR ext_attr_flags |= EXT_ATTR_FLAG_EXT_ATTR_USER; +#endif +#ifdef CONFIG_FS_POSIX_CAP + ext_attr_flags |= EXT_ATTR_FLAG_POSIX_CAP; #endif #ifdef CONFIG_FS_POSIX_ACL 
ext_attr_flags |= EXT_ATTR_FLAG_POSIX_ACL; diff -uNr linux-2.4.0-test11-pre7/fs/stat.c linux/fs/stat.c --- linux-2.4.0-test11-pre7/fs/stat.c Fri Nov 17 19:39:48 2000 +++ linux/fs/stat.c Sat Nov 18 13:38:05 2000 @@ -12,18 +12,6 @@ #include -/* - * Revalidate the inode. This is required for proper NFS attribute caching. - */ -static __inline__ int -do_revalidate(struct dentry *dentry) -{ - struct inode * inode = dentry->d_inode; - if (inode->i_op && inode->i_op->revalidate) - return inode->i_op->revalidate(dentry); - return 0; -} - #if !defined(__alpha__) && !defined(__sparc__) && !defined(__ia64__) && !defined(__s390__) diff -uNr linux-2.4.0-test11-pre7/include/linux/binfmts.h linux/include/linux/binfmts.h --- linux-2.4.0-test11-pre7/include/linux/binfmts.h Fri Nov 17 19:39:48 2000 +++ linux/include/linux/binfmts.h Sat Nov 18 13:38:05 2000 @@ -26,7 +26,7 @@ int sh_bang; struct file * file; int e_uid, e_gid; - kernel_cap_t cap_inheritable, cap_permitted, cap_effective; + struct __kernel_cap_data_struct cap; int argc, envc; char * filename; /* Name of binary */ unsigned long loader, exec; diff -uNr linux-2.4.0-test11-pre7/include/linux/cap_ext_attr.h linux/include/linux/cap_ext_attr.h --- linux-2.4.0-test11-pre7/include/linux/cap_ext_attr.h Thu Jan 1 01:00:00 1970 +++ linux/include/linux/cap_ext_attr.h Sat Nov 18 13:38:05 2000 @@ -0,0 +1,30 @@ +/* + File: linux/cap_ext_attr.h + + Capability sets -- extended attribute representation. + + (C) 2000 Andreas Gruenbacher, + Copyright (c) 2000, Andrew G. 
Morgan +*/ + +#ifndef LINUX__CAP_EXT_ATTR_H +#define LINUX__CAP_EXT_ATTR_H + +struct __kernel_cap_data_struct; + +/* stored little endian on the filesystem - _and_ in this type */ +typedef struct __cap_ext_attr_s { + __u32 c_version; + __u32 c_bitflags; + __u32 c_inheritable; + __u32 c_permitted; +} cap_ext_attr_t; + +#define CAP_EXT_ATTR_NAME "$cap" +#define CAP_EXT_ATTR_VERSION 0x00000002 +#define CAP_EXT_ATTR_SIZE sizeof(cap_ext_attr_t) + +/* bit flags - the effective file capability is a single bit */ +#define CAP_EXT_ATTR_FLAG_EFFECTIVE 0x00000001 + +#endif /* LINUX__CAP_EXT_ATTR_H */ diff -uNr linux-2.4.0-test11-pre7/include/linux/capability.h linux/include/linux/capability.h --- linux-2.4.0-test11-pre7/include/linux/capability.h Fri Nov 17 19:39:48 2000 +++ linux/include/linux/capability.h Sat Nov 18 13:38:05 2000 @@ -27,13 +27,42 @@ library since the draft standard requires the use of malloc/free etc.. */ -#define _LINUX_CAPABILITY_VERSION 0x19980330 +#define _LINUX_CAPABILITY_VERSION_0 0x19980330 +#define _LINUX_CAPABILITY_VERSION 0x20000603 -typedef struct __user_cap_header_struct { + +/* Legacy support */ + +typedef struct __user_cap_header_0_struct { __u32 version; int pid; +} *cap_user_header_0_t; + +typedef struct __user_cap_data_0_struct { + __u32 effective; + __u32 permitted; + __u32 inheritable; +} *cap_user_data_0_t; + + +/* Current definition */ + +typedef struct __user_cap_header_struct { + __u32 version; + int type; + union { + int pid; + int fd; + const char *file; + } u; } *cap_user_header_t; - + +#define CAP_USERHEADER_PID 0x00000001 +#define CAP_USERHEADER_FILEDES 0x00000002 +#define CAP_USERHEADER_FILE 0x00000003 +#define CAP_USERHEADER_FSMASK 0x00000004 +#define CAP_USERHEADER_PROCMASK 0x00000005 + typedef struct __user_cap_data_struct { __u32 effective; __u32 permitted; @@ -41,25 +70,14 @@ } *cap_user_data_t; #ifdef __KERNEL__ - -/* #define STRICT_CAP_T_TYPECHECKS */ - -#ifdef STRICT_CAP_T_TYPECHECKS - -typedef struct kernel_cap_struct { 
- __u32 cap; -} kernel_cap_t; - -#else - typedef __u32 kernel_cap_t; -#endif - -#define _USER_CAP_HEADER_SIZE (2*sizeof(__u32)) -#define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - -#endif +typedef struct __kernel_cap_data_struct { + kernel_cap_t effective; + kernel_cap_t permitted; + kernel_cap_t inheritable; +} *cap_kernel_data_t; +#endif /* __KERNEL__ */ /** @@ -277,6 +295,11 @@ #define CAP_LEASE 28 +/* Transfer any capability in your permitted set to any file you own, + remove any capability in your permitted set from any file you own */ + +#define CAP_SETFCAP 8 + #ifdef __KERNEL__ /* * Bounding set @@ -286,28 +309,19 @@ /* * Internal kernel functions only */ - -#ifdef STRICT_CAP_T_TYPECHECKS - -#define to_cap_t(x) { x } -#define cap_t(x) (x).cap - -#else - -#define to_cap_t(x) (x) -#define cap_t(x) (x) - -#endif -#define CAP_EMPTY_SET to_cap_t(0) -#define CAP_FULL_SET to_cap_t(~0) -#define CAP_INIT_EFF_SET to_cap_t(~0 & ~CAP_TO_MASK(CAP_SETPCAP)) -#define CAP_INIT_INH_SET to_cap_t(0) +#define to_cap_t(x) (x) +#define cap_t(x) (x) -#define CAP_TO_MASK(x) (1 << (x)) -#define cap_raise(c, flag) (cap_t(c) |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) (cap_t(c) &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) (cap_t(c) & CAP_TO_MASK(flag)) +#define CAP_EMPTY_SET to_cap_t(0) +#define CAP_FULL_SET to_cap_t(~0) +#define CAP_INIT_EFF_SET to_cap_t(~0) +#define CAP_INIT_INH_SET to_cap_t(0) + +#define CAP_TO_MASK(x) (1 << (x)) +#define cap_raise(c, flag) (cap_t(c) |= CAP_TO_MASK(flag)) +#define cap_lower(c, flag) (cap_t(c) &= ~CAP_TO_MASK(flag)) +#define cap_raised(c, flag) (cap_t(c) & CAP_TO_MASK(flag)) static inline kernel_cap_t cap_combine(kernel_cap_t a, kernel_cap_t b) { diff -uNr linux-2.4.0-test11-pre7/include/linux/fs.h linux/include/linux/fs.h --- linux-2.4.0-test11-pre7/include/linux/fs.h Sat Nov 18 13:18:19 2000 +++ linux/include/linux/fs.h Sat Nov 18 13:38:05 2000 @@ -26,7 +26,7 @@ #include struct poll_table_struct; - +struct __kernel_cap_data_struct; 
/* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -113,6 +113,7 @@ #define EXT_ATTR_FLAG_EXT_ATTR 1 /* Extended attributes */ #define EXT_ATTR_FLAG_EXT_ATTR_USER 2 /* Extended user attributes */ #define EXT_ATTR_FLAG_POSIX_ACL 4 /* Access Control Lists */ +#define EXT_ATTR_FLAG_POSIX_CAP 8 /* Filesystem Capabilities */ #define __IS_EXT_ATTR_FLG(inode,flg) \ ((inode)->i_sb && \ @@ -121,6 +122,7 @@ #define IS_EXT_ATTR(inode) __IS_EXT_ATTR_FLG(inode, EXT_ATTR) #define IS_EXT_ATTR_USER(inode) __IS_EXT_ATTR_FLG(inode, EXT_ATTR_USER) #define IS_POSIX_ACL(inode) __IS_EXT_ATTR_FLG(inode, POSIX_ACL) +#define IS_POSIX_CAP(inode) __IS_EXT_ATTR_FLG(inode, POSIX_CAP) /* * Flags that can be altered by MS_REMOUNT @@ -980,6 +982,10 @@ extern void make_bad_inode(struct inode *); extern int is_bad_inode(struct inode *); +/* Filesystem capabilities -- fs/capability.c */ +extern int vfs_getcap(struct inode *, struct __kernel_cap_data_struct *); +extern int vfs_setcap(struct inode *, struct __kernel_cap_data_struct *); + extern struct file_operations read_fifo_fops; extern struct file_operations write_fifo_fops; extern struct file_operations rdwr_fifo_fops; @@ -1285,6 +1291,18 @@ extern int inode_change_ok(struct inode *, struct iattr *); extern int inode_setattr(struct inode *, struct iattr *); + +/* + * Revalidate the inode. This is required for proper NFS attribute caching. 
+ */ +static __inline__ int +do_revalidate(struct dentry *dentry) +{ + struct inode * inode = dentry->d_inode; + if (inode->i_op && inode->i_op->revalidate) + return inode->i_op->revalidate(dentry); + return 0; +} /* * Common dentry functions for inclusion in the VFS diff -uNr linux-2.4.0-test11-pre7/include/linux/sched.h linux/include/linux/sched.h --- linux-2.4.0-test11-pre7/include/linux/sched.h Sat Nov 18 13:10:23 2000 +++ linux/include/linux/sched.h Sat Nov 18 13:38:05 2000 @@ -678,16 +678,10 @@ static inline int capable(int cap) { -#if 1 /* ok now */ if (cap_raised(current->cap_effective, cap)) -#else - if (cap_is_fs_cap(cap) ? current->fsuid == 0 : current->euid == 0) -#endif - { - current->flags |= PF_SUPERPRIV; return 1; - } - return 0; + else + return 0; } /* diff -uNr linux-2.4.0-test11-pre7/kernel/capability.c linux/kernel/capability.c --- linux-2.4.0-test11-pre7/kernel/capability.c Fri Nov 17 19:39:48 2000 +++ linux/kernel/capability.c Sat Nov 18 13:39:42 2000 @@ -3,83 +3,268 @@ * * Copyright (C) 1997 Andrew Main * Integrated into 2.1.97+, Andrew G. Morgan + * Added filesystem support for capabilities, Andrew G. Morgan + * Reorganized and ported filesystem support to Linux 2.4, Christoph Hellwig */ +#include +#include #include #include -kernel_cap_t cap_bset = CAP_INIT_EFF_SET; - /* Note: never hold tasklist_lock while spinning for this one */ spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED; -/* - * For sys_getproccap() and sys_setproccap(), any of the three - * capability set pointers may be NULL -- indicating that that set is - * uninteresting and/or not to be changed. 
- */ +kernel_cap_t cap_bset = CAP_INIT_EFF_SET; + +static int caught_old_library = 0; -asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) + +static void +warn_about_old_library(void) { - int error, pid; - __u32 version; - struct task_struct *target; - struct __user_cap_data_struct data; + if (caught_old_library) + return; - if (get_user(version, &header->version)) - return -EFAULT; - - error = -EINVAL; - if (version != _LINUX_CAPABILITY_VERSION) { - version = _LINUX_CAPABILITY_VERSION; - if (put_user(version, &header->version)) - error = -EFAULT; - return error; - } + printk("note: you should upgrade your libcap library\n"); + ++caught_old_library; +} + +static int +cap_to_user(const cap_kernel_data_t data, cap_user_data_t caps) +{ + if (copy_to_user(&caps->effective, &data->effective, + sizeof(data->effective))) + return -EFAULT; + if (copy_to_user(&caps->inheritable, &data->inheritable, + sizeof(data->inheritable))) + return -EFAULT; + if (copy_to_user(&caps->permitted, &data->permitted, + sizeof(data->permitted))) + return -EFAULT; - if (get_user(pid, &header->pid)) - return -EFAULT; + return 0; +} - if (pid < 0) - return -EINVAL; +static int +cap_to_user_old(const cap_kernel_data_t data, cap_user_data_0_t caps) +{ + /* XXX - when we are done, new capabilities will be 64 + bits long and we won't support this flavor of call + anymore */ + + if (copy_to_user(&caps->effective, &data->effective, + sizeof(data->effective))) + return -EFAULT; + if (copy_to_user(&caps->inheritable, &data->inheritable, + sizeof(data->inheritable))) + return -EFAULT; + if (copy_to_user(&caps->permitted, &data->permitted, + sizeof(data->permitted))) + return -EFAULT; - error = 0; + return 0; +} - spin_lock(&task_capability_lock); - - if (pid && pid != current->pid) { - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); /* identify target of query */ - if (!target) - error = -ESRCH; - } else { - target = current; - } +static int 
+cap_from_user(cap_kernel_data_t cap, cap_user_data_t user_cap) +{ + if (copy_from_user(&cap->effective, &user_cap->effective, + sizeof(cap->effective))) + return -EFAULT; + if (copy_from_user(&cap->inheritable, &user_cap->inheritable, + sizeof(cap->inheritable))) + return -EFAULT; + if (copy_from_user(&cap->permitted, &user_cap->permitted, + sizeof(cap->permitted))) + return -EFAULT; + + return 0; +} - if (!error) { - data.permitted = cap_t(target->cap_permitted); - data.inheritable = cap_t(target->cap_inheritable); - data.effective = cap_t(target->cap_effective); - } +static int +cap_from_user_old(cap_kernel_data_t cap, const cap_user_data_0_t user_cap) +{ + if (copy_from_user(&cap->effective, &user_cap->effective, + sizeof(cap->effective))) + return -EFAULT; + if (copy_from_user(&cap->inheritable, &user_cap->inheritable, + sizeof(cap->inheritable))) + return -EFAULT; + if (copy_from_user(&cap->permitted, &user_cap->permitted, + sizeof(cap->permitted))) + return -EFAULT; + + return 0; +} - if (target != current) - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - if (!error) { - if (copy_to_user(dataptr, &data, sizeof data)) - return -EFAULT; - } - return error; +static int +__capget_pid(int pid, cap_kernel_data_t caps) +{ + struct task_struct * target; + int error; + + if (pid < 0) + return -EINVAL; + + error = 0; + + spin_lock(&task_capability_lock); + + if (pid && pid != current->pid) { + read_lock(&tasklist_lock); + target = find_task_by_pid(pid); /* identify target of query */ + if (!target) + error = -ESRCH; + } else + target = current; + + if (!error) { + caps->permitted = cap_t(target->cap_permitted); + caps->inheritable = cap_t(target->cap_inheritable); + caps->effective = cap_t(target->cap_effective); + } + + if (target != current) + read_unlock(&tasklist_lock); + + spin_unlock(&task_capability_lock); + + return error; } -/* set capabilities for all processes in a given process group */ +static int +capget_old(cap_user_header_0_t 
header, cap_user_data_0_t data) +{ + struct __kernel_cap_data_struct caps; + int error, pid; + + warn_about_old_library(); + + if (get_user(pid, &header->pid)) + return -EFAULT; + + error = __capget_pid(pid, &caps); + if (error) + return error; -static void cap_set_pg(int pgrp, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) + return cap_to_user_old(&caps, data); +} + + +static int +capget_pid (cap_user_header_t header, cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + int error, pid; + + if (get_user(pid, &header->u.pid)) + return -EFAULT; + + error = __capget_pid(pid, &caps); + if (error) + return error; + + return cap_to_user(&caps, data); +} + +static int +capget_file(cap_user_header_t header, cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + struct nameidata nd; + const char * filename; + int error; + + if (get_user(filename, &header->u.file)) + return -EFAULT; + + error = user_path_walk_link(filename, &nd); + if (error) + return error; + + error = do_revalidate(nd.dentry); + if (!error) + error = vfs_getcap(nd.dentry->d_inode, &caps); + + path_release(&nd); + if (error) + return error; + + return cap_to_user(&caps, data); +} + +static int +capget_fd(cap_user_header_t header, cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + struct file * file; + int error, fd; + + if (get_user(fd, &header->u.fd)) + return -EFAULT; + + error = -EBADF; + + file = fget(fd); + if (file) { + struct dentry * dentry = file->f_dentry; + error = do_revalidate(dentry); + if (!error) + error = vfs_getcap(dentry->d_inode, &caps); + fput(file); + } + + if (error) + return error; + + return cap_to_user(&caps, data); +} + +asmlinkage +long sys_capget(cap_user_header_t header, cap_user_data_t data) +{ + int type; + __u32 version; + + if (get_user(version, &header->version)) + return -EFAULT; + + if (version == _LINUX_CAPABILITY_VERSION) { + if (get_user(type, &header->type)) + return -EFAULT; + + switch 
(type) { + case CAP_USERHEADER_PID: + return capget_pid(header, data); + case CAP_USERHEADER_FILEDES: + return capget_fd(header, data); + case CAP_USERHEADER_FILE: + return capget_file(header, data); + case CAP_USERHEADER_FSMASK: + case CAP_USERHEADER_PROCMASK: + default: + /* we don't recognize the request */ + printk("capget: unrecognized request 0x%x\n", type); + return -EINVAL; + } + } else if (version == _LINUX_CAPABILITY_VERSION_0) { + return capget_old((cap_user_header_0_t) header, + (cap_user_data_0_t) data); + } else { + version = _LINUX_CAPABILITY_VERSION; + if (put_user(version, &header->version)) + return -EFAULT; + return -EINVAL; + } +} + +/* + * set capabilities for all processes in a given process group + */ +static void +capset_pg(int pgrp, cap_kernel_data_t caps) { struct task_struct *target; @@ -88,18 +273,18 @@ for_each_task(target) { if (target->pgrp != pgrp) continue; - target->cap_effective = *effective; - target->cap_inheritable = *inheritable; - target->cap_permitted = *permitted; + target->cap_effective = caps->effective; + target->cap_inheritable = caps->inheritable; + target->cap_permitted = caps->permitted; } read_unlock(&tasklist_lock); } -/* set capabilities for all processes other than 1 and self */ - -static void cap_set_all(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +/* + set capabilities for all processes other than 1 and self + */ +static void +capset_all(cap_kernel_data_t caps) { struct task_struct *target; @@ -109,108 +294,207 @@ for_each_task(target) { if (target == current || target->pid == 1) continue; - target->cap_effective = *effective; - target->cap_inheritable = *inheritable; - target->cap_permitted = *permitted; + target->cap_effective = caps->effective; + target->cap_inheritable = caps->inheritable; + target->cap_permitted = caps->permitted; } read_unlock(&tasklist_lock); } /* - * The restrictions on setting capabilities are specified as: + * The restrictions on setting 
capabilities on a process are specified as: * * [pid is for the 'target' task. 'current' is the calling task.] * - * I: any raised capabilities must be a subset of the (old current) Permitted - * P: any raised capabilities must be a subset of the (old current) permitted + * I: any newly raised capabilities must be a subset of the (old current) Permitted + * P: any newly raised capabilities must be a subset of the (old current) Permitted * E: must be set to a subset of (new target) Permitted */ - -asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) +static int +__capset_pid(int pid, cap_kernel_data_t caps) { - kernel_cap_t inheritable, permitted, effective; - __u32 version; - struct task_struct *target; - int error, pid; + struct task_struct * target; + int error = -EPERM; + + spin_lock(&task_capability_lock); + + if ((pid > 0) && (pid != current->pid)) { + read_lock(&tasklist_lock); + target = find_task_by_pid(pid); /* identify target of query */ + if (!target) { + error = -ESRCH; + goto out; + } + } else { + /* XXX - note == -1 => no test on target caps */ + target = current; + } + + + /* verify restrictions on target's new Inheritable set */ + if (!cap_issubset(caps->inheritable, + cap_combine(target->cap_inheritable, + current->cap_permitted))) + goto out; + + /* verify restrictions on target's new Permitted set */ + if (!cap_issubset(caps->permitted, + cap_combine(target->cap_permitted, + current->cap_permitted))) + goto out; + + /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ + if (!cap_issubset(caps->effective, caps->permitted)) + goto out; + + /* having verified that the proposed changes are legal, + we now put them into effect. */ + error = 0; + + if (pid < 0) { + if (pid == -1) /* all procs other than current and init */ + capset_all(caps); + else /* all procs in process group */ + capset_pg(-pid, caps); + goto spin_out; + } else { + /* FIXME: do we need to have a write lock here..? 
*/ + target->cap_effective = caps->effective; + target->cap_inheritable = caps->inheritable; + target->cap_permitted = caps->permitted; + } + +out: + if (target != current) + read_unlock(&tasklist_lock); - if (get_user(version, &header->version)) - return -EFAULT; +spin_out: + spin_unlock(&task_capability_lock); + return error; +} - if (version != _LINUX_CAPABILITY_VERSION) { - version = _LINUX_CAPABILITY_VERSION; - if (put_user(version, &header->version)) - return -EFAULT; - return -EINVAL; - } +/* + * Legacy (version 0) capset: the header carries only a pid. + * Returns -EFAULT if the header or data cannot be read, -EPERM if a + * non-zero pid is named without CAP_SETPCAP, otherwise whatever + * __capset_pid() returns. + */ +static int +capset_old (cap_user_header_0_t header, const cap_user_data_0_t data) +{ + struct __kernel_cap_data_struct caps; + int error, pid; - if (get_user(pid, &header->pid)) - return -EFAULT; + warn_about_old_library(); + + if (get_user(pid, &header->pid)) + return -EFAULT; + + /* + * Same rule as the pre-patch sys_capset(): a pid of 0 addresses + * the caller itself and needs no CAP_SETPCAP; any other pid does. + */ + if (pid && !capable(CAP_SETPCAP)) + return -EPERM; + + error = cap_from_user_old(&caps, data); + if (error) + return error; + + return __capset_pid(pid, &caps); +} - if (pid && !capable(CAP_SETPCAP)) - return -EPERM; +/* + * Current-version capset addressed by pid (header->u.pid). + * Returns -EFAULT if the header or data cannot be read, -EPERM if a + * non-zero pid is named without CAP_SETPCAP, otherwise whatever + * __capset_pid() returns. + */ +static int +capset_pid(cap_user_header_t header, const cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + int error, pid; - if (copy_from_user(&effective, &data->effective, sizeof(effective)) || - copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) || - copy_from_user(&permitted, &data->permitted, sizeof(permitted))) - return -EFAULT; - - error = -EPERM; - spin_lock(&task_capability_lock); - - if (pid > 0 && pid != current->pid) { - read_lock(&tasklist_lock); - target = find_task_by_pid(pid); /* identify target of query */ - if (!target) { - error = -ESRCH; - goto out; - } - } else { - target = current; - } + if (get_user(pid, &header->u.pid)) + return -EFAULT; + + /* + * The pid must be read before the privilege check: a pid of 0 + * addresses the caller itself and needs no CAP_SETPCAP. + */ + if (pid && !capable(CAP_SETPCAP)) + return -EPERM; + + error = cap_from_user(&caps, data); + if (error) + return error; + + return __capset_pid(pid, &caps); +} - /* verify restrictions on target's new Inheritable set */ - if (!cap_issubset(inheritable, - cap_combine(target->cap_inheritable, - 
current->cap_permitted))) { - goto out; - } +static int +capset_fd(cap_user_header_t header, const cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + struct file * file; + int error, fd; + + if (get_user(fd, &header->u.fd)) + return -EFAULT; + + error = cap_from_user(&caps, data); + if (error) + return error; + + error = -EBADF; + + file = fget(fd); + if (file) { + struct dentry * dentry = file->f_dentry; + error = do_revalidate(dentry); + if (!error) + error = vfs_setcap(dentry->d_inode, &caps); + fput(file); + } + + return error; +} - /* verify restrictions on target's new Permitted set */ - if (!cap_issubset(permitted, - cap_combine(target->cap_permitted, - current->cap_permitted))) { - goto out; - } +static int +capset_file(cap_user_header_t header, const cap_user_data_t data) +{ + struct __kernel_cap_data_struct caps; + struct nameidata nd; + const char * filename; + int error; + + if (get_user(filename, &header->u.file)) + return -EFAULT; + + error = cap_from_user(&caps, data); + if (error) + return error; + + error = user_path_walk_link(filename, &nd); + if (!error) { + error = do_revalidate(nd.dentry); + if (!error) + error = vfs_setcap(nd.dentry->d_inode, &caps); + path_release(&nd); + } + + return error; +} - /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ - if (!cap_issubset(effective, permitted)) { - goto out; - } +/* + * sys_capset - set capabilities on a process or on a file. + * + * Reads the version from the user header and dispatches on it: the + * current version selects the target by header->type (pid, file + * descriptor or path name), version 0 is the legacy pid-only call. + * Any other version gets the supported version written back into the + * header and fails with -EINVAL (-EFAULT if the header is not writable). + */ +asmlinkage long +sys_capset(cap_user_header_t header, const cap_user_data_t data) +{ + int type; + __u32 version; - /* having verified that the proposed changes are legal, - we now put them into effect. */ - error = 0; - - if (pid < 0) { - if (pid == -1) /* all procs other than current and init */ - cap_set_all(&effective, &inheritable, &permitted); - - else /* all procs in process group */ - cap_set_pg(-pid, &effective, &inheritable, &permitted); - goto spin_out; - } else { - /* FIXME: do we need to have a write lock here..? 
*/ - target->cap_effective = effective; - target->cap_inheritable = inheritable; - target->cap_permitted = permitted; - } + if (get_user(version, &header->version)) + return -EFAULT; -out: - if (target != current) { - read_unlock(&tasklist_lock); - } -spin_out: - spin_unlock(&task_capability_lock); - return error; + if (version == _LINUX_CAPABILITY_VERSION) { + if (get_user(type, &header->type)) + return -EFAULT; + + switch (type) { + case CAP_USERHEADER_PID: + return capset_pid(header, data); + case CAP_USERHEADER_FILEDES: + return capset_fd(header, data); + case CAP_USERHEADER_FILE: + return capset_file(header, data); + case CAP_USERHEADER_FSMASK: + case CAP_USERHEADER_PROCMASK: + default: + return -EINVAL; + } + } else if (version == _LINUX_CAPABILITY_VERSION_0) { + return capset_old((cap_user_header_0_t) header, + (cap_user_data_0_t) data); + } else { + /* + * The version comes straight from user space, so an unknown + * value must never panic the kernel. Report the version we + * support and fail the call, the same way sys_capget() does. + */ + version = _LINUX_CAPABILITY_VERSION; + if (put_user(version, &header->version)) + return -EFAULT; + return -EINVAL; + } } _______________________________________________ Linux-privs-discuss mailing list Linux-privs-discuss@lists.sourceforge.net http://lists.sourceforge.net/mailman/listinfo/linux-privs-discuss