diff --git a/Makefile b/Makefile index 76c3b6c..adbf6aa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = .62 +EXTRAVERSION = .63-rc1 NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 7ffab7cb..3a0fae6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -517,7 +517,9 @@ struct ethtool_ops { #define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ #define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level. */ #define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation. */ -#define ETHTOOL_GLINK 0x0000000a /* Get link status (ethtool_value) */ +/* Get link status for host, i.e. whether the interface *and* the + * physical port (if there is one) are up (ethtool_value). */ +#define ETHTOOL_GLINK 0x0000000a #define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ #define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data. */ #define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 267e484..b6998ef 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -680,6 +680,22 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key) return AUDIT_BUILD_CONTEXT; } +static int audit_in_mask(const struct audit_krule *rule, unsigned long val) +{ + int word, bit; + + if (val > 0xffffffff) + return false; + + word = AUDIT_WORD(val); + if (word >= AUDIT_BITMASK_SIZE) + return false; + + bit = AUDIT_BIT(val); + + return rule->mask[word] & bit; +} + /* At syscall entry and exit time, this filter is called if the * audit_state is not low enough that auditing cannot take place, but is * also not high enough that we already know we have to write an audit @@ -697,11 +713,8 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, rcu_read_lock(); if (!list_empty(list)) { - int word = AUDIT_WORD(ctx->major); - int bit = AUDIT_BIT(ctx->major); - list_for_each_entry_rcu(e, list, list) { - if ((e->rule.mask[word] & bit) == bit && + if (audit_in_mask(&e->rule, ctx->major) && audit_filter_rules(tsk, &e->rule, ctx, NULL, &state)) { rcu_read_unlock(); @@ -730,8 +743,6 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) rcu_read_lock(); for (i = 0; i < ctx->name_count; i++) { - int word = AUDIT_WORD(ctx->major); - int bit = AUDIT_BIT(ctx->major); struct audit_names *n = &ctx->names[i]; int h = audit_hash_ino((u32)n->ino); struct list_head *list = &audit_inode_hash[h]; @@ -740,7 +751,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx) continue; list_for_each_entry_rcu(e, list, list) { - if ((e->rule.mask[word] & bit) == bit && + if (audit_in_mask(&e->rule, ctx->major) && audit_filter_rules(tsk, &e->rule, ctx, n, &state)) { rcu_read_unlock(); ctx->current_state = state; diff --git a/kernel/futex.c b/kernel/futex.c index 9c5ffe1..55dd3d2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -536,6 +536,55 @@ void exit_pi_state_list(struct task_struct *curr) spin_unlock_irq(&curr->pi_lock); } +/* + * We need to check the following states: + * + * Waiter | pi_state | pi->owner | uTID | uODIED | ? + * + * [1] NULL | --- | --- | 0 | 0/1 | Valid + * [2] NULL | --- | --- | >0 | 0/1 | Valid + * + * [3] Found | NULL | -- | Any | 0/1 | Invalid + * + * [4] Found | Found | NULL | 0 | 1 | Valid + * [5] Found | Found | NULL | >0 | 1 | Invalid + * + * [6] Found | Found | task | 0 | 1 | Valid + * + * [7] Found | Found | NULL | Any | 0 | Invalid + * + * [8] Found | Found | task | ==taskTID | 0/1 | Valid + * [9] Found | Found | task | 0 | 0 | Invalid + * [10] Found | Found | task | !=taskTID | 0/1 | Invalid + * + * [1] Indicates that the kernel can acquire the futex atomically. We + * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. + * + * [2] Valid, if TID does not belong to a kernel thread. If no matching + * thread is found then it indicates that the owner TID has died. + * + * [3] Invalid. The waiter is queued on a non PI futex + * + * [4] Valid state after exit_robust_list(), which sets the user space + * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. + * + * [5] The user space value got manipulated between exit_robust_list() + * and exit_pi_state_list() + * + * [6] Valid state after exit_pi_state_list() which sets the new owner in + * the pi_state but cannot access the user space value. + * + * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. + * + * [8] Owner and user space value match + * + * [9] There is no transient state which sets the user space TID to 0 + * except exit_robust_list(), but this is indicated by the + * FUTEX_OWNER_DIED bit. See [4] + * + * [10] There is no transient state which leaves owner and user space + * TID out of sync. + */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) @@ -551,12 +600,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* - * Another waiter already exists - bump up - * the refcount and return its pi_state: + * Sanity check the waiter before increasing + * the refcount and attaching to it. */ pi_state = this->pi_state; /* - * Userspace might have messed up non PI and PI futexes + * Userspace might have messed up non-PI and + * PI futexes [3] */ if (unlikely(!pi_state)) return -EINVAL; @@ -564,34 +614,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!atomic_read(&pi_state->refcount)); /* - * When pi_state->owner is NULL then the owner died - * and another waiter is on the fly. pi_state->owner - * is fixed up by the task which acquires - * pi_state->rt_mutex. - * - * We do not check for pid == 0 which can happen when - * the owner died and robust_list_exit() cleared the - * TID. + * Handle the owner died case: */ - if (pid && pi_state->owner) { + if (uval & FUTEX_OWNER_DIED) { /* - * Bail out if user space manipulated the - * futex value. + * exit_pi_state_list sets owner to NULL and + * wakes the topmost waiter. The task which + * acquires the pi_state->rt_mutex will fixup + * owner. */ - if (pid != task_pid_vnr(pi_state->owner)) + if (!pi_state->owner) { + /* + * No pi state owner, but the user + * space TID is not 0. Inconsistent + * state. [5] + */ + if (pid) + return -EINVAL; + /* + * Take a ref on the state and + * return. [4] + */ + goto out_state; + } + + /* + * If TID is 0, then either the dying owner + * has not yet executed exit_pi_state_list() + * or some waiter acquired the rtmutex in the + * pi state, but did not yet fixup the TID in + * user space. + * + * Take a ref on the state and return. [6] + */ + if (!pid) + goto out_state; + } else { + /* + * If the owner died bit is not set, + * then the pi_state must have an + * owner. [7] + */ + if (!pi_state->owner) return -EINVAL; } + /* + * Bail out if user space manipulated the + * futex value. If pi state exists then the + * owner TID must be the same as the user + * space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; + + out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; - return 0; } } /* * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 + * the new pi_state to it, but bail out when TID = 0 [1] */ if (!pid) return -ESRCH; @@ -599,6 +685,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, if (!p) return -ESRCH; + if (!p->mm) { + put_task_struct(p); + return -EPERM; + } + /* * We need to look at the task state flags to figure out, * whether the task is exiting. To protect against the do_exit @@ -619,6 +710,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return ret; } + /* + * No existing pi state. First waiter. [2] + */ pi_state = alloc_pi_state(); /* @@ -692,10 +786,18 @@ retry: return -EDEADLK; /* - * Surprise - we got the lock. Just return to userspace: + * Surprise - we got the lock, but we do not trust user space at all. */ - if (unlikely(!curval)) - return 1; + if (unlikely(!curval)) { + /* + * We verify whether there is kernel state for this + * futex. If not, we can safely assume, that the 0 -> + * TID transition is correct. If state exists, we do + * not bother to fixup the user space state as it was + * corrupted already. + */ + return futex_top_waiter(hb, key) ? -EINVAL : 1; + } uval = curval; @@ -803,6 +905,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 curval, newval; + int ret = 0; if (!pi_state) return -EINVAL; @@ -827,25 +930,21 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = this->task; /* - * We pass it to the next owner. (The WAITERS bit is always - * kept enabled while there is PI state around. We must also - * preserve the owner died bit.) + * We pass it to the next owner. The WAITERS bit is always + * kept enabled while there is PI state around. We cleanup the + * owner died bit, because we are the owner. */ - if (!(uval & FUTEX_OWNER_DIED)) { - int ret = 0; + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); - - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + curval = cmpxchg_futex_value_locked(uaddr, uval, newval); - if (curval == -EFAULT) - ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; - if (ret) { - spin_unlock(&pi_state->pi_mutex.wait_lock); - return ret; - } + if (curval == -EFAULT) + ret = -EFAULT; + else if (curval != uval) + ret = -EINVAL; + if (ret) { + spin_unlock(&pi_state->pi_mutex.wait_lock); + return ret; } spin_lock_irq(&pi_state->owner->pi_lock); @@ -1122,8 +1221,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * hb1 and hb2 must be held by the caller. * * Returns: - * 0 - failed to acquire the lock atomicly - * 1 - acquired the lock + * 0 - failed to acquire the lock atomically; + * >0 - acquired the lock, return value is vpid of the top_waiter * <0 - error */ static int futex_proxy_trylock_atomic(u32 __user *pifutex, @@ -1134,7 +1233,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, { struct futex_q *top_waiter = NULL; u32 curval; - int ret; + int ret, vpid; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; @@ -1162,11 +1261,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * the contended case or if set_waiters is 1. The pi_state is returned * in ps in contended cases. */ + vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, set_waiters); - if (ret == 1) + if (ret == 1) { requeue_pi_wake_futex(top_waiter, key2, hb2); - + return vpid; + } return ret; } @@ -1196,10 +1297,16 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; - u32 curval2; if (requeue_pi) { /* + * Requeue PI only works on two distinct uaddrs. This + * check is only valid for private futexes. See below. + */ + if (uaddr1 == uaddr2) + return -EINVAL; + + /* * requeue_pi requires a pi_state, try to allocate it now * without any locks in case it fails. */ @@ -1237,6 +1344,15 @@ retry: if (unlikely(ret != 0)) goto out_put_key1; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (requeue_pi && match_futex(&key1, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + hb1 = hash_futex(&key1); hb2 = hash_futex(&key2); @@ -1282,16 +1398,25 @@ retry_private: * At this point the top_waiter has either taken uaddr2 or is * waiting on it. If the former, then the pi_state will not * exist yet, look it up one more time to ensure we have a - * reference to it. + * reference to it. If the lock was taken, ret contains the + * vpid of the top waiter task. */ - if (ret == 1) { + if (ret > 0) { WARN_ON(pi_state); drop_count++; task_count++; - ret = get_futex_value_locked(&curval2, uaddr2); - if (!ret) - ret = lookup_pi_state(curval2, hb2, &key2, - &pi_state); + /* + * If we acquired the lock, then the user + * space value of uaddr2 should be vpid. It + * cannot be changed by the top waiter as it + * is blocked on hb2 lock if it tries to do + * so. If something fiddled with it behind our + * back the pi state lookup might unearth + * it. So we rather use the known value than + * rereading and handing potential crap to + * lookup_pi_state. + */ + ret = lookup_pi_state(ret, hb2, &key2, &pi_state); } switch (ret) { @@ -2083,9 +2208,10 @@ retry: /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking - * anyone else up: + * anyone else up. We only try this if neither the waiters nor + * the owner died bit are set. */ - if (!(uval & FUTEX_OWNER_DIED)) + if (!(uval & ~FUTEX_TID_MASK)) uval = cmpxchg_futex_value_locked(uaddr, task_pid_vnr(current), 0); @@ -2120,11 +2246,9 @@ retry: /* * No waiters - kernel unlocks the futex: */ - if (!(uval & FUTEX_OWNER_DIED)) { - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; - } + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; out_unlock: spin_unlock(&hb->lock); @@ -2285,6 +2409,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, if (ret) goto out_key2; + /* + * The check above which compares uaddrs is not sufficient for + * shared futexes. We need to compare the keys: + */ + if (match_futex(&q.key, &key2)) { + ret = -EINVAL; + goto out_put_keys; + } + /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index abbe8fa..f9e7179 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -365,6 +365,20 @@ static int ethtool_nway_reset(struct net_device *dev) return dev->ethtool_ops->nway_reset(dev); } +static int ethtool_get_link(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { .cmd = ETHTOOL_GLINK }; + + if (!dev->ethtool_ops->get_link) + return -EOPNOTSUPP; + + edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { struct ethtool_eeprom eeprom; @@ -1016,8 +1030,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) rc = ethtool_nway_reset(dev); break; case ETHTOOL_GLINK: - rc = ethtool_get_value(dev, useraddr, ethcmd, - dev->ethtool_ops->get_link); + rc = ethtool_get_link(dev, useraddr); break; case ETHTOOL_GEEPROM: rc = ethtool_get_eeprom(dev, useraddr); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index e2eaf29..e6bf72c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -121,7 +121,8 @@ static struct ctl_table netns_core_table[] = { .mode = 0644, .extra1 = &zero, .extra2 = &ushort_max, - .proc_handler = proc_dointvec_minmax + .proc_handler = proc_dointvec_minmax, + .strategy = &sysctl_intvec }, { .ctl_name = 0 } }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 910fa54..d957371 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -241,7 +241,8 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &tcp_syn_retries_min, - .extra2 = &tcp_syn_retries_max + .extra2 = &tcp_syn_retries_max, + .strategy = &sysctl_intvec }, { .ctl_name = NET_IPV4_NONLOCAL_BIND,