diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c1c480c52f0e0..1326cf3f88cae 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -656,6 +656,31 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	if (next_lock != waiter->lock)
 		goto out_unlock_pi;
 
+	/*
+	 * There could be 'spurious' loops in the lock graph due to ww_mutex,
+	 * consider:
+	 *
+	 *   P1: A, ww_A, ww_B
+	 *   P2: ww_B, ww_A
+	 *   P3: A
+	 *
+	 * P3 should not return -EDEADLK because it gets trapped in the cycle
+	 * created by P1 and P2 (which will resolve -- and runs into
+	 * max_lock_depth above). Therefore disable detect_deadlock such that
+	 * the below termination condition can trigger once all relevant tasks
+	 * are boosted.
+	 *
+	 * Even when we start with ww_mutex we can disable deadlock detection,
+	 * since we would suppress a ww_mutex induced deadlock at [6] anyway.
+	 * Suppressing it here however is not sufficient since we might still
+	 * hit [6] due to adjustment driven iteration.
+	 *
+	 * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
+	 * utterly fail to report it; lockdep should.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
+		detect_deadlock = false;
+
 	/*
 	 * Drop out, when the task has no waiters. Note,
 	 * top_waiter can be NULL, when we are in the deboosting
@@ -717,8 +742,21 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * walk, we detected a deadlock.
 	 */
 	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
-		raw_spin_unlock(&lock->wait_lock);
 		ret = -EDEADLK;
+
+		/*
+		 * When the deadlock is due to ww_mutex; also see above. Don't
+		 * report the deadlock and instead let the ww_mutex wound/die
+		 * logic pick which of the contending threads gets -EDEADLK.
+		 *
+		 * NOTE: assumes the cycle only contains a single ww_class; any
+		 * other configuration and we fail to report; also, see
+		 * lockdep.
+		 */
+		if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
+			ret = 0;
+
+		raw_spin_unlock(&lock->wait_lock);
 		goto out_unlock_pi;
 	}
 
@@ -1100,8 +1138,13 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 		/* Check whether the waiter should back out immediately */
 		rtm = container_of(lock, struct rt_mutex, rtmutex);
 		res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
-		if (res)
+		if (res) {
+			raw_spin_lock(&task->pi_lock);
+			rt_mutex_dequeue(lock, waiter);
+			task->pi_blocked_on = NULL;
+			raw_spin_unlock(&task->pi_lock);
 			return res;
+		}
 	}
 
 	if (!owner)
@@ -1347,8 +1390,9 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
 		 *    for CONFIG_PREEMPT_RCU=y)
 		 *  - the VCPU on which owner runs is preempted
 		 */
-		if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) ||
-		    need_resched() || vcpu_is_preempted(task_cpu(owner))) {
+		if (!owner->on_cpu || need_resched() ||
+		    !rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+		    vcpu_is_preempted(task_cpu(owner))) {
 			res = false;
 			break;
 		}
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 61256de5bd66a..c47e8361bfb5c 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
 	return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
 }
 
+/*
+ * Lockless speculative check whether @waiter is still the top waiter on
+ * @lock. This is solely comparing pointers and not dereferencing the
+ * leftmost entry which might be about to vanish.
+ */
+static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+						 struct rt_mutex_waiter *waiter)
+{
+	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+
+	return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+}
+
 static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
 {
 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 02718b6c7732d..3885f851997d9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3081,7 +3081,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 		ktime_t to = NSEC_PER_SEC / HZ;
 
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_hrtimeout(&to, HRTIMER_MODE_REL);
+		schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
 		continue;
 	}
 
@@ -8588,7 +8588,6 @@ static void balance_push(struct rq *rq)
 	struct task_struct *push_task = rq->curr;
 
 	lockdep_assert_rq_held(rq);
-	SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
 	/*
 	 * Ensure the thing is persistent until balance_push_set(.on = false);
@@ -8596,9 +8595,10 @@ static void balance_push(struct rq *rq)
 	rq->balance_callback = &balance_push_callback;
 
 	/*
-	 * Only active while going offline.
+	 * Only active while going offline and when invoked on the outgoing
+	 * CPU.
 	 */
-	if (!cpu_dying(rq->cpu))
+	if (!cpu_dying(rq->cpu) && rq == this_rq())
 		return;
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 096cfefc66c90..60a9ad2ba166b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2255,6 +2255,9 @@ static inline struct task_struct *get_push_task(struct rq *rq)
 	if (p->nr_cpus_allowed == 1)
 		return NULL;
 
+	if (p->migration_disabled)
+		return NULL;
+
 	rq->push_busy = true;
 	return get_task_struct(p);
 }
diff --git a/localversion-rt b/localversion-rt
index 9f7d0bdbffb18..08b3e75841adc 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt13
+-rt14
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c55d5862416fc..d597df37bfb7c 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -82,7 +82,7 @@ struct zsmalloc_handle {
 	unsigned long addr;
-	struct mutex lock;
+	spinlock_t lock;
 };
 
 #define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
@@ -370,7 +370,7 @@ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 	if (p) {
 		struct zsmalloc_handle *zh = p;
 
-		mutex_init(&zh->lock);
+		spin_lock_init(&zh->lock);
 	}
 #endif
 	return (unsigned long)p;
 }
@@ -927,7 +927,7 @@ static inline int testpin_tag(unsigned long handle)
 #ifdef CONFIG_PREEMPT_RT
 	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 
-	return mutex_is_locked(&zh->lock);
+	return spin_is_locked(&zh->lock);
 #else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
 #endif
@@ -938,7 +938,7 @@ static inline int trypin_tag(unsigned long handle)
 #ifdef CONFIG_PREEMPT_RT
 	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 
-	return mutex_trylock(&zh->lock);
+	return spin_trylock(&zh->lock);
 #else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
 #endif
@@ -949,7 +949,7 @@ static void pin_tag(unsigned long handle) __acquires(bitlock)
 #ifdef CONFIG_PREEMPT_RT
 	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 
-	return mutex_lock(&zh->lock);
+	return spin_lock(&zh->lock);
 #else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
 #endif
@@ -960,7 +960,7 @@ static void unpin_tag(unsigned long handle) __releases(bitlock)
 #ifdef CONFIG_PREEMPT_RT
 	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
 
-	return mutex_unlock(&zh->lock);
+	return spin_unlock(&zh->lock);
 #else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
 #endif
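The zsmalloc hunks above keep the pin_tag()/trypin_tag()/unpin_tag() interface unchanged and only swap the per-handle lock that PREEMPT_RT uses in place of the bit spinlock from a mutex to a spinlock. A minimal userspace sketch of the same per-handle pin/unpin pattern, using POSIX spinlocks and hypothetical names (demo_handle, pin_handle, trypin_handle, unpin_handle), could look like this:

/*
 * Illustrative userspace sketch only, not part of the patch above. It
 * mirrors the shape of the zsmalloc change: a per-handle lock guards the
 * handle instead of a bit spinlock. All names here are made up for the
 * example; only the pattern corresponds to the kernel code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_handle {
	unsigned long addr;		/* payload, stands in for zh->addr */
	pthread_spinlock_t lock;	/* stands in for spinlock_t lock   */
};

static struct demo_handle *alloc_handle(unsigned long addr)
{
	struct demo_handle *zh = malloc(sizeof(*zh));

	if (zh) {
		zh->addr = addr;
		/* mirrors spin_lock_init(&zh->lock) in cache_alloc_handle() */
		pthread_spin_init(&zh->lock, PTHREAD_PROCESS_PRIVATE);
	}
	return zh;
}

static int trypin_handle(struct demo_handle *zh)	/* ~ trypin_tag() */
{
	return pthread_spin_trylock(&zh->lock) == 0;
}

static void pin_handle(struct demo_handle *zh)		/* ~ pin_tag() */
{
	pthread_spin_lock(&zh->lock);
}

static void unpin_handle(struct demo_handle *zh)	/* ~ unpin_tag() */
{
	pthread_spin_unlock(&zh->lock);
}

int main(void)
{
	struct demo_handle *zh = alloc_handle(0x1000);

	if (!zh)
		return 1;

	pin_handle(zh);
	/* trylock fails while the handle is pinned ... */
	printf("trypin while pinned: %d\n", trypin_handle(zh));
	unpin_handle(zh);
	/* ... and succeeds (taking the lock) once it has been unpinned */
	printf("trypin after unpin:  %d\n", trypin_handle(zh));
	unpin_handle(zh);

	pthread_spin_destroy(&zh->lock);
	free(zh);
	return 0;
}

Note that in the kernel patch both the old mutex and the new spinlock_t are sleeping locks under PREEMPT_RT; the sketch only mirrors the pin/unpin API shape, not those semantics.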