aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c54
-rw-r--r--kernel/futex/syscalls.c11
-rw-r--r--kernel/locking/rtmutex.c55
-rw-r--r--kernel/locking/rtmutex_api.c6
4 files changed, 73 insertions, 53 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index eacc3702654d..d56328e5080e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -380,7 +380,6 @@ enum event_type_t {
/*
* perf_sched_events : >0 events exist
- * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
static void perf_sched_delayed(struct work_struct *work);
@@ -389,7 +388,6 @@ static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
-static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
static atomic_t nr_mmap_events __read_mostly;
@@ -844,9 +842,16 @@ static void perf_cgroup_switch(struct task_struct *task)
struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
struct perf_cgroup *cgrp;
- cgrp = perf_cgroup_from_task(task, NULL);
+ /*
+ * cpuctx->cgrp is set when the first cgroup event enabled,
+ * and is cleared when the last cgroup event disabled.
+ */
+ if (READ_ONCE(cpuctx->cgrp) == NULL)
+ return;
WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0);
+
+ cgrp = perf_cgroup_from_task(task, NULL);
if (READ_ONCE(cpuctx->cgrp) == cgrp)
return;
@@ -3631,8 +3636,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
* to check if we have to switch out PMU state.
* cgroup event are system-wide mode only
*/
- if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
- perf_cgroup_switch(next);
+ perf_cgroup_switch(next);
}
static bool perf_less_group_idx(const void *l, const void *r)
@@ -4974,15 +4978,6 @@ static void unaccount_pmu_sb_event(struct perf_event *event)
detach_sb_event(event);
}
-static void unaccount_event_cpu(struct perf_event *event, int cpu)
-{
- if (event->parent)
- return;
-
- if (is_cgroup_event(event))
- atomic_dec(&per_cpu(perf_cgroup_events, cpu));
-}
-
#ifdef CONFIG_NO_HZ_FULL
static DEFINE_SPINLOCK(nr_freq_lock);
#endif
@@ -5048,8 +5043,6 @@ static void unaccount_event(struct perf_event *event)
schedule_delayed_work(&perf_sched_work, HZ);
}
- unaccount_event_cpu(event, event->cpu);
-
unaccount_pmu_sb_event(event);
}
@@ -11679,15 +11672,6 @@ static void account_pmu_sb_event(struct perf_event *event)
attach_sb_event(event);
}
-static void account_event_cpu(struct perf_event *event, int cpu)
-{
- if (event->parent)
- return;
-
- if (is_cgroup_event(event))
- atomic_inc(&per_cpu(perf_cgroup_events, cpu));
-}
-
/* Freq events need the tick to stay alive (see perf_event_task_tick). */
static void account_freq_event_nohz(void)
{
@@ -11775,8 +11759,6 @@ static void account_event(struct perf_event *event)
}
enabled:
- account_event_cpu(event, event->cpu);
-
account_pmu_sb_event(event);
}
@@ -12339,12 +12321,12 @@ SYSCALL_DEFINE5(perf_event_open,
if (flags & ~PERF_FLAG_ALL)
return -EINVAL;
- /* Do we allow access to perf_event_open(2) ? */
- err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
+ err = perf_copy_attr(attr_uptr, &attr);
if (err)
return err;
- err = perf_copy_attr(attr_uptr, &attr);
+ /* Do we allow access to perf_event_open(2) ? */
+ err = security_perf_event_open(&attr, PERF_SECURITY_OPEN);
if (err)
return err;
@@ -12689,7 +12671,8 @@ SYSCALL_DEFINE5(perf_event_open,
return event_fd;
err_context:
- /* event->pmu_ctx freed by free_event() */
+ put_pmu_ctx(event->pmu_ctx);
+ event->pmu_ctx = NULL; /* _free_event() */
err_locked:
mutex_unlock(&ctx->mutex);
perf_unpin_context(ctx);
@@ -12802,6 +12785,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
err_pmu_ctx:
put_pmu_ctx(pmu_ctx);
+ event->pmu_ctx = NULL; /* _free_event() */
err_unlock:
mutex_unlock(&ctx->mutex);
perf_unpin_context(ctx);
@@ -12822,13 +12806,11 @@ static void __perf_pmu_remove(struct perf_event_context *ctx,
perf_event_groups_for_cpu_pmu(event, groups, cpu, pmu) {
perf_remove_from_context(event, 0);
- unaccount_event_cpu(event, cpu);
put_pmu_ctx(event->pmu_ctx);
list_add(&event->migrate_entry, events);
for_each_sibling_event(sibling, event) {
perf_remove_from_context(sibling, 0);
- unaccount_event_cpu(sibling, cpu);
put_pmu_ctx(sibling->pmu_ctx);
list_add(&sibling->migrate_entry, events);
}
@@ -12847,7 +12829,6 @@ static void __perf_pmu_install_event(struct pmu *pmu,
if (event->state >= PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_INACTIVE;
- account_event_cpu(event, cpu);
perf_install_in_context(ctx, event, cpu);
}
@@ -13231,7 +13212,7 @@ inherit_event(struct perf_event *parent_event,
pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
if (IS_ERR(pmu_ctx)) {
free_event(child_event);
- return NULL;
+ return ERR_CAST(pmu_ctx);
}
child_event->pmu_ctx = pmu_ctx;
@@ -13742,8 +13723,7 @@ static int __perf_cgroup_move(void *info)
struct task_struct *task = info;
preempt_disable();
- if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
- perf_cgroup_switch(task);
+ perf_cgroup_switch(task);
preempt_enable();
return 0;
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 086a22d1adb7..a8074079b09e 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -286,19 +286,22 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
}
futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
- if (!futexv)
- return -ENOMEM;
+ if (!futexv) {
+ ret = -ENOMEM;
+ goto destroy_timer;
+ }
ret = futex_parse_waitv(futexv, waiters, nr_futexes);
if (!ret)
ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
+ kfree(futexv);
+
+destroy_timer:
if (timeout) {
hrtimer_cancel(&to.timer);
destroy_hrtimer_on_stack(&to.timer);
}
-
- kfree(futexv);
return ret;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 7779ee8abc2a..010cf4e6d0b8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -89,15 +89,31 @@ static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
* set this bit before looking at the lock.
*/
-static __always_inline void
-rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+static __always_inline struct task_struct *
+rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
{
unsigned long val = (unsigned long)owner;
if (rt_mutex_has_waiters(lock))
val |= RT_MUTEX_HAS_WAITERS;
- WRITE_ONCE(lock->owner, (struct task_struct *)val);
+ return (struct task_struct *)val;
+}
+
+static __always_inline void
+rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
+{
+ /*
+ * lock->wait_lock is held but explicit acquire semantics are needed
+ * for a new lock owner so WRITE_ONCE is insufficient.
+ */
+ xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
+}
+
+static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
+{
+ /* lock->wait_lock is held so the unlock provides release semantics. */
+ WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
}
static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
@@ -106,7 +122,8 @@ static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
-static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
+static __always_inline void
+fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
{
unsigned long owner, *p = (unsigned long *) &lock->owner;
@@ -172,8 +189,21 @@ static __always_inline void fixup_rt_mutex_waiters(struct rt_mutex_base *lock)
* still set.
*/
owner = READ_ONCE(*p);
- if (owner & RT_MUTEX_HAS_WAITERS)
- WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ if (owner & RT_MUTEX_HAS_WAITERS) {
+ /*
+ * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
+ * why xchg_acquire() is used for updating owner for
+ * locking and WRITE_ONCE() for unlocking.
+ *
+ * WRITE_ONCE() would work for the acquire case too, but
+ * in case that the lock acquisition failed it might
+ * force other lockers into the slow path unnecessarily.
+ */
+ if (acquire_lock)
+ xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
+ else
+ WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
+ }
}
/*
@@ -208,6 +238,13 @@ static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
owner = *p;
} while (cmpxchg_relaxed(p, owner,
owner | RT_MUTEX_HAS_WAITERS) != owner);
+
+ /*
+ * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
+ * operations in the event of contention. Ensure the successful
+ * cmpxchg is visible.
+ */
+ smp_mb__after_atomic();
}
/*
@@ -1243,7 +1280,7 @@ static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
* try_to_take_rt_mutex() sets the lock waiters bit
* unconditionally. Clean this up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
return ret;
}
@@ -1604,7 +1641,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit
* unconditionally. We might have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
trace_contention_end(lock, ret);
@@ -1719,7 +1756,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
* try_to_take_rt_mutex() sets the waiter bit unconditionally.
* We might have to fix that up:
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
debug_rt_mutex_free_waiter(&waiter);
trace_contention_end(lock, 0);
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index 900220941caa..cb9fdff76a8a 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -267,7 +267,7 @@ void __sched rt_mutex_init_proxy_locked(struct rt_mutex_base *lock,
void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock)
{
debug_rt_mutex_proxy_unlock(lock);
- rt_mutex_set_owner(lock, NULL);
+ rt_mutex_clear_owner(lock);
}
/**
@@ -382,7 +382,7 @@ int __sched rt_mutex_wait_proxy_lock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, true);
raw_spin_unlock_irq(&lock->wait_lock);
return ret;
@@ -438,7 +438,7 @@ bool __sched rt_mutex_cleanup_proxy_lock(struct rt_mutex_base *lock,
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
*/
- fixup_rt_mutex_waiters(lock);
+ fixup_rt_mutex_waiters(lock, false);
raw_spin_unlock_irq(&lock->wait_lock);