Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/completion.c	26
-rw-r--r--	kernel/sched/core.c	5
-rw-r--r--	kernel/sched/fair.c	4
-rw-r--r--	kernel/sched/sched.h	13
-rw-r--r--	kernel/sched/swait.c	8
-rw-r--r--	kernel/sched/wait.c	5
-rw-r--r--	kernel/seccomp.c	84
7 files changed, 115 insertions, 30 deletions
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index d57a5c1c1cd9..3561ab533dd4 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -13,6 +13,23 @@
  * Waiting for completion is a typically sync point, but not an exclusion point.
  */
 
+static void complete_with_flags(struct completion *x, int wake_flags)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&x->wait.lock, flags);
+
+	if (x->done != UINT_MAX)
+		x->done++;
+	swake_up_locked(&x->wait, wake_flags);
+	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void complete_on_current_cpu(struct completion *x)
+{
+	return complete_with_flags(x, WF_CURRENT_CPU);
+}
+
 /**
  * complete: - signals a single thread waiting on this completion
  * @x:  holds the state of this particular completion
@@ -27,14 +44,7 @@
  */
 void complete(struct completion *x)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&x->wait.lock, flags);
-
-	if (x->done != UINT_MAX)
-		x->done++;
-	swake_up_locked(&x->wait);
-	raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+	complete_with_flags(x, 0);
 }
 EXPORT_SYMBOL(complete);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c73..4d63e063608a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
  * Return: %true if @p->state changes (an actual wakeup was done),
  *	   %false otherwise.
  */
-static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
 	unsigned long flags;
 	int cpu, success = 0;
@@ -7030,7 +7029,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
 	return try_to_wake_up(curr->private, mode, wake_flags);
 }
 EXPORT_SYMBOL(default_wake_function);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3e25be58e2b..ceb5d4c4738e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7741,6 +7741,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
 
+		if ((wake_flags & WF_CURRENT_CPU) &&
+		    cpumask_test_cpu(cpu, p->cpus_ptr))
+			return cpu;
+
 		if (sched_energy_enabled()) {
 			new_cpu = find_energy_efficient_cpu(p, prev_cpu);
 			if (new_cpu >= 0)
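The scheduler side above is self-contained: complete_with_flags() threads a wake-flags argument down to swake_up_locked(), and select_task_rq_fair() short-circuits to the waker's CPU when WF_CURRENT_CPU is set and the wakee's affinity mask permits it. A minimal sketch (not part of this diff) of how kernel code could use the new helper; the completion, thread function, and thread name are hypothetical, and complete_on_current_cpu()'s prototype lands in include/linux/completion.h, outside this 'kernel'-limited diffstat:

#include <linux/completion.h>
#include <linux/kthread.h>

static DECLARE_COMPLETION(work_done);	/* hypothetical completion */

static int worker_fn(void *arg)
{
	/* ... produce whatever the waiter needs ... */

	/*
	 * Wake the waiter with WF_CURRENT_CPU: select_task_rq_fair()
	 * keeps it on this CPU (if its cpumask allows) instead of
	 * searching for an idle sibling, which suits tightly coupled
	 * waker/wakee pairs such as the seccomp user-notify path below.
	 */
	complete_on_current_cpu(&work_done);
	return 0;
}

static void submit_and_wait(void)
{
	struct task_struct *tsk;

	tsk = kthread_run(worker_fn, NULL, "hypothetical-worker");
	if (IS_ERR(tsk))
		return;
	wait_for_completion(&work_done);
}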
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..48d0be005f08 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2131,12 +2131,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 }
 
 /* Wake flags. The first three directly map to some SD flag value */
-#define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
-#define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
-#define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
+#define WF_EXEC        0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
+#define WF_FORK        0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
+#define WF_TTWU        0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
 
-#define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
-#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_SYNC        0x10 /* Waker goes to sleep after wakeup */
+#define WF_MIGRATED    0x20 /* Internal use, task got migrated */
+#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
 
 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -3229,6 +3230,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 extern void swake_up_all_locked(struct swait_queue_head *q);
 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
+extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
+
 #ifdef CONFIG_PREEMPT_DYNAMIC
 extern int preempt_dynamic_mode;
 extern int sched_dynamic_mode(const char *str);
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 76b9b796e695..72505cd3b60a 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
  * If for some reason it would return 0, that means the previously waiting
  * task is already running, so it will observe condition true (or has already).
  */
-void swake_up_locked(struct swait_queue_head *q)
+void swake_up_locked(struct swait_queue_head *q, int wake_flags)
 {
 	struct swait_queue *curr;
 
@@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
 		return;
 
 	curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
-	wake_up_process(curr->task);
+	try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
 	list_del_init(&curr->task_list);
 }
 EXPORT_SYMBOL(swake_up_locked);
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
 void swake_up_all_locked(struct swait_queue_head *q)
 {
 	while (!list_empty(&q->task_list))
-		swake_up_locked(q);
+		swake_up_locked(q, 0);
 }
 
 void swake_up_one(struct swait_queue_head *q)
@@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&q->lock, flags);
-	swake_up_locked(q);
+	swake_up_locked(q, 0);
 	raw_spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(swake_up_one);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 48c53e4739ea..802d98cf2de3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 }
 EXPORT_SYMBOL(__wake_up);
 
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
+{
+	__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
+}
+
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
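With the flag plumbed through both wait-queue flavors, a wake-up site can prefer the current CPU on either path; ordinary wait-queue users get it via __wake_up_on_current_cpu() above. Note that WF_CURRENT_CPU is declared in kernel/sched/sched.h, so only scheduler-internal code can pass it directly. A sketch under those assumptions (the patched two-argument swake_up_locked() and a caller built inside kernel/sched/); the queue and function names are invented:

#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(demo_swait_q);	/* hypothetical queue */

/* Wake one swait waiter, hinting the scheduler toward this CPU. */
static void swake_one_on_current_cpu(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_swait_q.lock, flags);
	/* Same shape as swake_up_one(), plus the new wake-flags argument. */
	swake_up_locked(&demo_swait_q, WF_CURRENT_CPU);
	raw_spin_unlock_irqrestore(&demo_swait_q.lock, flags);
}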
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d3e584065c7f..255999ba9190 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -110,11 +110,13 @@ struct seccomp_knotif {
  * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
  *         is allowed.
  * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
+ * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
  * @ret: The return value of the installing process. It is set to the fd num
  *       upon success (>= 0).
  * @completion: Indicates that the installing process has completed fd
  *              installation, or gone away (either due to successful
  *              reply, or signal)
+ * @list: list_head for chaining seccomp_kaddfd together.
  *
  */
 struct seccomp_kaddfd {
@@ -138,14 +140,17 @@ struct seccomp_kaddfd {
  * structure is fairly large, we store the notification-specific stuff in a
  * separate structure.
  *
- * @request: A semaphore that users of this notification can wait on for
- *           changes. Actual reads and writes are still controlled with
- *           filter->notify_lock.
+ * @requests: A semaphore that users of this notification can wait on for
+ *            changes. Actual reads and writes are still controlled with
+ *            filter->notify_lock.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
  */
+
 struct notification {
-	struct semaphore request;
+	atomic_t requests;
+	u32 flags;
 	u64 next_id;
 	struct list_head notifications;
 };
@@ -555,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
  *			  drop its reference count, and notify
  *			  about unused filters
  *
+ * @tsk: task the filter should be released from.
+ *
  * This function should only be called when the task is exiting as
  * it detaches it from its filter tree. As such, READ_ONCE() and
  * barriers are not needed here, as would normally be needed.
@@ -574,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
+ * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
+ *
  * Expects sighand and cred_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.
@@ -1116,8 +1125,11 @@ static int seccomp_do_user_notification(int this_syscall,
 	list_add_tail(&n.list, &match->notif->notifications);
 	INIT_LIST_HEAD(&n.addfd);
 
-	up(&match->notif->request);
-	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	atomic_inc(&match->notif->requests);
+	if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
+	else
+		wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
 
 	/*
 	 * This is where we wait for a reply from userspace.
@@ -1450,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
 	return NULL;
 }
 
+static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
+			      void *key)
+{
+	/* Avoid a wakeup if event not interesting for us. */
+	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static int recv_wait_event(struct seccomp_filter *filter)
+{
+	DEFINE_WAIT_FUNC(wait, recv_wake_function);
+	int ret;
+
+	if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+		return 0;
+
+	for (;;) {
+		ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+		if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+			break;
+
+		if (ret)
+			return ret;
+
+		schedule();
+	}
+	finish_wait(&filter->wqh, &wait);
+	return 0;
+}
+
 static long seccomp_notify_recv(struct seccomp_filter *filter,
 				void __user *buf)
@@ -1467,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
 
 	memset(&unotif, 0, sizeof(unotif));
 
-	ret = down_interruptible(&filter->notif->request);
+	ret = recv_wait_event(filter);
 	if (ret < 0)
 		return ret;
@@ -1515,7 +1558,8 @@ out:
 		if (should_sleep_killable(filter, knotif))
 			complete(&knotif->ready);
 		knotif->state = SECCOMP_NOTIFY_INIT;
-		up(&filter->notif->request);
+		atomic_inc(&filter->notif->requests);
+		wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
 	}
 	mutex_unlock(&filter->notify_lock);
 }
@@ -1561,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
 	knotif->error = resp.error;
 	knotif->val = resp.val;
 	knotif->flags = resp.flags;
-	complete(&knotif->ready);
+	if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		complete_on_current_cpu(&knotif->ready);
+	else
+		complete(&knotif->ready);
 out:
 	mutex_unlock(&filter->notify_lock);
 	return ret;
@@ -1591,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
 	return ret;
 }
 
+static long seccomp_notify_set_flags(struct seccomp_filter *filter,
+				     unsigned long flags)
+{
+	long ret;
+
+	if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+		return -EINVAL;
+
+	ret = mutex_lock_interruptible(&filter->notify_lock);
+	if (ret < 0)
+		return ret;
+	filter->notif->flags = flags;
+	mutex_unlock(&filter->notify_lock);
+	return 0;
+}
+
 static long seccomp_notify_addfd(struct seccomp_filter *filter,
 				 struct seccomp_notif_addfd __user *uaddfd,
 				 unsigned int size)
@@ -1720,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
 	case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
 	case SECCOMP_IOCTL_NOTIF_ID_VALID:
 		return seccomp_notify_id_valid(filter, buf);
+	case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
+		return seccomp_notify_set_flags(filter, arg);
 	}
 
 	/* Extensible Argument ioctls */
@@ -1777,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
 	if (!filter->notif)
 		goto out;
 
-	sema_init(&filter->notif->request, 0);
 	filter->notif->next_id = get_random_u64();
 	INIT_LIST_HEAD(&filter->notif->notifications);
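From userspace the behavior is opt-in per listener fd. A sketch of enabling it, assuming uapi headers that define SECCOMP_IOCTL_NOTIF_SET_FLAGS and SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (the include/uapi side is outside this diffstat, which is limited to 'kernel'); listener_fd is the descriptor obtained via seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_NEW_LISTENER, ...), and the helper name is invented:

#include <sys/ioctl.h>
#include <linux/seccomp.h>

/*
 * Opt this listener into synchronous wake-ups: the kernel then uses
 * complete_on_current_cpu()/wake_up_poll_on_current_cpu(), so supervisor
 * and tracee hand off on one CPU instead of bouncing between two.
 * seccomp_notify_set_flags() rejects unknown bits with -EINVAL.
 */
static int enable_sync_wakeup(int listener_fd)
{
	return ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
		     SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP);
}

On the receive side, the semaphore is replaced by atomic_dec_if_positive() paired with recv_wake_function(), so a supervisor parked in SECCOMP_IOCTL_NOTIF_RECV is only woken for EPOLLIN/EPOLLERR events and, with the flag set, resumes on the CPU the tracee just vacated.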