aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/devmap.c3
-rw-r--r--kernel/bpf/syscall.c11
-rw-r--r--kernel/bpf/verifier.c14
-rw-r--r--kernel/dma/map_benchmark.c25
-rw-r--r--kernel/events/core.c13
-rw-r--r--kernel/exit.c5
-rwxr-xr-xkernel/gen_kheaders.sh9
-rw-r--r--kernel/irq/cpuhotplug.c16
-rw-r--r--kernel/irq/irqdesc.c5
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/trace/bpf_trace.c12
-rw-r--r--kernel/trace/bpf_trace.h2
-rw-r--r--kernel/trace/preemptirq_delay_test.c1
-rw-r--r--kernel/trace/ring_buffer.c25
-rw-r--r--kernel/trace/rv/rv.c2
-rw-r--r--kernel/trace/trace_probe.c4
-rw-r--r--kernel/trace/trace_uprobe.c14
-rw-r--r--kernel/vhost_task.c53
20 files changed, 136 insertions, 85 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 4e2cdbb5629f..7f3b34452243 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -760,9 +760,6 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
for (i = 0; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
hlist_for_each_entry_safe(dst, next, head, index_hlist) {
- if (!dst)
- continue;
-
if (is_ifindex_excluded(excluded_devices, num_excluded,
dst->dev->ifindex))
continue;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2222c3ff88e7..f45ed6adc092 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2998,6 +2998,7 @@ static int bpf_obj_get(const union bpf_attr *attr)
void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
const struct bpf_link_ops *ops, struct bpf_prog *prog)
{
+ WARN_ON(ops->dealloc && ops->dealloc_deferred);
atomic64_set(&link->refcnt, 1);
link->type = type;
link->id = 0;
@@ -3056,16 +3057,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
/* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link)
{
+ const struct bpf_link_ops *ops = link->ops;
bool sleepable = false;
bpf_link_free_id(link->id);
if (link->prog) {
sleepable = link->prog->sleepable;
/* detach BPF program, clean up used resources */
- link->ops->release(link);
+ ops->release(link);
bpf_prog_put(link->prog);
}
- if (link->ops->dealloc_deferred) {
+ if (ops->dealloc_deferred) {
/* schedule BPF link deallocation; if underlying BPF program
* is sleepable, we need to first wait for RCU tasks trace
* sync, then go through "classic" RCU grace period
@@ -3074,9 +3076,8 @@ static void bpf_link_free(struct bpf_link *link)
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
- }
- if (link->ops->dealloc)
- link->ops->dealloc(link);
+ } else if (ops->dealloc)
+ ops->dealloc(link);
}
static void bpf_link_put_deferred(struct work_struct *work)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 77da1f438bec..36ef8e96787e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8882,7 +8882,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
enum bpf_attach_type eatype = env->prog->expected_attach_type;
enum bpf_prog_type type = resolve_prog_type(env->prog);
- if (func_id != BPF_FUNC_map_update_elem)
+ if (func_id != BPF_FUNC_map_update_elem &&
+ func_id != BPF_FUNC_map_delete_elem)
return false;
/* It's not possible to get access to a locked struct sock in these
@@ -8893,6 +8894,11 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
if (eatype == BPF_TRACE_ITER)
return true;
break;
+ case BPF_PROG_TYPE_SOCK_OPS:
+ /* map_update allowed only via dedicated helpers with event type checks */
+ if (func_id == BPF_FUNC_map_delete_elem)
+ return true;
+ break;
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
@@ -8988,7 +8994,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKMAP:
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -8998,7 +9003,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_SOCKHASH:
if (func_id != BPF_FUNC_sk_redirect_hash &&
func_id != BPF_FUNC_sock_hash_update &&
- func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
func_id != BPF_FUNC_map_lookup_elem &&
@@ -11124,7 +11128,11 @@ BTF_ID(func, bpf_iter_css_task_new)
#else
BTF_ID_UNUSED
#endif
+#ifdef CONFIG_BPF_EVENTS
BTF_ID(func, bpf_session_cookie)
+#else
+BTF_ID_UNUSED
+#endif
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
index 02205ab53b7e..4950e0b622b1 100644
--- a/kernel/dma/map_benchmark.c
+++ b/kernel/dma/map_benchmark.c
@@ -101,7 +101,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
struct task_struct **tsk;
int threads = map->bparam.threads;
int node = map->bparam.node;
- const cpumask_t *cpu_mask = cpumask_of_node(node);
u64 loops;
int ret = 0;
int i;
@@ -118,11 +117,13 @@ static int do_map_benchmark(struct map_benchmark_data *map)
if (IS_ERR(tsk[i])) {
pr_err("create dma_map thread failed\n");
ret = PTR_ERR(tsk[i]);
+ while (--i >= 0)
+ kthread_stop(tsk[i]);
goto out;
}
if (node != NUMA_NO_NODE)
- kthread_bind_mask(tsk[i], cpu_mask);
+ kthread_bind_mask(tsk[i], cpumask_of_node(node));
}
/* clear the old value in the previous benchmark */
@@ -139,13 +140,17 @@ static int do_map_benchmark(struct map_benchmark_data *map)
msleep_interruptible(map->bparam.seconds * 1000);
- /* wait for the completion of benchmark threads */
+ /* wait for the completion of all started benchmark threads */
for (i = 0; i < threads; i++) {
- ret = kthread_stop(tsk[i]);
- if (ret)
- goto out;
+ int kthread_ret = kthread_stop_put(tsk[i]);
+
+ if (kthread_ret)
+ ret = kthread_ret;
}
+ if (ret)
+ goto out;
+
loops = atomic64_read(&map->loops);
if (likely(loops > 0)) {
u64 map_variance, unmap_variance;
@@ -170,8 +175,6 @@ static int do_map_benchmark(struct map_benchmark_data *map)
}
out:
- for (i = 0; i < threads; i++)
- put_task_struct(tsk[i]);
put_device(map->dev);
kfree(tsk);
return ret;
@@ -208,7 +211,8 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
}
if (map->bparam.node != NUMA_NO_NODE &&
- !node_possible(map->bparam.node)) {
+ (map->bparam.node < 0 || map->bparam.node >= MAX_NUMNODES ||
+ !node_possible(map->bparam.node))) {
pr_err("invalid numa node\n");
return -EINVAL;
}
@@ -252,6 +256,9 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
* dma_mask changed by benchmark
*/
dma_set_mask(map->dev, old_dma_mask);
+
+ if (ret)
+ return ret;
break;
default:
return -EINVAL;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f0128c5ff278..8f908f077935 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5384,6 +5384,7 @@ int perf_event_release_kernel(struct perf_event *event)
again:
mutex_lock(&event->child_mutex);
list_for_each_entry(child, &event->child_list, child_list) {
+ void *var = NULL;
/*
* Cannot change, child events are not migrated, see the
@@ -5424,11 +5425,23 @@ again:
* this can't be the last reference.
*/
put_event(event);
+ } else {
+ var = &ctx->refcount;
}
mutex_unlock(&event->child_mutex);
mutex_unlock(&ctx->mutex);
put_ctx(ctx);
+
+ if (var) {
+ /*
+ * If perf_event_free_task() has deleted all events from the
+ * ctx while the child_mutex got released above, make sure to
+ * notify about the preceding put_ctx().
+ */
+ smp_mb(); /* pairs with wait_var_event() */
+ wake_up_var(var);
+ }
goto again;
}
mutex_unlock(&event->child_mutex);
diff --git a/kernel/exit.c b/kernel/exit.c
index cd3aa9042f1a..f95a2c1338a8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -413,10 +413,7 @@ static void coredump_task_exit(struct task_struct *tsk)
tsk->flags |= PF_POSTCOREDUMP;
core_state = tsk->signal->core_state;
spin_unlock_irq(&tsk->sighand->siglock);
-
- /* The vhost_worker does not particpate in coredumps */
- if (core_state &&
- ((tsk->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)) {
+ if (core_state) {
struct core_thread self;
self.task = current;
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 6d443ea22bb7..383fd43ac612 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -14,7 +14,12 @@ include/
arch/$SRCARCH/include/
"
-type cpio > /dev/null
+if ! command -v cpio >/dev/null; then
+ echo >&2 "***"
+ echo >&2 "*** 'cpio' could not be found."
+ echo >&2 "***"
+ exit 1
+fi
# Support incremental builds by skipping archive generation
# if timestamps of files being archived are not changed.
@@ -84,7 +89,7 @@ find $cpio_dir -type f -print0 |
# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --owner=0 --group=0 --sort=name --numeric-owner \
+ --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
echo $headers_md5 > kernel/kheaders.md5
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 75cadbc3c232..eb8628390156 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -70,6 +70,14 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
+ * Complete an eventually pending irq move cleanup. If this
+ * interrupt was moved in hard irq context, then the vectors need
+ * to be cleaned up. It can't wait until this interrupt actually
+ * happens and this CPU was involved.
+ */
+ irq_force_complete_move(desc);
+
+ /*
* No move required, if:
* - Interrupt is per cpu
* - Interrupt is not started
@@ -88,14 +96,6 @@ static bool migrate_one_irq(struct irq_desc *desc)
}
/*
- * Complete an eventually pending irq move cleanup. If this
- * interrupt was moved in hard irq context, then the vectors need
- * to be cleaned up. It can't wait until this interrupt actually
- * happens and this CPU was involved.
- */
- irq_force_complete_move(desc);
-
- /*
* If there is a setaffinity pending, then try to reuse the pending
* mask, so the last change of the affinity does not get lost. If
* there is no move pending or the pending mask does not contain
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 88ac3652fcf2..07e99c936ba5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -160,7 +160,10 @@ static int irq_find_free_area(unsigned int from, unsigned int cnt)
static unsigned int irq_find_at_or_after(unsigned int offset)
{
unsigned long index = offset;
- struct irq_desc *desc = mt_find(&sparse_irqs, &index, nr_irqs);
+ struct irq_desc *desc;
+
+ guard(rcu)();
+ desc = mt_find(&sparse_irqs, &index, nr_irqs);
return desc ? irq_desc_get_irq(desc) : nr_irqs;
}
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index d9abb7ab031d..753b8dd42a59 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1595,7 +1595,7 @@ int swsusp_check(bool exclusive)
put:
if (error)
- fput(hib_resume_bdev_file);
+ bdev_fput(hib_resume_bdev_file);
else
pr_debug("Image signature found, resuming\n");
} else {
diff --git a/kernel/signal.c b/kernel/signal.c
index 01c4c46a51a8..1f9dd41c04be 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1375,9 +1375,7 @@ int zap_other_threads(struct task_struct *p)
for_other_threads(p, t) {
task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- /* Don't require de_thread to wait for the vhost_worker */
- if ((t->flags & (PF_IO_WORKER | PF_USER_WORKER)) != PF_USER_WORKER)
- count++;
+ count++;
/* Don't bother with already dead threads */
if (t->exit_state)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index faad00cce269..d7eee421d4bc 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -196,6 +196,7 @@ COND_SYSCALL(migrate_pages);
COND_SYSCALL(move_pages);
COND_SYSCALL(set_mempolicy_home_node);
COND_SYSCALL(cachestat);
+COND_SYSCALL(mseal);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f5154c051d2c..d1daeab1bbc1 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3295,7 +3295,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
struct bpf_run_ctx *old_run_ctx;
int err = 0;
- if (link->task && current != link->task)
+ if (link->task && current->mm != link->task->mm)
return 0;
if (sleepable)
@@ -3396,8 +3396,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
cnt = attr->link_create.uprobe_multi.cnt;
+ pid = attr->link_create.uprobe_multi.pid;
- if (!upath || !uoffsets || !cnt)
+ if (!upath || !uoffsets || !cnt || pid < 0)
return -EINVAL;
if (cnt > MAX_UPROBE_MULTI_CNT)
return -E2BIG;
@@ -3421,11 +3422,8 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
goto error_path_put;
}
- pid = attr->link_create.uprobe_multi.pid;
if (pid) {
- rcu_read_lock();
- task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
- rcu_read_unlock();
+ task = get_pid_task(find_vpid(pid), PIDTYPE_TGID);
if (!task) {
err = -ESRCH;
goto error_path_put;
@@ -3519,7 +3517,6 @@ static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
}
#endif /* CONFIG_UPROBES */
-#ifdef CONFIG_FPROBE
__bpf_kfunc_start_defs();
__bpf_kfunc bool bpf_session_is_return(void)
@@ -3568,4 +3565,3 @@ static int __init bpf_kprobe_multi_kfuncs_init(void)
}
late_initcall(bpf_kprobe_multi_kfuncs_init);
-#endif
diff --git a/kernel/trace/bpf_trace.h b/kernel/trace/bpf_trace.h
index 9acbc11ac7bb..c4075b56becc 100644
--- a/kernel/trace/bpf_trace.h
+++ b/kernel/trace/bpf_trace.h
@@ -19,7 +19,7 @@ TRACE_EVENT(bpf_trace_printk,
),
TP_fast_assign(
- __assign_str(bpf_string, bpf_string);
+ __assign_str(bpf_string);
),
TP_printk("%s", __get_str(bpf_string))
diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c
index 8c4ffd076162..cb0871fbdb07 100644
--- a/kernel/trace/preemptirq_delay_test.c
+++ b/kernel/trace/preemptirq_delay_test.c
@@ -215,4 +215,5 @@ static void __exit preemptirq_delay_exit(void)
module_init(preemptirq_delay_init)
module_exit(preemptirq_delay_exit)
+MODULE_DESCRIPTION("Preempt / IRQ disable delay thread to test latency tracers");
MODULE_LICENSE("GPL v2");
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7345a8b625fb..28853966aa9a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1460,6 +1460,11 @@ static void rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer,
*
* As a safety measure we check to make sure the data pages have not
* been corrupted.
+ *
+ * Callers of this function need to guarantee that the list of pages doesn't get
+ * modified during the check. In particular, if it's possible that the function
+ * is invoked with concurrent readers which can swap in a new reader page then
+ * the caller should take cpu_buffer->reader_lock.
*/
static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -2210,8 +2215,12 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
*/
synchronize_rcu();
for_each_buffer_cpu(buffer, cpu) {
+ unsigned long flags;
+
cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
}
atomic_dec(&buffer->record_disabled);
}
@@ -5046,13 +5055,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
* @flags: gfp flags to use for memory allocation
*
* This performs the initial preparations necessary to iterate
- * through the buffer. Memory is allocated, buffer recording
+ * through the buffer. Memory is allocated, buffer resizing
* is disabled, and the iterator pointer is returned to the caller.
*
- * Disabling buffer recording prevents the reading from being
- * corrupted. This is not a consuming read, so a producer is not
- * expected.
- *
* After a sequence of ring_buffer_read_prepare calls, the user is
* expected to make at least one call to ring_buffer_read_prepare_sync.
* Afterwards, ring_buffer_read_start is invoked to get things going
@@ -5139,8 +5144,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_start);
* ring_buffer_read_finish - finish reading the iterator of the buffer
* @iter: The iterator retrieved by ring_buffer_start
*
- * This re-enables the recording to the buffer, and frees the
- * iterator.
+ * This re-enables resizing of the buffer, and frees the iterator.
*/
void
ring_buffer_read_finish(struct ring_buffer_iter *iter)
@@ -5148,12 +5152,7 @@ ring_buffer_read_finish(struct ring_buffer_iter *iter)
struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
unsigned long flags;
- /*
- * Ring buffer is disabled from recording, here's a good place
- * to check the integrity of the ring buffer.
- * Must prevent readers from trying to read, as the check
- * clears the HEAD page and readers require it.
- */
+ /* Use this opportunity to check the integrity of the ring buffer. */
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
rb_check_pages(cpu_buffer);
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 2f68e93fff0b..df0745a42a3f 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -245,6 +245,7 @@ static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync)
/**
* rv_disable_monitor - disable a given runtime monitor
+ * @mdef: Pointer to the monitor definition structure.
*
* Returns 0 on success.
*/
@@ -256,6 +257,7 @@ int rv_disable_monitor(struct rv_monitor_def *mdef)
/**
* rv_enable_monitor - enable a given runtime monitor
+ * @mdef: Pointer to the monitor definition structure.
*
* Returns 0 on success, error otherwise.
*/
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 5e263c141574..39877c80d6cb 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -554,6 +554,10 @@ static int parse_btf_field(char *fieldname, const struct btf_type *type,
anon_offs = 0;
field = btf_find_struct_member(ctx->btf, type, fieldname,
&anon_offs);
+ if (IS_ERR(field)) {
+ trace_probe_log_err(ctx->offset, BAD_BTF_TID);
+ return PTR_ERR(field);
+ }
if (!field) {
trace_probe_log_err(ctx->offset, NO_BTF_FIELD);
return -ENOENT;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8541fa1494ae..c98e3b3386ba 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -970,19 +970,17 @@ static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu,
static void __uprobe_trace_func(struct trace_uprobe *tu,
unsigned long func, struct pt_regs *regs,
- struct uprobe_cpu_buffer **ucbp,
+ struct uprobe_cpu_buffer *ucb,
struct trace_event_file *trace_file)
{
struct uprobe_trace_entry_head *entry;
struct trace_event_buffer fbuffer;
- struct uprobe_cpu_buffer *ucb;
void *data;
int size, esize;
struct trace_event_call *call = trace_probe_event_call(&tu->tp);
WARN_ON(call != trace_file->event_call);
- ucb = prepare_uprobe_buffer(tu, regs, ucbp);
if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE))
return;
@@ -1014,13 +1012,16 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
+ struct uprobe_cpu_buffer *ucb;
if (is_ret_probe(tu))
return 0;
+ ucb = prepare_uprobe_buffer(tu, regs, ucbp);
+
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)
- __uprobe_trace_func(tu, 0, regs, ucbp, link->file);
+ __uprobe_trace_func(tu, 0, regs, ucb, link->file);
rcu_read_unlock();
return 0;
@@ -1031,10 +1032,13 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
struct uprobe_cpu_buffer **ucbp)
{
struct event_file_link *link;
+ struct uprobe_cpu_buffer *ucb;
+
+ ucb = prepare_uprobe_buffer(tu, regs, ucbp);
rcu_read_lock();
trace_probe_for_each_link_rcu(link, &tu->tp)
- __uprobe_trace_func(tu, func, regs, ucbp, link->file);
+ __uprobe_trace_func(tu, func, regs, ucb, link->file);
rcu_read_unlock();
}
diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c
index da35e5b7f047..8800f5acc007 100644
--- a/kernel/vhost_task.c
+++ b/kernel/vhost_task.c
@@ -10,38 +10,32 @@
enum vhost_task_flags {
VHOST_TASK_FLAGS_STOP,
+ VHOST_TASK_FLAGS_KILLED,
};
struct vhost_task {
bool (*fn)(void *data);
+ void (*handle_sigkill)(void *data);
void *data;
struct completion exited;
unsigned long flags;
struct task_struct *task;
+ /* serialize SIGKILL and vhost_task_stop calls */
+ struct mutex exit_mutex;
};
static int vhost_task_fn(void *data)
{
struct vhost_task *vtsk = data;
- bool dead = false;
for (;;) {
bool did_work;
- if (!dead && signal_pending(current)) {
+ if (signal_pending(current)) {
struct ksignal ksig;
- /*
- * Calling get_signal will block in SIGSTOP,
- * or clear fatal_signal_pending, but remember
- * what was set.
- *
- * This thread won't actually exit until all
- * of the file descriptors are closed, and
- * the release function is called.
- */
- dead = get_signal(&ksig);
- if (dead)
- clear_thread_flag(TIF_SIGPENDING);
+
+ if (get_signal(&ksig))
+ break;
}
/* mb paired w/ vhost_task_stop */
@@ -57,7 +51,19 @@ static int vhost_task_fn(void *data)
schedule();
}
+ mutex_lock(&vtsk->exit_mutex);
+ /*
+ * If a vhost_task_stop and SIGKILL race, we can ignore the SIGKILL.
+ * When the vhost layer has called vhost_task_stop it's already stopped
+ * new work and flushed.
+ */
+ if (!test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+ set_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags);
+ vtsk->handle_sigkill(vtsk->data);
+ }
+ mutex_unlock(&vtsk->exit_mutex);
complete(&vtsk->exited);
+
do_exit(0);
}
@@ -78,12 +84,17 @@ EXPORT_SYMBOL_GPL(vhost_task_wake);
* @vtsk: vhost_task to stop
*
* vhost_task_fn ensures the worker thread exits after
- * VHOST_TASK_FLAGS_SOP becomes true.
+ * VHOST_TASK_FLAGS_STOP becomes true.
*/
void vhost_task_stop(struct vhost_task *vtsk)
{
- set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
- vhost_task_wake(vtsk);
+ mutex_lock(&vtsk->exit_mutex);
+ if (!test_bit(VHOST_TASK_FLAGS_KILLED, &vtsk->flags)) {
+ set_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags);
+ vhost_task_wake(vtsk);
+ }
+ mutex_unlock(&vtsk->exit_mutex);
+
/*
* Make sure vhost_task_fn is no longer accessing the vhost_task before
* freeing it below.
@@ -96,14 +107,16 @@ EXPORT_SYMBOL_GPL(vhost_task_stop);
/**
* vhost_task_create - create a copy of a task to be used by the kernel
* @fn: vhost worker function
- * @arg: data to be passed to fn
+ * @handle_sigkill: vhost function to handle when we are killed
+ * @arg: data to be passed to fn and handled_kill
* @name: the thread's name
*
* This returns a specialized task for use by the vhost layer or NULL on
* failure. The returned task is inactive, and the caller must fire it up
* through vhost_task_start().
*/
-struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg,
+struct vhost_task *vhost_task_create(bool (*fn)(void *),
+ void (*handle_sigkill)(void *), void *arg,
const char *name)
{
struct kernel_clone_args args = {
@@ -122,8 +135,10 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg,
if (!vtsk)
return NULL;
init_completion(&vtsk->exited);
+ mutex_init(&vtsk->exit_mutex);
vtsk->data = arg;
vtsk->fn = fn;
+ vtsk->handle_sigkill = handle_sigkill;
args.fn_arg = vtsk;