aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c622
-rw-r--r--kernel/audit.h15
-rw-r--r--kernel/auditsc.c29
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/hashtab.c144
-rw-r--r--kernel/bpf/syscall.c8
-rw-r--r--kernel/bpf/verifier.c64
-rw-r--r--kernel/cgroup/cgroup.c9
-rw-r--r--kernel/irq/affinity.c20
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/locking/lockdep_internals.h6
-rw-r--r--kernel/padata.c5
-rw-r--r--kernel/ptrace.c14
-rw-r--r--kernel/sched/clock.c46
-rw-r--r--kernel/sched/cpufreq_schedutil.c20
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/trace/ftrace.c29
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c9
-rw-r--r--kernel/trace/trace.h2
20 files changed, 651 insertions, 433 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index e794544f5e63..a871bf80fde1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -54,6 +54,10 @@
#include <linux/kthread.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/gfp.h>
#include <linux/audit.h>
@@ -90,13 +94,34 @@ static u32 audit_default;
/* If auditing cannot proceed, audit_failure selects what happens. */
static u32 audit_failure = AUDIT_FAIL_PRINTK;
-/*
- * If audit records are to be written to the netlink socket, audit_pid
- * contains the pid of the auditd process and audit_nlk_portid contains
- * the portid to use to send netlink messages to that process.
+/* private audit network namespace index */
+static unsigned int audit_net_id;
+
+/**
+ * struct audit_net - audit private network namespace data
+ * @sk: communication socket
*/
-int audit_pid;
-static __u32 audit_nlk_portid;
+struct audit_net {
+ struct sock *sk;
+};
+
+/**
+ * struct auditd_connection - kernel/auditd connection state
+ * @pid: auditd PID
+ * @portid: netlink portid
+ * @net: the associated network namespace
+ * @lock: spinlock to protect write access
+ *
+ * Description:
+ * This struct is RCU protected; you must either hold the RCU lock for reading
+ * or the included spinlock for writing.
+ */
+static struct auditd_connection {
+ int pid;
+ u32 portid;
+ struct net *net;
+ spinlock_t lock;
+} auditd_conn;
/* If audit_rate_limit is non-zero, limit the rate of sending audit records
* to that number per second. This prevents DoS attacks, but results in
@@ -123,10 +148,6 @@ u32 audit_sig_sid = 0;
*/
static atomic_t audit_lost = ATOMIC_INIT(0);
-/* The netlink socket. */
-static struct sock *audit_sock;
-static unsigned int audit_net_id;
-
/* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -192,6 +213,43 @@ struct audit_reply {
struct sk_buff *skb;
};
+/**
+ * auditd_test_task - Check to see if a given task is an audit daemon
+ * @task: the task to check
+ *
+ * Description:
+ * Return 1 if the task is a registered audit daemon, 0 otherwise.
+ */
+int auditd_test_task(const struct task_struct *task)
+{
+ int rc;
+
+ rcu_read_lock();
+ rc = (auditd_conn.pid && task->tgid == auditd_conn.pid ? 1 : 0);
+ rcu_read_unlock();
+
+ return rc;
+}
+
+/**
+ * audit_get_sk - Return the audit socket for the given network namespace
+ * @net: the destination network namespace
+ *
+ * Description:
+ * Returns the sock pointer if valid, NULL otherwise. The caller must ensure
+ * that a reference is held for the network namespace while the sock is in use.
+ */
+static struct sock *audit_get_sk(const struct net *net)
+{
+ struct audit_net *aunet;
+
+ if (!net)
+ return NULL;
+
+ aunet = net_generic(net, audit_net_id);
+ return aunet->sk;
+}
+
static void audit_set_portid(struct audit_buffer *ab, __u32 portid)
{
if (ab) {
@@ -210,9 +268,7 @@ void audit_panic(const char *message)
pr_err("%s\n", message);
break;
case AUDIT_FAIL_PANIC:
- /* test audit_pid since printk is always losey, why bother? */
- if (audit_pid)
- panic("audit: %s\n", message);
+ panic("audit: %s\n", message);
break;
}
}
@@ -370,21 +426,60 @@ static int audit_set_failure(u32 state)
return audit_do_config_change("audit_failure", &audit_failure, state);
}
-/*
- * For one reason or another this nlh isn't getting delivered to the userspace
- * audit daemon, just send it to printk.
+/**
+ * auditd_set - Set/Reset the auditd connection state
+ * @pid: auditd PID
+ * @portid: auditd netlink portid
+ * @net: auditd network namespace pointer
+ *
+ * Description:
+ * This function will obtain and drop network namespace references as
+ * necessary.
+ */
+static void auditd_set(int pid, u32 portid, struct net *net)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&auditd_conn.lock, flags);
+ auditd_conn.pid = pid;
+ auditd_conn.portid = portid;
+ if (auditd_conn.net)
+ put_net(auditd_conn.net);
+ if (net)
+ auditd_conn.net = get_net(net);
+ else
+ auditd_conn.net = NULL;
+ spin_unlock_irqrestore(&auditd_conn.lock, flags);
+}
+
+/**
+ * kauditd_print_skb - Print the audit record to the ring buffer
+ * @skb: audit record
+ *
+ * Whatever the reason, this packet may not make it to the auditd connection
+ * so write it via printk so the information isn't completely lost.
*/
static void kauditd_printk_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
char *data = nlmsg_data(nlh);
- if (nlh->nlmsg_type != AUDIT_EOE) {
- if (printk_ratelimit())
- pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
- else
- audit_log_lost("printk limit exceeded");
- }
+ if (nlh->nlmsg_type != AUDIT_EOE && printk_ratelimit())
+ pr_notice("type=%d %s\n", nlh->nlmsg_type, data);
+}
+
+/**
+ * kauditd_rehold_skb - Handle a audit record send failure in the hold queue
+ * @skb: audit record
+ *
+ * Description:
+ * This should only be used by the kauditd_thread when it fails to flush the
+ * hold queue.
+ */
+static void kauditd_rehold_skb(struct sk_buff *skb)
+{
+ /* put the record back in the queue at the same place */
+ skb_queue_head(&audit_hold_queue, skb);
}
/**
@@ -444,65 +539,163 @@ static void kauditd_retry_skb(struct sk_buff *skb)
* auditd_reset - Disconnect the auditd connection
*
* Description:
- * Break the auditd/kauditd connection and move all the records in the retry
- * queue into the hold queue in case auditd reconnects. The audit_cmd_mutex
- * must be held when calling this function.
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
*/
static void auditd_reset(void)
{
struct sk_buff *skb;
- /* break the connection */
- if (audit_sock) {
- sock_put(audit_sock);
- audit_sock = NULL;
- }
- audit_pid = 0;
- audit_nlk_portid = 0;
+ /* if it isn't already broken, break the connection */
+ rcu_read_lock();
+ if (auditd_conn.pid)
+ auditd_set(0, 0, NULL);
+ rcu_read_unlock();
- /* flush all of the retry queue to the hold queue */
+ /* flush all of the main and retry queues to the hold queue */
while ((skb = skb_dequeue(&audit_retry_queue)))
kauditd_hold_skb(skb);
+ while ((skb = skb_dequeue(&audit_queue)))
+ kauditd_hold_skb(skb);
}
/**
- * kauditd_send_unicast_skb - Send a record via unicast to auditd
+ * auditd_send_unicast_skb - Send a record via unicast to auditd
* @skb: audit record
+ *
+ * Description:
+ * Send a skb to the audit daemon, returns positive/zero values on success and
+ * negative values on failure; in all cases the skb will be consumed by this
+ * function. If the send results in -ECONNREFUSED the connection with auditd
+ * will be reset. This function may sleep so callers should not hold any locks
+ * where this would cause a problem.
*/
-static int kauditd_send_unicast_skb(struct sk_buff *skb)
+static int auditd_send_unicast_skb(struct sk_buff *skb)
{
int rc;
+ u32 portid;
+ struct net *net;
+ struct sock *sk;
+
+ /* NOTE: we can't call netlink_unicast while in the RCU section so
+ * take a reference to the network namespace and grab local
+ * copies of the namespace, the sock, and the portid; the
+ * namespace and sock aren't going to go away while we hold a
+ * reference and if the portid does become invalid after the RCU
+ * section netlink_unicast() should safely return an error */
+
+ rcu_read_lock();
+ if (!auditd_conn.pid) {
+ rcu_read_unlock();
+ rc = -ECONNREFUSED;
+ goto err;
+ }
+ net = auditd_conn.net;
+ get_net(net);
+ sk = audit_get_sk(net);
+ portid = auditd_conn.portid;
+ rcu_read_unlock();
- /* if we know nothing is connected, don't even try the netlink call */
- if (!audit_pid)
- return -ECONNREFUSED;
+ rc = netlink_unicast(sk, skb, portid, 0);
+ put_net(net);
+ if (rc < 0)
+ goto err;
- /* get an extra skb reference in case we fail to send */
- skb_get(skb);
- rc = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
- if (rc >= 0) {
- consume_skb(skb);
- rc = 0;
- }
+ return rc;
+err:
+ if (rc == -ECONNREFUSED)
+ auditd_reset();
return rc;
}
+/**
+ * kauditd_send_queue - Helper for kauditd_thread to flush skb queues
+ * @sk: the sending sock
+ * @portid: the netlink destination
+ * @queue: the skb queue to process
+ * @retry_limit: limit on number of netlink unicast failures
+ * @skb_hook: per-skb hook for additional processing
+ * @err_hook: hook called if the skb fails the netlink unicast send
+ *
+ * Description:
+ * Run through the given queue and attempt to send the audit records to auditd,
+ * returns zero on success, negative values on failure. It is up to the caller
+ * to ensure that the @sk is valid for the duration of this function.
+ *
+ */
+static int kauditd_send_queue(struct sock *sk, u32 portid,
+ struct sk_buff_head *queue,
+ unsigned int retry_limit,
+ void (*skb_hook)(struct sk_buff *skb),
+ void (*err_hook)(struct sk_buff *skb))
+{
+ int rc = 0;
+ struct sk_buff *skb;
+ static unsigned int failed = 0;
+
+ /* NOTE: kauditd_thread takes care of all our locking, we just use
+ * the netlink info passed to us (e.g. sk and portid) */
+
+ while ((skb = skb_dequeue(queue))) {
+ /* call the skb_hook for each skb we touch */
+ if (skb_hook)
+ (*skb_hook)(skb);
+
+ /* can we send to anyone via unicast? */
+ if (!sk) {
+ if (err_hook)
+ (*err_hook)(skb);
+ continue;
+ }
+
+ /* grab an extra skb reference in case of error */
+ skb_get(skb);
+ rc = netlink_unicast(sk, skb, portid, 0);
+ if (rc < 0) {
+ /* fatal failure for our queue flush attempt? */
+ if (++failed >= retry_limit ||
+ rc == -ECONNREFUSED || rc == -EPERM) {
+ /* yes - error processing for the queue */
+ sk = NULL;
+ if (err_hook)
+ (*err_hook)(skb);
+ if (!skb_hook)
+ goto out;
+ /* keep processing with the skb_hook */
+ continue;
+ } else
+ /* no - requeue to preserve ordering */
+ skb_queue_head(queue, skb);
+ } else {
+ /* it worked - drop the extra reference and continue */
+ consume_skb(skb);
+ failed = 0;
+ }
+ }
+
+out:
+ return (rc >= 0 ? 0 : rc);
+}
+
/*
* kauditd_send_multicast_skb - Send a record to any multicast listeners
* @skb: audit record
*
* Description:
- * This function doesn't consume an skb as might be expected since it has to
- * copy it anyways.
+ * Write a multicast message to anyone listening in the initial network
+ * namespace. This function doesn't consume an skb as might be expected since
+ * it has to copy it anyways.
*/
static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
struct sk_buff *copy;
- struct audit_net *aunet = net_generic(&init_net, audit_net_id);
- struct sock *sock = aunet->nlsk;
+ struct sock *sock = audit_get_sk(&init_net);
struct nlmsghdr *nlh;
+ /* NOTE: we are not taking an additional reference for init_net since
+ * we don't have to worry about it going away */
+
if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
return;
@@ -526,149 +719,79 @@ static void kauditd_send_multicast_skb(struct sk_buff *skb)
}
/**
- * kauditd_wake_condition - Return true when it is time to wake kauditd_thread
- *
- * Description:
- * This function is for use by the wait_event_freezable() call in
- * kauditd_thread().
+ * kauditd_thread - Worker thread to send audit records to userspace
+ * @dummy: unused
*/
-static int kauditd_wake_condition(void)
-{
- static int pid_last = 0;
- int rc;
- int pid = audit_pid;
-
- /* wake on new messages or a change in the connected auditd */
- rc = skb_queue_len(&audit_queue) || (pid && pid != pid_last);
- if (rc)
- pid_last = pid;
-
- return rc;
-}
-
static int kauditd_thread(void *dummy)
{
int rc;
- int auditd = 0;
- int reschedule = 0;
- struct sk_buff *skb;
- struct nlmsghdr *nlh;
+ u32 portid = 0;
+ struct net *net = NULL;
+ struct sock *sk = NULL;
#define UNICAST_RETRIES 5
-#define AUDITD_BAD(x,y) \
- ((x) == -ECONNREFUSED || (x) == -EPERM || ++(y) >= UNICAST_RETRIES)
-
- /* NOTE: we do invalidate the auditd connection flag on any sending
- * errors, but we only "restore" the connection flag at specific places
- * in the loop in order to help ensure proper ordering of audit
- * records */
set_freezable();
while (!kthread_should_stop()) {
- /* NOTE: possible area for future improvement is to look at
- * the hold and retry queues, since only this thread
- * has access to these queues we might be able to do
- * our own queuing and skip some/all of the locking */
-
- /* NOTE: it might be a fun experiment to split the hold and
- * retry queue handling to another thread, but the
- * synchronization issues and other overhead might kill
- * any performance gains */
+ /* NOTE: see the lock comments in auditd_send_unicast_skb() */
+ rcu_read_lock();
+ if (!auditd_conn.pid) {
+ rcu_read_unlock();
+ goto main_queue;
+ }
+ net = auditd_conn.net;
+ get_net(net);
+ sk = audit_get_sk(net);
+ portid = auditd_conn.portid;
+ rcu_read_unlock();
/* attempt to flush the hold queue */
- while (auditd && (skb = skb_dequeue(&audit_hold_queue))) {
- rc = kauditd_send_unicast_skb(skb);
- if (rc) {
- /* requeue to the same spot */
- skb_queue_head(&audit_hold_queue, skb);
-
- auditd = 0;
- if (AUDITD_BAD(rc, reschedule)) {
- mutex_lock(&audit_cmd_mutex);
- auditd_reset();
- mutex_unlock(&audit_cmd_mutex);
- reschedule = 0;
- }
- } else
- /* we were able to send successfully */
- reschedule = 0;
+ rc = kauditd_send_queue(sk, portid,
+ &audit_hold_queue, UNICAST_RETRIES,
+ NULL, kauditd_rehold_skb);
+ if (rc < 0) {
+ sk = NULL;
+ auditd_reset();
+ goto main_queue;
}
/* attempt to flush the retry queue */
- while (auditd && (skb = skb_dequeue(&audit_retry_queue))) {
- rc = kauditd_send_unicast_skb(skb);
- if (rc) {
- auditd = 0;
- if (AUDITD_BAD(rc, reschedule)) {
- kauditd_hold_skb(skb);
- mutex_lock(&audit_cmd_mutex);
- auditd_reset();
- mutex_unlock(&audit_cmd_mutex);
- reschedule = 0;
- } else
- /* temporary problem (we hope), queue
- * to the same spot and retry */
- skb_queue_head(&audit_retry_queue, skb);
- } else
- /* we were able to send successfully */
- reschedule = 0;
+ rc = kauditd_send_queue(sk, portid,
+ &audit_retry_queue, UNICAST_RETRIES,
+ NULL, kauditd_hold_skb);
+ if (rc < 0) {
+ sk = NULL;
+ auditd_reset();
+ goto main_queue;
}
- /* standard queue processing, try to be as quick as possible */
-quick_loop:
- skb = skb_dequeue(&audit_queue);
- if (skb) {
- /* setup the netlink header, see the comments in
- * kauditd_send_multicast_skb() for length quirks */
- nlh = nlmsg_hdr(skb);
- nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
-
- /* attempt to send to any multicast listeners */
- kauditd_send_multicast_skb(skb);
-
- /* attempt to send to auditd, queue on failure */
- if (auditd) {
- rc = kauditd_send_unicast_skb(skb);
- if (rc) {
- auditd = 0;
- if (AUDITD_BAD(rc, reschedule)) {
- mutex_lock(&audit_cmd_mutex);
- auditd_reset();
- mutex_unlock(&audit_cmd_mutex);
- reschedule = 0;
- }
-
- /* move to the retry queue */
- kauditd_retry_skb(skb);
- } else
- /* everything is working so go fast! */
- goto quick_loop;
- } else if (reschedule)
- /* we are currently having problems, move to
- * the retry queue */
- kauditd_retry_skb(skb);
- else
- /* dump the message via printk and hold it */
- kauditd_hold_skb(skb);
- } else {
- /* we have flushed the backlog so wake everyone */
- wake_up(&audit_backlog_wait);
-
- /* if everything is okay with auditd (if present), go
- * to sleep until there is something new in the queue
- * or we have a change in the connected auditd;
- * otherwise simply reschedule to give things a chance
- * to recover */
- if (reschedule) {
- set_current_state(TASK_INTERRUPTIBLE);
- schedule();
- } else
- wait_event_freezable(kauditd_wait,
- kauditd_wake_condition());
-
- /* update the auditd connection status */
- auditd = (audit_pid ? 1 : 0);
+main_queue:
+ /* process the main queue - do the multicast send and attempt
+ * unicast, dump failed record sends to the retry queue; if
+ * sk == NULL due to previous failures we will just do the
+ * multicast send and move the record to the retry queue */
+ rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+ kauditd_send_multicast_skb,
+ kauditd_retry_skb);
+ if (sk == NULL || rc < 0)
+ auditd_reset();
+ sk = NULL;
+
+ /* drop our netns reference, no auditd sends past this line */
+ if (net) {
+ put_net(net);
+ net = NULL;
}
+
+ /* we have processed all the queues so wake everyone */
+ wake_up(&audit_backlog_wait);
+
+ /* NOTE: we want to wake up if there is anything on the queue,
+ * regardless of if an auditd is connected, as we need to
+ * do the multicast send and rotate records from the
+ * main queue to the retry/hold queues */
+ wait_event_freezable(kauditd_wait,
+ (skb_queue_len(&audit_queue) ? 1 : 0));
}
return 0;
@@ -678,17 +801,16 @@ int audit_send_list(void *_dest)
{
struct audit_netlink_list *dest = _dest;
struct sk_buff *skb;
- struct net *net = dest->net;
- struct audit_net *aunet = net_generic(net, audit_net_id);
+ struct sock *sk = audit_get_sk(dest->net);
/* wait for parent to finish and send an ACK */
mutex_lock(&audit_cmd_mutex);
mutex_unlock(&audit_cmd_mutex);
while ((skb = __skb_dequeue(&dest->q)) != NULL)
- netlink_unicast(aunet->nlsk, skb, dest->portid, 0);
+ netlink_unicast(sk, skb, dest->portid, 0);
- put_net(net);
+ put_net(dest->net);
kfree(dest);
return 0;
@@ -722,16 +844,15 @@ out_kfree_skb:
static int audit_send_reply_thread(void *arg)
{
struct audit_reply *reply = (struct audit_reply *)arg;
- struct net *net = reply->net;
- struct audit_net *aunet = net_generic(net, audit_net_id);
+ struct sock *sk = audit_get_sk(reply->net);
mutex_lock(&audit_cmd_mutex);
mutex_unlock(&audit_cmd_mutex);
/* Ignore failure. It'll only happen if the sender goes away,
because our timeout is set to infinite. */
- netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0);
- put_net(net);
+ netlink_unicast(sk, reply->skb, reply->portid, 0);
+ put_net(reply->net);
kfree(reply);
return 0;
}
@@ -949,12 +1070,12 @@ static int audit_set_feature(struct sk_buff *skb)
static int audit_replace(pid_t pid)
{
- struct sk_buff *skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0,
- &pid, sizeof(pid));
+ struct sk_buff *skb;
+ skb = audit_make_reply(0, 0, AUDIT_REPLACE, 0, 0, &pid, sizeof(pid));
if (!skb)
return -ENOMEM;
- return netlink_unicast(audit_sock, skb, audit_nlk_portid, 0);
+ return auditd_send_unicast_skb(skb);
}
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -981,7 +1102,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memset(&s, 0, sizeof(s));
s.enabled = audit_enabled;
s.failure = audit_failure;
- s.pid = audit_pid;
+ rcu_read_lock();
+ s.pid = auditd_conn.pid;
+ rcu_read_unlock();
s.rate_limit = audit_rate_limit;
s.backlog_limit = audit_backlog_limit;
s.lost = atomic_read(&audit_lost);
@@ -1014,30 +1137,44 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
* from the initial pid namespace, but something
* to keep in mind if this changes */
int new_pid = s.pid;
+ pid_t auditd_pid;
pid_t requesting_pid = task_tgid_vnr(current);
- if ((!new_pid) && (requesting_pid != audit_pid)) {
- audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+ /* test the auditd connection */
+ audit_replace(requesting_pid);
+
+ rcu_read_lock();
+ auditd_pid = auditd_conn.pid;
+ /* only the current auditd can unregister itself */
+ if ((!new_pid) && (requesting_pid != auditd_pid)) {
+ rcu_read_unlock();
+ audit_log_config_change("audit_pid", new_pid,
+ auditd_pid, 0);
return -EACCES;
}
- if (audit_pid && new_pid &&
- audit_replace(requesting_pid) != -ECONNREFUSED) {
- audit_log_config_change("audit_pid", new_pid, audit_pid, 0);
+ /* replacing a healthy auditd is not allowed */
+ if (auditd_pid && new_pid) {
+ rcu_read_unlock();
+ audit_log_config_change("audit_pid", new_pid,
+ auditd_pid, 0);
return -EEXIST;
}
+ rcu_read_unlock();
+
if (audit_enabled != AUDIT_OFF)
- audit_log_config_change("audit_pid", new_pid, audit_pid, 1);
+ audit_log_config_change("audit_pid", new_pid,
+ auditd_pid, 1);
+
if (new_pid) {
- if (audit_sock)
- sock_put(audit_sock);
- audit_pid = new_pid;
- audit_nlk_portid = NETLINK_CB(skb).portid;
- sock_hold(skb->sk);
- audit_sock = skb->sk;
- } else {
+ /* register a new auditd connection */
+ auditd_set(new_pid,
+ NETLINK_CB(skb).portid,
+ sock_net(NETLINK_CB(skb).sk));
+ /* try to process any backlog */
+ wake_up_interruptible(&kauditd_wait);
+ } else
+ /* unregister the auditd connection */
auditd_reset();
- }
- wake_up_interruptible(&kauditd_wait);
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(s.rate_limit);
@@ -1090,7 +1227,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (err)
break;
}
- mutex_unlock(&audit_cmd_mutex);
audit_log_common_recv_msg(&ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
audit_log_format(ab, " msg='%.*s'",
@@ -1108,7 +1244,6 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
}
audit_set_portid(ab, NETLINK_CB(skb).portid);
audit_log_end(ab);
- mutex_lock(&audit_cmd_mutex);
}
break;
case AUDIT_ADD_RULE:
@@ -1298,26 +1433,26 @@ static int __net_init audit_net_init(struct net *net)
struct audit_net *aunet = net_generic(net, audit_net_id);
- aunet->nlsk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
- if (aunet->nlsk == NULL) {
+ aunet->sk = netlink_kernel_create(net, NETLINK_AUDIT, &cfg);
+ if (aunet->sk == NULL) {
audit_panic("cannot initialize netlink socket in namespace");
return -ENOMEM;
}
- aunet->nlsk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
return 0;
}
static void __net_exit audit_net_exit(struct net *net)
{
struct audit_net *aunet = net_generic(net, audit_net_id);
- struct sock *sock = aunet->nlsk;
- mutex_lock(&audit_cmd_mutex);
- if (sock == audit_sock)
+
+ rcu_read_lock();
+ if (net == auditd_conn.net)
auditd_reset();
- mutex_unlock(&audit_cmd_mutex);
+ rcu_read_unlock();
- netlink_kernel_release(sock);
- aunet->nlsk = NULL;
+ netlink_kernel_release(aunet->sk);
}
static struct pernet_operations audit_net_ops __net_initdata = {
@@ -1335,20 +1470,24 @@ static int __init audit_init(void)
if (audit_initialized == AUDIT_DISABLED)
return 0;
- pr_info("initializing netlink subsys (%s)\n",
- audit_default ? "enabled" : "disabled");
- register_pernet_subsys(&audit_net_ops);
+ memset(&auditd_conn, 0, sizeof(auditd_conn));
+ spin_lock_init(&auditd_conn.lock);
skb_queue_head_init(&audit_queue);
skb_queue_head_init(&audit_retry_queue);
skb_queue_head_init(&audit_hold_queue);
- audit_initialized = AUDIT_INITIALIZED;
- audit_enabled = audit_default;
- audit_ever_enabled |= !!audit_default;
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
INIT_LIST_HEAD(&audit_inode_hash[i]);
+ pr_info("initializing netlink subsys (%s)\n",
+ audit_default ? "enabled" : "disabled");
+ register_pernet_subsys(&audit_net_ops);
+
+ audit_initialized = AUDIT_INITIALIZED;
+ audit_enabled = audit_default;
+ audit_ever_enabled |= !!audit_default;
+
kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
if (IS_ERR(kauditd_task)) {
int err = PTR_ERR(kauditd_task);
@@ -1519,20 +1658,16 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
if (unlikely(!audit_filter(type, AUDIT_FILTER_TYPE)))
return NULL;
- /* don't ever fail/sleep on these two conditions:
+ /* NOTE: don't ever fail/sleep on these two conditions:
* 1. auditd generated record - since we need auditd to drain the
* queue; also, when we are checking for auditd, compare PIDs using
* task_tgid_vnr() since auditd_pid is set in audit_receive_msg()
* using a PID anchored in the caller's namespace
- * 2. audit command message - record types 1000 through 1099 inclusive
- * are command messages/records used to manage the kernel subsystem
- * and the audit userspace, blocking on these messages could cause
- * problems under load so don't do it (note: not all of these
- * command types are valid as record types, but it is quicker to
- * just check two ints than a series of ints in a if/switch stmt) */
- if (!((audit_pid && audit_pid == task_tgid_vnr(current)) ||
- (type >= 1000 && type <= 1099))) {
- long sleep_time = audit_backlog_wait_time;
+ * 2. generator holding the audit_cmd_mutex - we don't want to block
+ * while holding the mutex */
+ if (!(auditd_test_task(current) ||
+ (current == __mutex_owner(&audit_cmd_mutex)))) {
+ long stime = audit_backlog_wait_time;
while (audit_backlog_limit &&
(skb_queue_len(&audit_queue) > audit_backlog_limit)) {
@@ -1541,14 +1676,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
/* sleep if we are allowed and we haven't exhausted our
* backlog wait limit */
- if ((gfp_mask & __GFP_DIRECT_RECLAIM) &&
- (sleep_time > 0)) {
+ if (gfpflags_allow_blocking(gfp_mask) && (stime > 0)) {
DECLARE_WAITQUEUE(wait, current);
add_wait_queue_exclusive(&audit_backlog_wait,
&wait);
set_current_state(TASK_UNINTERRUPTIBLE);
- sleep_time = schedule_timeout(sleep_time);
+ stime = schedule_timeout(stime);
remove_wait_queue(&audit_backlog_wait, &wait);
} else {
if (audit_rate_check() && printk_ratelimit())
@@ -2127,15 +2261,27 @@ out:
*/
void audit_log_end(struct audit_buffer *ab)
{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+
if (!ab)
return;
- if (!audit_rate_check()) {
- audit_log_lost("rate limit exceeded");
- } else {
- skb_queue_tail(&audit_queue, ab->skb);
- wake_up_interruptible(&kauditd_wait);
+
+ if (audit_rate_check()) {
+ skb = ab->skb;
ab->skb = NULL;
- }
+
+ /* setup the netlink header, see the comments in
+ * kauditd_send_multicast_skb() for length quirks */
+ nlh = nlmsg_hdr(skb);
+ nlh->nlmsg_len = skb->len - NLMSG_HDRLEN;
+
+ /* queue the netlink packet and poke the kauditd thread */
+ skb_queue_tail(&audit_queue, skb);
+ wake_up_interruptible(&kauditd_wait);
+ } else
+ audit_log_lost("rate limit exceeded");
+
audit_buffer_free(ab);
}
diff --git a/kernel/audit.h b/kernel/audit.h
index ca579880303a..0d87f8ab8778 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -218,7 +218,7 @@ extern void audit_log_name(struct audit_context *context,
struct audit_names *n, const struct path *path,
int record_num, int *call_panic);
-extern int audit_pid;
+extern int auditd_test_task(const struct task_struct *task);
#define AUDIT_INODE_BUCKETS 32
extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -250,10 +250,6 @@ struct audit_netlink_list {
int audit_send_list(void *);
-struct audit_net {
- struct sock *nlsk;
-};
-
extern int selinux_audit_rule_update(void);
extern struct mutex audit_filter_mutex;
@@ -337,14 +333,7 @@ extern u32 audit_sig_sid;
extern int audit_filter(int msgtype, unsigned int listtype);
#ifdef CONFIG_AUDITSYSCALL
-extern int __audit_signal_info(int sig, struct task_struct *t);
-static inline int audit_signal_info(int sig, struct task_struct *t)
-{
- if (unlikely((audit_pid && t->tgid == audit_pid) ||
- (audit_signals && !audit_dummy_context())))
- return __audit_signal_info(sig, t);
- return 0;
-}
+extern int audit_signal_info(int sig, struct task_struct *t);
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
extern struct list_head *audit_killed_trees(void);
#else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index d6a8de5f8fa3..1c2333155893 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -762,7 +762,7 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
struct audit_entry *e;
enum audit_state state;
- if (audit_pid && tsk->tgid == audit_pid)
+ if (auditd_test_task(tsk))
return AUDIT_DISABLED;
rcu_read_lock();
@@ -816,7 +816,7 @@ void audit_filter_inodes(struct task_struct *tsk, struct audit_context *ctx)
{
struct audit_names *n;
- if (audit_pid && tsk->tgid == audit_pid)
+ if (auditd_test_task(tsk))
return;
rcu_read_lock();
@@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t)
* If the audit subsystem is being terminated, record the task (pid)
* and uid that is doing that.
*/
-int __audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info(int sig, struct task_struct *t)
{
struct audit_aux_data_pids *axp;
struct task_struct *tsk = current;
struct audit_context *ctx = tsk->audit_context;
kuid_t uid = current_uid(), t_uid = task_uid(t);
- if (audit_pid && t->tgid == audit_pid) {
- if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- audit_sig_pid = task_tgid_nr(tsk);
- if (uid_valid(tsk->loginuid))
- audit_sig_uid = tsk->loginuid;
- else
- audit_sig_uid = uid;
- security_task_getsecid(tsk, &audit_sig_sid);
- }
- if (!audit_signals || audit_dummy_context())
- return 0;
+ if (auditd_test_task(t) &&
+ (sig == SIGTERM || sig == SIGHUP ||
+ sig == SIGUSR1 || sig == SIGUSR2)) {
+ audit_sig_pid = task_tgid_nr(tsk);
+ if (uid_valid(tsk->loginuid))
+ audit_sig_uid = tsk->loginuid;
+ else
+ audit_sig_uid = uid;
+ security_task_getsecid(tsk, &audit_sig_sid);
}
+ if (!audit_signals || audit_dummy_context())
+ return 0;
+
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e205d3..b4f1cb0c5ac7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1162,12 +1162,12 @@ out:
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
- * only appearing in the programs where ctx ==
- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
- * internal BPF verifier will check that BPF_R6 ==
- * ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+ * appearing in the programs where ctx == skb
+ * (see may_access_skb() in the verifier). All programs
+ * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+ * bpf_convert_filter() saves it in BPF_R6, internal BPF
+ * verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
struct pcpu_freelist freelist;
struct bpf_lru lru;
};
- void __percpu *extra_elems;
+ struct htab_elem *__percpu *extra_elems;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
};
-enum extra_elem_state {
- HTAB_NOT_AN_EXTRA_ELEM = 0,
- HTAB_EXTRA_ELEM_FREE,
- HTAB_EXTRA_ELEM_USED
-};
-
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
@@ -56,7 +50,6 @@ struct htab_elem {
};
union {
struct rcu_head rcu;
- enum extra_elem_state state;
struct bpf_lru_node lru_node;
};
u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+ return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
void __percpu *pptr)
{
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
static int prealloc_init(struct bpf_htab *htab)
{
+ u32 num_entries = htab->map.max_entries;
int err = -ENOMEM, i;
- htab->elems = bpf_map_area_alloc(htab->elem_size *
- htab->map.max_entries);
+ if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+ num_entries += num_possible_cpus();
+
+ htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
if (!htab->elems)
return -ENOMEM;
if (!htab_is_percpu(htab))
goto skip_percpu_elems;
- for (i = 0; i < htab->map.max_entries; i++) {
+ for (i = 0; i < num_entries; i++) {
u32 size = round_up(htab->map.value_size, 8);
void __percpu *pptr;
@@ -166,11 +167,11 @@ skip_percpu_elems:
if (htab_is_lru(htab))
bpf_lru_populate(&htab->lru, htab->elems,
offsetof(struct htab_elem, lru_node),
- htab->elem_size, htab->map.max_entries);
+ htab->elem_size, num_entries);
else
pcpu_freelist_populate(&htab->freelist,
htab->elems + offsetof(struct htab_elem, fnode),
- htab->elem_size, htab->map.max_entries);
+ htab->elem_size, num_entries);
return 0;
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
static int alloc_extra_elems(struct bpf_htab *htab)
{
- void __percpu *pptr;
+ struct htab_elem *__percpu *pptr, *l_new;
+ struct pcpu_freelist_node *l;
int cpu;
- pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+ pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+ GFP_USER | __GFP_NOWARN);
if (!pptr)
return -ENOMEM;
for_each_possible_cpu(cpu) {
- ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
- HTAB_EXTRA_ELEM_FREE;
+ l = pcpu_freelist_pop(&htab->freelist);
+ /* pop will succeed, since prealloc_init()
+ * preallocated extra num_possible_cpus elements
+ */
+ l_new = container_of(l, struct htab_elem, fnode);
+ *per_cpu_ptr(pptr, cpu) = l_new;
}
htab->extra_elems = pptr;
return 0;
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->buckets[i].lock);
}
- if (!percpu && !lru) {
- /* lru itself can remove the least used element, so
- * there is no need for an extra elem during map_update.
- */
- err = alloc_extra_elems(htab);
- if (err)
- goto free_buckets;
- }
-
if (prealloc) {
err = prealloc_init(htab);
if (err)
- goto free_extra_elems;
+ goto free_buckets;
+
+ if (!percpu && !lru) {
+ /* lru itself can remove the least used element, so
+ * there is no need for an extra elem during map_update.
+ */
+ err = alloc_extra_elems(htab);
+ if (err)
+ goto free_prealloc;
+ }
}
return &htab->map;
-free_extra_elems:
- free_percpu(htab->extra_elems);
+free_prealloc:
+ prealloc_destroy(htab);
free_buckets:
bpf_map_area_free(htab->buckets);
free_htab:
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
- if (l->state == HTAB_EXTRA_ELEM_USED) {
- l->state = HTAB_EXTRA_ELEM_FREE;
- return;
- }
-
- if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+ if (htab_is_prealloc(htab)) {
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu, bool onallcpus,
- bool old_elem_exists)
+ struct htab_elem *old_elem)
{
u32 size = htab->map.value_size;
- bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
- struct htab_elem *l_new;
+ bool prealloc = htab_is_prealloc(htab);
+ struct htab_elem *l_new, **pl_new;
void __percpu *pptr;
- int err = 0;
if (prealloc) {
- struct pcpu_freelist_node *l;
+ if (old_elem) {
+ /* if we're updating the existing element,
+ * use per-cpu extra elems to avoid freelist_pop/push
+ */
+ pl_new = this_cpu_ptr(htab->extra_elems);
+ l_new = *pl_new;
+ *pl_new = old_elem;
+ } else {
+ struct pcpu_freelist_node *l;
- l = pcpu_freelist_pop(&htab->freelist);
- if (!l)
- err = -E2BIG;
- else
+ l = pcpu_freelist_pop(&htab->freelist);
+ if (!l)
+ return ERR_PTR(-E2BIG);
l_new = container_of(l, struct htab_elem, fnode);
- } else {
- if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
- atomic_dec(&htab->count);
- err = -E2BIG;
- } else {
- l_new = kmalloc(htab->elem_size,
- GFP_ATOMIC | __GFP_NOWARN);
- if (!l_new)
- return ERR_PTR(-ENOMEM);
}
- }
-
- if (err) {
- if (!old_elem_exists)
- return ERR_PTR(err);
-
- /* if we're updating the existing element and the hash table
- * is full, use per-cpu extra elems
- */
- l_new = this_cpu_ptr(htab->extra_elems);
- if (l_new->state != HTAB_EXTRA_ELEM_FREE)
- return ERR_PTR(-E2BIG);
- l_new->state = HTAB_EXTRA_ELEM_USED;
} else {
- l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+ if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+ if (!old_elem) {
+ /* when map is full and update() is replacing
+ * old element, it's ok to allocate, since
+ * old element will be freed immediately.
+ * Otherwise return an error
+ */
+ atomic_dec(&htab->count);
+ return ERR_PTR(-E2BIG);
+ }
+ l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ if (!l_new)
+ return ERR_PTR(-ENOMEM);
}
memcpy(l_new->key, key, key_size);
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
goto err;
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
- !!l_old);
+ l_old);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hlist_nulls_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
hlist_nulls_del_rcu(&l_old->hash_node);
- free_htab_elem(htab, l_old);
+ if (!htab_is_prealloc(htab))
+ free_htab_elem(htab, l_old);
}
ret = 0;
err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
value, onallcpus);
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
- hash, true, onallcpus, false);
+ hash, true, onallcpus, NULL);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
hlist_nulls_del_rcu(&l->hash_node);
- if (l->state != HTAB_EXTRA_ELEM_USED)
- htab_elem_free(htab, l);
+ htab_elem_free(htab, l);
}
}
}
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
* not have executed. Wait for them.
*/
rcu_barrier();
- if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+ if (!htab_is_prealloc(htab))
delete_all_elements(htab);
else
prealloc_destroy(htab);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d755..821f9e807de5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
+ /* If we tail call into other programs, we
+ * cannot make any assumptions since they
+ * can be replaced dynamically during runtime
+ * in the program array.
+ */
+ prog->cb_access = 1;
+ prog->xdp_adjust_head = 1;
+
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
* interpeter for every normal call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 796b68d00119..a834068a400e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, int off, int size)
+static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
{
- if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- } else {
- return 0;
- }
- }
-
- if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
- /* misaligned access to packet is ok on x86,arm,arm64 */
- return 0;
-
if (reg->id && size != 1) {
- verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+ verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
return -EACCES;
}
/* skb->data is NET_IP_ALIGN-ed */
- if (reg->type == PTR_TO_PACKET &&
- (NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
}
+
return 0;
}
+static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+ int size)
+{
+ if (size != 1) {
+ verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int check_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ switch (reg->type) {
+ case PTR_TO_PACKET:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_pkt_ptr_alignment(reg, off, size);
+ case PTR_TO_MAP_VALUE_ADJ:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_val_ptr_alignment(reg, size);
+ default:
+ if (off % size != 0) {
+ verbose("misaligned access off %d size %d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+ }
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
if (size < 0)
return size;
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(reg, off, size);
if (err)
return err;
@@ -1925,6 +1943,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* register as unknown.
*/
if (env->allow_ptr_leaks &&
+ BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
(dst_reg->type == PTR_TO_MAP_VALUE ||
dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
@@ -1973,14 +1992,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
- regs[i].range = dst_reg->off;
+ /* keep the maximum range already checked */
+ regs[i].range = max(regs[i].range, dst_reg->off);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = dst_reg->off;
+ reg->range = max(reg->range, dst_reg->off);
}
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 48851327a15e..687f5e0194ef 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
tsk = tsk->group_leader;
/*
- * Workqueue threads may acquire PF_NO_SETAFFINITY and become
- * trapped in a cpuset, or RT worker may be born in a cgroup
- * with no rt_runtime allocated. Just say no.
+ * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+ * If userland migrates such a kthread to a non-root cgroup, it can
+ * become trapped in a cpuset, or RT kthread may be born in a
+ * cgroup with no rt_runtime allocated. Just say no.
*/
- if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
goto out_unlock_rcu;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4544b115f5eb..e2d356dd7581 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -59,7 +59,7 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
struct cpumask *
irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
- int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+ int n, nodes, cpus_per_vec, extra_vecs, curvec;
int affv = nvecs - affd->pre_vectors - affd->post_vectors;
int last_affv = affv + affd->pre_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
@@ -94,19 +94,21 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
goto done;
}
- /* Spread the vectors per node */
- vecs_per_node = affv / nodes;
- /* Account for rounding errors */
- extra_vecs = affv - (nodes * vecs_per_node);
-
for_each_node_mask(n, nodemsk) {
- int ncpus, v, vecs_to_assign = vecs_per_node;
+ int ncpus, v, vecs_to_assign, vecs_per_node;
+
+ /* Spread the vectors per node */
+ vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
+ vecs_to_assign = min(vecs_per_node, ncpus);
+
+ /* Account for rounding errors */
+ extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
for (v = 0; curvec < last_affv && v < vecs_to_assign;
curvec++, v++) {
@@ -115,14 +117,14 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
- if (!--extra_vecs)
- vecs_per_node++;
+ --extra_vecs;
}
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
if (curvec >= last_affv)
break;
+ --nodes;
}
done:
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2f26adea0f84..26db528c1d88 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/cgroup.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@ static int kthread(void *_create)
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+ cgroup_kthread_ready();
__kthread_parkme(self);
ret = threadfn(data);
}
@@ -538,6 +540,7 @@ int kthreadd(void *unused)
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
+ cgroup_init_kthreadd();
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c2b88490d857..c08fbd2f5ba9 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -46,13 +46,13 @@ enum {
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
/*
- * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
* .data and .bss to fit in required 32MB limit for the kernel. With
- * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems.
* So, reduce the static allocations for lockdeps related structures so that
* everything fits in current required size limit.
*/
-#ifdef CONFIG_PROVE_LOCKING_SMALL
+#ifdef CONFIG_LOCKDEP_SMALL
/*
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
* we track.
diff --git a/kernel/padata.c b/kernel/padata.c
index 05316c9f32da..3202aa17492c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
reorder = &next_queue->reorder;
+ spin_lock(&reorder->lock);
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);
- spin_lock(&reorder->lock);
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
- spin_unlock(&reorder->lock);
pd->processed++;
+ spin_unlock(&reorder->lock);
goto out;
}
+ spin_unlock(&reorder->lock);
if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0af928712174..266ddcc1d8bb 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
WARN_ON(!task->ptrace || task->parent != current);
+ /*
+ * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+ * Recheck state under the lock to close this race.
+ */
spin_lock_irq(&task->sighand->siglock);
- if (__fatal_signal_pending(task))
- wake_up_state(task, __TASK_TRACED);
- else
- task->state = TASK_TRACED;
+ if (task->state == __TASK_TRACED) {
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ }
spin_unlock_irq(&task->sighand->siglock);
}
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index a08795e21628..00a45c45beca 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
static int __sched_clock_stable_early = 1;
/*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
*/
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
struct sched_clock_data {
u64 tick_raw;
@@ -131,17 +131,24 @@ static void __set_sched_clock_stable(void)
/*
* Attempt to make the (initial) unstable->stable transition continuous.
*/
- raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+ __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
static_branch_enable(&__sched_clock_stable);
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}
-static void __clear_sched_clock_stable(struct work_struct *work)
+static void __sched_clock_work(struct work_struct *work)
+{
+ static_branch_disable(&__sched_clock_stable);
+}
+
+static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+
+static void __clear_sched_clock_stable(void)
{
struct sched_clock_data *scd = this_scd();
@@ -154,17 +161,17 @@ static void __clear_sched_clock_stable(struct work_struct *work)
*
* Still do what we can.
*/
- gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+ __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
- static_branch_disable(&__sched_clock_stable);
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-}
-static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
+ if (sched_clock_stable())
+ schedule_work(&sched_clock_work);
+}
void clear_sched_clock_stable(void)
{
@@ -173,7 +180,7 @@ void clear_sched_clock_stable(void)
smp_mb(); /* matches sched_clock_init_late() */
if (sched_clock_running == 2)
- schedule_work(&sched_clock_work);
+ __clear_sched_clock_stable();
}
void sched_clock_init_late(void)
@@ -214,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y)
*/
static u64 sched_clock_local(struct sched_clock_data *scd)
{
- u64 now, clock, old_clock, min_clock, max_clock;
+ u64 now, clock, old_clock, min_clock, max_clock, gtod;
s64 delta;
again:
@@ -231,9 +238,10 @@ again:
* scd->tick_gtod + TICK_NSEC);
*/
- clock = scd->tick_gtod + gtod_offset + delta;
- min_clock = wrap_max(scd->tick_gtod, old_clock);
- max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
+ gtod = scd->tick_gtod + __gtod_offset;
+ clock = gtod + delta;
+ min_clock = wrap_max(gtod, old_clock);
+ max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
clock = wrap_max(clock, min_clock);
clock = wrap_min(clock, max_clock);
@@ -317,7 +325,7 @@ u64 sched_clock_cpu(int cpu)
u64 clock;
if (sched_clock_stable())
- return sched_clock() + raw_offset;
+ return sched_clock() + __sched_clock_offset;
if (unlikely(!sched_clock_running))
return 0ull;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cd7cd489f739..54c577578da6 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -584,20 +584,14 @@ static int sugov_start(struct cpufreq_policy *policy)
for_each_cpu(cpu, policy->cpus) {
struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+ memset(sg_cpu, 0, sizeof(*sg_cpu));
sg_cpu->sg_policy = sg_policy;
- if (policy_is_shared(policy)) {
- sg_cpu->util = 0;
- sg_cpu->max = 0;
- sg_cpu->flags = SCHED_CPUFREQ_RT;
- sg_cpu->last_update = 0;
- sg_cpu->iowait_boost = 0;
- sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
- cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
- sugov_update_shared);
- } else {
- cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
- sugov_update_single);
- }
+ sg_cpu->flags = SCHED_CPUFREQ_RT;
+ sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
+ policy_is_shared(policy) ?
+ sugov_update_shared :
+ sugov_update_single);
}
return 0;
}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index acf0a5a06da7..8c8714fcb53c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2133,9 +2133,12 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
if (write) {
if (*negp)
return -EINVAL;
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
+ *negp = false;
*lvalp = (unsigned long)val;
}
return 0;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b9691ee8f6c1..dd3e91d68dc7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3755,23 +3755,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
ftrace_probe_registered = 1;
}
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
{
int i;
if (!ftrace_probe_registered)
- return;
+ return false;
for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
struct hlist_head *hhd = &ftrace_func_hash[i];
if (hhd->first)
- return;
+ return false;
}
/* no more funcs left */
ftrace_shutdown(&trace_probe_ops, 0);
ftrace_probe_registered = 0;
+ return true;
}
@@ -3901,6 +3902,7 @@ static void
__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
void *data, int flags)
{
+ struct ftrace_ops_hash old_hash_ops;
struct ftrace_func_entry *rec_entry;
struct ftrace_func_probe *entry;
struct ftrace_func_probe *p;
@@ -3912,6 +3914,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
struct hlist_node *tmp;
char str[KSYM_SYMBOL_LEN];
int i, ret;
+ bool disabled;
if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
func_g.search = NULL;
@@ -3930,6 +3933,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
mutex_lock(&trace_probe_ops.func_hash->regex_lock);
+ old_hash_ops.filter_hash = old_hash;
+ /* Probes only have filters */
+ old_hash_ops.notrace_hash = NULL;
+
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
if (!hash)
/* Hmm, should report this somehow */
@@ -3967,12 +3974,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
}
}
mutex_lock(&ftrace_lock);
- __disable_ftrace_function_probe();
+ disabled = __disable_ftrace_function_probe();
/*
* Remove after the disable is called. Otherwise, if the last
* probe is removed, a null hash means *all enabled*.
*/
ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+ /* still need to update the function call sites */
+ if (ftrace_enabled && !disabled)
+ ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+ &old_hash_ops);
synchronize_sched();
if (!ret)
free_ftrace_hash_rcu(old_hash);
@@ -5554,6 +5566,15 @@ static void clear_ftrace_pids(struct trace_array *tr)
trace_free_pid_list(pid_list);
}
+void ftrace_clear_pids(struct trace_array *tr)
+{
+ mutex_lock(&ftrace_lock);
+
+ clear_ftrace_pids(tr);
+
+ mutex_unlock(&ftrace_lock);
+}
+
static void ftrace_pid_reset(struct trace_array *tr)
{
mutex_lock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 96fc3c043ad6..ca47a4fa2986 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3405,11 +3405,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *reader;
+ struct buffer_page *head_page;
+ struct buffer_page *commit_page;
+ unsigned commit;
cpu_buffer = iter->cpu_buffer;
- return iter->head_page == cpu_buffer->commit_page &&
- iter->head == rb_commit_index(cpu_buffer);
+ /* Remember, trace recording is off when iterator is in use */
+ reader = cpu_buffer->reader_page;
+ head_page = cpu_buffer->head_page;
+ commit_page = cpu_buffer->commit_page;
+ commit = rb_page_commit(commit_page);
+
+ return ((iter->head_page == commit_page && iter->head == commit) ||
+ (iter->head_page == reader && commit_page == head_page &&
+ head_page->read == commit &&
+ iter->head == rb_page_commit(cpu_buffer->reader_page)));
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
@@ -4826,9 +4838,9 @@ static __init int test_ringbuffer(void)
rb_data[cpu].cnt = cpu;
rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
"rbtester/%d", cpu);
- if (WARN_ON(!rb_threads[cpu])) {
+ if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
pr_cont("FAILED\n");
- ret = -1;
+ ret = PTR_ERR(rb_threads[cpu]);
goto out_free;
}
@@ -4838,9 +4850,9 @@ static __init int test_ringbuffer(void)
/* Now create the rb hammer! */
rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
- if (WARN_ON(!rb_hammer)) {
+ if (WARN_ON(IS_ERR(rb_hammer))) {
pr_cont("FAILED\n");
- ret = -1;
+ ret = PTR_ERR(rb_hammer);
goto out_free;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f35109514a01..0ad75e9698f6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6733,11 +6733,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
return ret;
out_reg:
- ret = register_ftrace_function_probe(glob, ops, count);
+ ret = alloc_snapshot(&global_trace);
+ if (ret < 0)
+ goto out;
- if (ret >= 0)
- alloc_snapshot(&global_trace);
+ ret = register_ftrace_function_probe(glob, ops, count);
+ out:
return ret < 0 ? ret : 0;
}
@@ -7402,6 +7404,7 @@ static int instance_rmdir(const char *name)
tracing_set_nop(tr);
event_trace_del_tracer(tr);
+ ftrace_clear_pids(tr);
ftrace_destroy_function_files(tr);
tracefs_remove_recursive(tr->dir);
free_trace_buffers(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ae1cce91fead..d19d52d600d6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -896,6 +896,7 @@ int using_ftrace_ops_list_func(void);
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
void ftrace_init_tracefs_toplevel(struct trace_array *tr,
struct dentry *d_tracer);
+void ftrace_clear_pids(struct trace_array *tr);
#else
static inline int ftrace_trace_task(struct trace_array *tr)
{
@@ -914,6 +915,7 @@ ftrace_init_global_array_ops(struct trace_array *tr) { }
static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_clear_pids(struct trace_array *tr) { }
/* ftace_func_t type is not defined, use macro instead of static inline */
#define ftrace_init_array_ops(tr, func) do { } while (0)
#endif /* CONFIG_FUNCTION_TRACER */