aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/core/dev.c7
-rw-r--r--net/core/net-sysfs.c4
-rw-r--r--net/core/skbuff.c8
-rw-r--r--net/core/sock.c13
-rw-r--r--net/devlink/Makefile2
-rw-r--r--net/devlink/core.c4
-rw-r--r--net/devlink/dev.c29
-rw-r--r--net/devlink/devl_internal.h21
-rw-r--r--net/devlink/health.c1333
-rw-r--r--net/devlink/leftover.c1582
-rw-r--r--net/ethtool/netlink.h2
-rw-r--r--net/ethtool/rings.c17
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_timewait_sock.c3
-rw-r--r--net/ipv6/af_inet6.c10
-rw-r--r--net/ipv6/icmp.c27
-rw-r--r--net/ipv6/ipv6_sockglue.c12
-rw-r--r--net/ipv6/ndisc.c13
-rw-r--r--net/ipv6/seg6_local.c352
-rw-r--r--net/mptcp/protocol.c1
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/rds/message.c2
-rw-r--r--net/sched/Kconfig81
-rw-r--r--net/sched/Makefile6
-rw-r--r--net/sched/act_api.c14
-rw-r--r--net/sched/act_connmark.c107
-rw-r--r--net/sched/act_gate.c30
-rw-r--r--net/sched/act_nat.c72
-rw-r--r--net/sched/act_pedit.c27
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/cls_flower.c7
-rw-r--r--net/sched/cls_matchall.c6
-rw-r--r--net/sched/cls_rsvp.c26
-rw-r--r--net/sched/cls_rsvp.h764
-rw-r--r--net/sched/cls_rsvp6.c26
-rw-r--r--net/sched/cls_tcindex.c716
-rw-r--r--net/sched/sch_api.c15
-rw-r--r--net/sched/sch_atm.c706
-rw-r--r--net/sched/sch_cbq.c1727
-rw-r--r--net/sched/sch_dsmark.c518
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/smc/af_smc.c3
-rw-r--r--net/xdp/xsk.c4
46 files changed, 2076 insertions, 6237 deletions
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ad13b48e3e08..24f01ff113f0 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -269,7 +269,7 @@ static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t
net_ns_get_ownership(dev_net(p->dev), uid, gid);
}
-static struct kobj_type brport_ktype = {
+static const struct kobj_type brport_ktype = {
#ifdef CONFIG_SYSFS
.sysfs_ops = &brport_sysfs_ops,
#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 7307a0c15c9f..357081b0113c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8321,9 +8321,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
}
}
if (dev->flags != old_flags) {
- pr_info("device %s %s promiscuous mode\n",
- dev->name,
- dev->flags & IFF_PROMISC ? "entered" : "left");
+ netdev_info(dev, "%s promiscuous mode\n",
+ dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) {
current_uid_gid(&uid, &gid);
audit_log(audit_context(), GFP_ATOMIC,
@@ -8391,6 +8390,8 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
}
}
if (dev->flags ^ old_flags) {
+ netdev_info(dev, "%s allmulticast mode\n",
+ dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
if (notify)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4b361ac6a252..e20784b6f873 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1052,7 +1052,7 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
net_ns_get_ownership(net, uid, gid);
}
-static struct kobj_type rx_queue_ktype __ro_after_init = {
+static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_groups = rx_queue_default_groups,
@@ -1662,7 +1662,7 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
net_ns_get_ownership(net, uid, gid);
}
-static struct kobj_type netdev_queue_ktype __ro_after_init = {
+static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_groups = netdev_queue_default_groups,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 13ea10cf8544..98ebce9f6a51 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1406,14 +1406,18 @@ EXPORT_SYMBOL_GPL(skb_morph);
int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
- unsigned long max_pg, num_pg, new_pg, old_pg;
+ unsigned long max_pg, num_pg, new_pg, old_pg, rlim;
struct user_struct *user;
if (capable(CAP_IPC_LOCK) || !size)
return 0;
+ rlim = rlimit(RLIMIT_MEMLOCK);
+ if (rlim == RLIM_INFINITY)
+ return 0;
+
num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
- max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ max_pg = rlim >> PAGE_SHIFT;
user = mmp->user ? : current_user();
old_pg = atomic_long_read(&user->locked_vm);
diff --git a/net/core/sock.c b/net/core/sock.c
index afbb02984d5f..341c565dbc26 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2340,17 +2340,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
smp_wmb();
refcount_set(&newsk->sk_refcnt, 2);
- /* Increment the counter in the same struct proto as the master
- * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
- * is the same as sk->sk_prot->socks, as this field was copied
- * with memcpy).
- *
- * This _changes_ the previous behaviour, where
- * tcp_create_openreq_child always was incrementing the
- * equivalent to tcp_prot->socks (inet_sock_nr), so this have
- * to be taken into account in all callers. -acme
- */
- sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
sk_tx_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
@@ -3710,8 +3699,6 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
-
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
index daad4521c61e..ef91a76646a3 100644
--- a/net/devlink/Makefile
+++ b/net/devlink/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y := leftover.o core.o netlink.o dev.o
+obj-y := leftover.o core.o netlink.o dev.o health.o
diff --git a/net/devlink/core.c b/net/devlink/core.c
index a4f47dafb864..777b091ef74d 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -212,6 +212,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
devlink->dev = dev;
devlink->ops = ops;
xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
write_pnet(&devlink->_net, net);
INIT_LIST_HEAD(&devlink->rate_list);
@@ -219,7 +220,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
INIT_LIST_HEAD(&devlink->sb_list);
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
INIT_LIST_HEAD(&devlink->resource_list);
- INIT_LIST_HEAD(&devlink->param_list);
INIT_LIST_HEAD(&devlink->region_list);
INIT_LIST_HEAD(&devlink->reporter_list);
INIT_LIST_HEAD(&devlink->trap_list);
@@ -255,7 +255,6 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->reporter_list));
WARN_ON(!list_empty(&devlink->region_list));
- WARN_ON(!list_empty(&devlink->param_list));
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
@@ -264,6 +263,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!xa_empty(&devlink->ports));
xa_destroy(&devlink->snapshot_ids);
+ xa_destroy(&devlink->params);
xa_destroy(&devlink->ports);
WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 78d824eda5ec..b40153fa2680 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -305,7 +305,7 @@ static struct net *devlink_netns_get(struct sk_buff *skb,
struct net *net;
if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
+ NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
return ERR_PTR(-EINVAL);
}
@@ -323,7 +323,7 @@ static struct net *devlink_netns_get(struct sk_buff *skb,
net = ERR_PTR(-EINVAL);
}
if (IS_ERR(net)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
+ NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
return ERR_PTR(-EINVAL);
}
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
@@ -369,6 +369,9 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (dest_net && !net_eq(dest_net, curr_net))
devlink_reload_netns_change(devlink, curr_net, dest_net);
+ if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+ devlink_params_driverinit_load_new(devlink);
+
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
if (err)
@@ -425,7 +428,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
err = devlink_resources_validate(devlink, NULL, info);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
+ NL_SET_ERR_MSG(info->extack, "resources size validation failed");
return err;
}
@@ -435,8 +438,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
if (!devlink_reload_action_is_supported(devlink, action)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested reload action is not supported by the driver");
+ NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver");
return -EOPNOTSUPP;
}
@@ -448,7 +450,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
limits_selected = limits.value & limits.selector;
if (!limits_selected) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
+ NL_SET_ERR_MSG(info->extack, "Invalid limit selected");
return -EINVAL;
}
for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
@@ -456,18 +458,15 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
break;
/* UAPI enables multiselection, but currently it is not used */
if (limits_selected != BIT(limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Multiselection of limit is not supported");
+ NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported");
return -EOPNOTSUPP;
}
if (!devlink_reload_limit_is_supported(devlink, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is not supported by the driver");
+ NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver");
return -EOPNOTSUPP;
}
if (devlink_reload_combination_is_invalid(action, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is invalid for this action");
+ NL_SET_ERR_MSG(info->extack, "Requested limit is invalid for this action");
return -EINVAL;
}
}
@@ -477,6 +476,12 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
dest_net = devlink_netns_get(skb, info);
if (IS_ERR(dest_net))
return PTR_ERR(dest_net);
+ if (!net_eq(dest_net, devlink_net(devlink)) &&
+ action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Changing namespace is only supported for reinit action");
+ return -EOPNOTSUPP;
+ }
}
err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 941174e157d4..e133f423294a 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -29,7 +29,7 @@ struct devlink {
struct list_head sb_list;
struct list_head dpipe_table_list;
struct list_head resource_list;
- struct list_head param_list;
+ struct xarray params;
struct list_head region_list;
struct list_head reporter_list;
struct devlink_dpipe_headers *dpipe_headers;
@@ -176,6 +176,8 @@ int devlink_port_netdevice_event(struct notifier_block *nb,
struct devlink_port *
devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs);
/* Reload */
bool devlink_reload_actions_valid(const struct devlink_ops *ops);
@@ -189,6 +191,9 @@ static inline bool devlink_reload_supported(const struct devlink_ops *ops)
return ops->reload_down && ops->reload_up;
}
+/* Params */
+void devlink_params_driverinit_load_new(struct devlink *devlink);
+
/* Resources */
struct devlink_resource;
int devlink_resources_validate(struct devlink *devlink,
@@ -218,3 +223,17 @@ int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info);
diff --git a/net/devlink/health.c b/net/devlink/health.c
new file mode 100644
index 000000000000..0839706d5741
--- /dev/null
+++ b/net/devlink/health.c
@@ -0,0 +1,1333 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include <trace/events/devlink.h>
+#include "devl_internal.h"
+
+struct devlink_fmsg_item {
+ struct list_head list; /* link in devlink_fmsg.item_list */
+ int attrtype; /* DEVLINK_ATTR_FMSG_* attribute this item stands for */
+ u8 nla_type; /* NLA_* type of the payload, e.g. NLA_NUL_STRING for names */
+ u16 len; /* number of payload bytes stored in value */
+ int value[]; /* payload, allocated together with the item */
+};
+
+struct devlink_fmsg {
+ struct list_head item_list; /* devlink_fmsg_item entries, appended at the tail */
+ bool putting_binary; /* This flag forces binary data to be enclosed
+ * in array brackets. It forces use of
+ * the designated API:
+ * devlink_fmsg_binary_pair_nest_start()
+ * devlink_fmsg_binary_pair_nest_end()
+ */
+};
+
+/* Allocate a zeroed formatted message with an empty item list.
+ * Returns NULL when the allocation fails.
+ */
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+	struct devlink_fmsg *msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+
+	if (msg)
+		INIT_LIST_HEAD(&msg->item_list);
+
+	return msg;
+}
+
+/* Unlink and free every item queued on @fmsg, then free @fmsg itself. */
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+	struct devlink_fmsg_item *cur;
+	struct devlink_fmsg_item *next;
+
+	/* _safe variant: the current entry is freed inside the loop body. */
+	list_for_each_entry_safe(cur, next, &fmsg->item_list, list) {
+		list_del(&cur->list);
+		kfree(cur);
+	}
+
+	kfree(fmsg);
+}
+
+struct devlink_health_reporter {
+ struct list_head list; /* link in the devlink's or the port's reporter_list */
+ void *priv; /* driver pointer handed back by devlink_health_reporter_priv() */
+ const struct devlink_health_reporter_ops *ops; /* driver callbacks and reporter name */
+ struct devlink *devlink; /* devlink instance the reporter was created for */
+ struct devlink_port *devlink_port; /* set only for per-port reporters */
+ struct devlink_fmsg *dump_fmsg; /* stored dump, NULL when none is held */
+ struct mutex dump_lock; /* lock parallel read/write from dump buffers */
+ u64 graceful_period; /* min time (in msec) between recovery attempts */
+ bool auto_recover; /* run recovery from devlink_health_report() */
+ bool auto_dump; /* take a dump from devlink_health_report() */
+ u8 health_state; /* DEVLINK_HEALTH_REPORTER_STATE_* value */
+ u64 dump_ts; /* jiffies sampled when the stored dump was taken */
+ u64 dump_real_ts; /* ktime_get_real_ns() sampled when the stored dump was taken */
+ u64 error_count; /* incremented on every devlink_health_report() */
+ u64 recovery_count; /* incremented by devlink_health_reporter_recovery_done() */
+ u64 last_recovery_ts; /* jiffies of the last completed recovery */
+};
+
+/* Return the driver private pointer that was supplied when @reporter was
+ * created.
+ */
+void *devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	void *priv = reporter->priv;
+
+	return priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+/* Walk @reporter_list and return the first reporter whose ops->name equals
+ * @reporter_name, or NULL when no entry matches.
+ */
+static struct devlink_health_reporter *
+__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
+				       const char *reporter_name)
+{
+	struct devlink_health_reporter *pos;
+
+	list_for_each_entry(pos, reporter_list, list) {
+		if (strcmp(pos->ops->name, reporter_name) == 0)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/* Look up a device-level reporter of @devlink by name; NULL if absent. */
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+				     const char *reporter_name)
+{
+	struct list_head *reporters = &devlink->reporter_list;
+
+	return __devlink_health_reporter_find_by_name(reporters, reporter_name);
+}
+
+/* Look up a reporter attached to @devlink_port by name; NULL if absent. */
+static struct devlink_health_reporter *
+devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
+					  const char *reporter_name)
+{
+	struct list_head *reporters = &devlink_port->reporter_list;
+
+	return __devlink_health_reporter_find_by_name(reporters, reporter_name);
+}
+
+/* Allocate and initialise a reporter that is not yet linked into any list.
+ *
+ * Auto recovery and auto dump start enabled exactly when @ops provides the
+ * matching callback. Returns ERR_PTR(-EINVAL) (with a warning) when a
+ * non-zero @graceful_period is given without a recover callback, and
+ * ERR_PTR(-ENOMEM) when the allocation fails.
+ */
+static struct devlink_health_reporter *
+__devlink_health_reporter_create(struct devlink *devlink,
+				 const struct devlink_health_reporter_ops *ops,
+				 u64 graceful_period, void *priv)
+{
+	struct devlink_health_reporter *r;
+
+	if (WARN_ON(graceful_period && !ops->recover))
+		return ERR_PTR(-EINVAL);
+
+	r = kzalloc(sizeof(*r), GFP_KERNEL);
+	if (!r)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&r->dump_lock);
+	r->devlink = devlink;
+	r->ops = ops;
+	r->priv = priv;
+	r->graceful_period = graceful_period;
+	r->auto_recover = ops->recover != NULL;
+	r->auto_dump = ops->dump != NULL;
+
+	return r;
+}
+
+/**
+ * devl_port_health_reporter_create() - create devlink health reporter for
+ *					 specified port instance
+ *
+ * @port: devlink_port to which health reports will relate
+ * @ops: devlink health reporter ops
+ * @graceful_period: min time (in msec) between recovery attempts
+ * @priv: driver priv pointer
+ *
+ * The devlink instance lock of @port->devlink is asserted to be held.
+ *
+ * Return: the new reporter, already linked into the port's reporter list;
+ * ERR_PTR(-EEXIST) when the port already has a reporter with the same name;
+ * otherwise the ERR_PTR() produced while allocating the reporter.
+ */
+struct devlink_health_reporter *
+devl_port_health_reporter_create(struct devlink_port *port,
+				 const struct devlink_health_reporter_ops *ops,
+				 u64 graceful_period, void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	devl_assert_locked(port->devlink);
+
+	if (devlink_port_health_reporter_find_by_name(port, ops->name))
+		return ERR_PTR(-EEXIST);
+
+	reporter = __devlink_health_reporter_create(port->devlink, ops,
+						    graceful_period, priv);
+	if (!IS_ERR(reporter)) {
+		reporter->devlink_port = port;
+		list_add_tail(&reporter->list, &port->reporter_list);
+	}
+
+	return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_port_health_reporter_create);
+
+/* Variant of devl_port_health_reporter_create() that takes and releases the
+ * devlink instance lock of @port->devlink around the call.
+ */
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+				    const struct devlink_health_reporter_ops *ops,
+				    u64 graceful_period, void *priv)
+{
+	struct devlink *owner = port->devlink;
+	struct devlink_health_reporter *created;
+
+	devl_lock(owner);
+	created = devl_port_health_reporter_create(port, ops,
+						   graceful_period, priv);
+	devl_unlock(owner);
+
+	return created;
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
+
+/**
+ * devl_health_reporter_create - create devlink health reporter
+ *
+ * @devlink: devlink instance which the health reports will relate
+ * @ops: devlink health reporter ops
+ * @graceful_period: min time (in msec) between recovery attempts
+ * @priv: driver priv pointer
+ *
+ * The devlink instance lock of @devlink is asserted to be held.
+ *
+ * Return: the new reporter, already linked into the devlink's reporter list;
+ * ERR_PTR(-EEXIST) when a device-level reporter with the same name exists;
+ * otherwise the ERR_PTR() produced while allocating the reporter.
+ */
+struct devlink_health_reporter *
+devl_health_reporter_create(struct devlink *devlink,
+			    const struct devlink_health_reporter_ops *ops,
+			    u64 graceful_period, void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	devl_assert_locked(devlink);
+
+	if (devlink_health_reporter_find_by_name(devlink, ops->name))
+		return ERR_PTR(-EEXIST);
+
+	reporter = __devlink_health_reporter_create(devlink, ops,
+						    graceful_period, priv);
+	if (!IS_ERR(reporter))
+		list_add_tail(&reporter->list, &devlink->reporter_list);
+
+	return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_health_reporter_create);
+
+/* Variant of devl_health_reporter_create() that takes and releases the
+ * devlink instance lock around the call.
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, void *priv)
+{
+	struct devlink_health_reporter *created;
+
+	devl_lock(devlink);
+	created = devl_health_reporter_create(devlink, ops,
+					      graceful_period, priv);
+	devl_unlock(devlink);
+
+	return created;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/* Tear down @reporter: destroy its dump mutex, free a stored dump if one is
+ * held, and free the reporter. The caller has already unlinked it.
+ */
+static void
+devlink_health_reporter_free(struct devlink_health_reporter *reporter)
+{
+	struct devlink_fmsg *stored_dump = reporter->dump_fmsg;
+
+	mutex_destroy(&reporter->dump_lock);
+
+	if (stored_dump)
+		devlink_fmsg_free(stored_dump);
+
+	kfree(reporter);
+}
+
+/**
+ * devl_health_reporter_destroy() - destroy devlink health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ *
+ * Unlinks @reporter from the list it was added to and frees it. The devlink
+ * instance lock of the reporter's devlink is asserted to be held.
+ */
+void
+devl_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	struct devlink *owner = reporter->devlink;
+
+	devl_assert_locked(owner);
+
+	list_del(&reporter->list);
+	devlink_health_reporter_free(reporter);
+}
+EXPORT_SYMBOL_GPL(devl_health_reporter_destroy);
+
+/* Variant of devl_health_reporter_destroy() that takes and releases the
+ * devlink instance lock around the call.
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	/* Cache the owner first: @reporter is freed before the unlock. */
+	struct devlink *owner = reporter->devlink;
+
+	devl_lock(owner);
+	devl_health_reporter_destroy(reporter);
+	devl_unlock(owner);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
+/* Build one generic netlink message of type @cmd describing @reporter.
+ *
+ * The message carries the devlink handle, the port index for port
+ * reporters, and a nested DEVLINK_ATTR_HEALTH_REPORTER attribute with the
+ * reporter's name, state, counters and settings. Recovery attributes are
+ * emitted only when ops->recover exists, dump timestamps only while a dump
+ * is stored, and the auto-dump flag only when ops->dump exists.
+ *
+ * Returns 0 on success. Returns -EMSGSIZE when the header or any attribute
+ * cannot be added; the partially built message is cancelled in that case.
+ */
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+				struct devlink_health_reporter *reporter,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags)
+{
+	struct devlink *devlink = reporter->devlink;
+	struct nlattr *nest;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+
+	if (reporter->devlink_port &&
+	    nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX,
+			reporter->devlink_port->index))
+		goto genlmsg_cancel;
+
+	nest = nla_nest_start_noflag(msg, DEVLINK_ATTR_HEALTH_REPORTER);
+	if (!nest)
+		goto genlmsg_cancel;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+			   reporter->ops->name))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+		       reporter->health_state))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+			      reporter->error_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+			      reporter->recovery_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+
+	if (reporter->ops->recover) {
+		if (nla_put_u64_64bit(msg,
+				      DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+				      reporter->graceful_period,
+				      DEVLINK_ATTR_PAD))
+			goto reporter_nest_cancel;
+		if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+			       reporter->auto_recover))
+			goto reporter_nest_cancel;
+	}
+
+	if (reporter->dump_fmsg) {
+		if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+				      jiffies_to_msecs(reporter->dump_ts),
+				      DEVLINK_ATTR_PAD))
+			goto reporter_nest_cancel;
+		if (nla_put_u64_64bit(msg,
+				      DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+				      reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+			goto reporter_nest_cancel;
+	}
+
+	if (reporter->ops->dump &&
+	    nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
+		       reporter->auto_dump))
+		goto reporter_nest_cancel;
+
+	nla_nest_end(msg, nest);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+reporter_nest_cancel:
+	nla_nest_cancel(msg, nest);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/* Resolve the reporter named by DEVLINK_ATTR_HEALTH_REPORTER_NAME in @attrs.
+ *
+ * When a port can be resolved from @attrs the port's reporters are searched;
+ * when the port lookup returns an error the device-level reporters of
+ * @devlink are searched instead. Returns NULL when the name attribute is
+ * missing or no reporter matches.
+ */
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_attrs(struct devlink *devlink,
+				       struct nlattr **attrs)
+{
+	struct nlattr *name_attr = attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME];
+	struct devlink_port *port;
+	char *name;
+
+	if (!name_attr)
+		return NULL;
+
+	name = nla_data(name_attr);
+	port = devlink_port_get_from_attrs(devlink, attrs);
+	if (!IS_ERR(port))
+		return devlink_port_health_reporter_find_by_name(port, name);
+
+	return devlink_health_reporter_find_by_name(devlink, name);
+}
+
+/* Resolve the reporter addressed by the attributes of request @info. */
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	struct nlattr **attrs = info->attrs;
+
+	return devlink_health_reporter_get_from_attrs(devlink, attrs);
+}
+
+/* Handle a health reporter get request for a single reporter.
+ *
+ * Returns -EINVAL when the request does not resolve to a reporter, -ENOMEM
+ * when the reply cannot be allocated, the fill error when the reply cannot
+ * be built, and otherwise the result of sending the reply.
+ */
+int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct sk_buff *reply;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!reply)
+		return -ENOMEM;
+
+	err = devlink_nl_health_reporter_fill(reply, reporter,
+					      DEVLINK_CMD_HEALTH_REPORTER_GET,
+					      info->snd_portid, info->snd_seq,
+					      0);
+	if (!err)
+		return genlmsg_reply(reply, info);
+
+	nlmsg_free(reply);
+	return err;
+}
+
+/* Dump every reporter of @devlink into @msg: device-level reporters first,
+ * then the reporters of each port.
+ *
+ * Entries are numbered in that order. Entries numbered below state->idx are
+ * skipped. When an entry cannot be filled, its number is stored in
+ * state->idx and the fill error is returned.
+ */
+static int
+devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
+					    struct devlink *devlink,
+					    struct netlink_callback *cb)
+{
+	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+	struct devlink_health_reporter *reporter;
+	struct devlink_port *port;
+	unsigned long port_index;
+	int pos = 0;
+	int err;
+
+	list_for_each_entry(reporter, &devlink->reporter_list, list) {
+		if (pos >= state->idx) {
+			err = devlink_nl_health_reporter_fill(msg, reporter,
+							      DEVLINK_CMD_HEALTH_REPORTER_GET,
+							      NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NLM_F_MULTI);
+			if (err) {
+				state->idx = pos;
+				return err;
+			}
+		}
+		pos++;
+	}
+
+	xa_for_each(&devlink->ports, port_index, port) {
+		list_for_each_entry(reporter, &port->reporter_list, list) {
+			if (pos >= state->idx) {
+				err = devlink_nl_health_reporter_fill(msg, reporter,
+								      DEVLINK_CMD_HEALTH_REPORTER_GET,
+								      NETLINK_CB(cb->skb).portid,
+								      cb->nlh->nlmsg_seq,
+								      NLM_F_MULTI);
+				if (err) {
+					state->idx = pos;
+					return err;
+				}
+			}
+			pos++;
+		}
+	}
+
+	return 0;
+}
+
+const struct devlink_cmd devl_cmd_health_reporter_get = {
+ .dump_one = devlink_nl_cmd_health_reporter_get_dump_one, /* dumps all reporters of one devlink instance */
+};
+
+/* Handle a health reporter set request: update the graceful period, the
+ * auto-recover flag and the auto-dump flag from whichever attributes are
+ * present.
+ *
+ * Returns -EINVAL when the request does not resolve to a reporter and
+ * -EOPNOTSUPP when a recovery setting is given for a reporter without a
+ * recover callback, or the auto-dump setting for one without a dump
+ * callback. Nothing is modified when an error is returned.
+ */
+int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+					    struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct nlattr *period_attr;
+	struct nlattr *recover_attr;
+	struct nlattr *dump_attr;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	period_attr = info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD];
+	recover_attr = info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER];
+	dump_attr = info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP];
+
+	if (!reporter->ops->recover && (period_attr || recover_attr))
+		return -EOPNOTSUPP;
+
+	if (!reporter->ops->dump && dump_attr)
+		return -EOPNOTSUPP;
+
+	if (period_attr)
+		reporter->graceful_period = nla_get_u64(period_attr);
+
+	if (recover_attr)
+		reporter->auto_recover = nla_get_u8(recover_attr);
+
+	if (dump_attr)
+		reporter->auto_dump = nla_get_u8(dump_attr);
+
+	return 0;
+}
+
+/* Multicast the current attributes of @reporter as a @cmd message on the
+ * DEVLINK_MCGRP_CONFIG group of the devlink's network namespace.
+ *
+ * Warns when @cmd is not DEVLINK_CMD_HEALTH_REPORTER_RECOVER or when the
+ * devlink instance is not marked registered. Allocation and fill failures
+ * are dropped silently.
+ */
+static void devlink_recover_notify(struct devlink_health_reporter *reporter,
+				   enum devlink_command cmd)
+{
+	struct devlink *devlink = reporter->devlink;
+	struct sk_buff *notif;
+
+	WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+	WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+	notif = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!notif)
+		return;
+
+	if (devlink_nl_health_reporter_fill(notif, reporter, cmd, 0, 0, 0)) {
+		nlmsg_free(notif);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), notif,
+				0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+/* Account one completed recovery on @reporter: stamp the time of the
+ * recovery (in jiffies) and bump the recovery counter.
+ */
+void
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
+{
+	reporter->last_recovery_ts = jiffies;
+	reporter->recovery_count += 1;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
+
+/* Run the driver's recover callback for @reporter.
+ *
+ * Returns 0 without calling the driver when the reporter is already healthy,
+ * -EOPNOTSUPP when no recover callback exists, or the callback's error. On
+ * success the recovery is accounted, the state becomes healthy and a
+ * notification is sent.
+ */
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx, struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+		return 0;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx, extack);
+	if (!err) {
+		devlink_health_reporter_recovery_done(reporter);
+		reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+		devlink_recover_notify(reporter,
+				       DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+	}
+
+	return err;
+}
+
+/* Drop the dump stored on @reporter, if any, and forget the pointer. */
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+	if (reporter->dump_fmsg) {
+		devlink_fmsg_free(reporter->dump_fmsg);
+		reporter->dump_fmsg = NULL;
+	}
+}
+
+/* Collect a dump from the driver and store it on @reporter.
+ *
+ * Does nothing and returns 0 when the reporter has no dump callback or a
+ * dump is already stored. The driver output is wrapped in an object nest.
+ * On success both dump timestamps are refreshed. On failure the partially
+ * built dump is discarded and the error is returned (-ENOMEM when the
+ * message cannot be allocated).
+ */
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+				  void *priv_ctx,
+				  struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (!reporter->ops->dump || reporter->dump_fmsg)
+		return 0;
+
+	reporter->dump_fmsg = devlink_fmsg_alloc();
+	if (!reporter->dump_fmsg)
+		return -ENOMEM;
+
+	err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+	if (!err)
+		err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+					  priv_ctx, extack);
+	if (!err)
+		err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+
+	if (err) {
+		devlink_health_dump_clear(reporter);
+		return err;
+	}
+
+	reporter->dump_ts = jiffies;
+	reporter->dump_real_ts = ktime_get_real_ns();
+
+	return 0;
+}
+
+/* Report an error on @reporter.
+ *
+ * Traces @msg, bumps the error counter, moves the reporter to the error
+ * state and sends a notification. With auto recovery enabled the report is
+ * aborted with -ECANCELED when the previous state was not healthy, or when
+ * an earlier recovery exists and its graceful period has not yet elapsed.
+ * Otherwise a dump is taken if auto dump is enabled (its result is
+ * ignored), and recovery runs under the devlink instance lock if auto
+ * recovery is enabled.
+ *
+ * Returns 0 when auto recovery is disabled, otherwise the recovery result.
+ */
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx)
+{
+	enum devlink_health_reporter_state old_state;
+	struct devlink *devlink = reporter->devlink;
+	unsigned long grace_end;
+	int err;
+
+	/* write a log message of the current error */
+	WARN_ON(!msg);
+	trace_devlink_health_report(devlink, reporter->ops->name, msg);
+
+	old_state = reporter->health_state;
+	reporter->error_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+	devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+	/* abort if the previous error wasn't recovered */
+	grace_end = reporter->last_recovery_ts +
+		    msecs_to_jiffies(reporter->graceful_period);
+	if (reporter->auto_recover &&
+	    (old_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+	     (reporter->last_recovery_ts && reporter->recovery_count &&
+	      time_is_after_jiffies(grace_end)))) {
+		trace_devlink_health_recover_aborted(devlink,
+						     reporter->ops->name,
+						     reporter->health_state,
+						     jiffies -
+						     reporter->last_recovery_ts);
+		return -ECANCELED;
+	}
+
+	if (reporter->auto_dump) {
+		/* store current dump of current error, for later analysis */
+		mutex_lock(&reporter->dump_lock);
+		devlink_health_do_dump(reporter, priv_ctx, NULL);
+		mutex_unlock(&reporter->dump_lock);
+	}
+
+	if (reporter->auto_recover) {
+		devl_lock(devlink);
+		err = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
+		devl_unlock(devlink);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
+/* Set the health state of @reporter to @state.
+ *
+ * Warns and returns when @state is neither the healthy nor the error state.
+ * When the state actually changes, the change is traced and a notification
+ * is sent; setting the current state again is a no-op.
+ */
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+				     enum devlink_health_reporter_state state)
+{
+	if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+		    state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+		return;
+
+	if (reporter->health_state != state) {
+		reporter->health_state = state;
+		trace_devlink_health_reporter_state_update(reporter->devlink,
+							   reporter->ops->name,
+							   state);
+		devlink_recover_notify(reporter,
+				       DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
+/* Handle a user-requested recovery of a single reporter.
+ *
+ * Returns -EINVAL when the request does not resolve to a reporter, otherwise
+ * the result of the recovery, which is run without a private context.
+ */
+int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+						struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *target;
+
+	target = devlink_health_reporter_get_from_info(devlink, info);
+	if (target)
+		return devlink_health_reporter_recover(target, NULL,
+						       info->extack);
+
+	return -EINVAL;
+}
+
+/* Append a payload-less marker item of type @attrtype to @fmsg.
+ * Returns 0 on success or -ENOMEM when the item cannot be allocated.
+ */
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+				    int attrtype)
+{
+	struct devlink_fmsg_item *marker = kzalloc(sizeof(*marker), GFP_KERNEL);
+
+	if (!marker)
+		return -ENOMEM;
+
+	marker->attrtype = attrtype;
+	list_add_tail(&marker->list, &fmsg->item_list);
+
+	return 0;
+}
+
+/* Open an object nest in @fmsg.
+ * Returns -EINVAL while binary data is being put, -ENOMEM on allocation
+ * failure, 0 on success.
+ */
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+	int attrtype = DEVLINK_ATTR_FMSG_OBJ_NEST_START;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	return devlink_fmsg_nest_common(fmsg, attrtype);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+/* Append a nest-end marker to @fmsg.
+ * Returns -EINVAL while binary data is being put, -ENOMEM on allocation
+ * failure, 0 on success.
+ */
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+	int attrtype = DEVLINK_ATTR_FMSG_NEST_END;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	return devlink_fmsg_nest_common(fmsg, attrtype);
+}
+
+/* Close the object nest opened by devlink_fmsg_obj_nest_start().
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	return fmsg->putting_binary ? -EINVAL : devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+/* Append a DEVLINK_ATTR_FMSG_OBJ_NAME item holding a copy of @name
+ * (including its NUL terminator) to @fmsg.
+ *
+ * Returns 0 on success, -EINVAL while binary data is being put, -EMSGSIZE if
+ * the name does not fit in DEVLINK_FMSG_MAX_SIZE, or -ENOMEM.
+ */
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+	struct devlink_fmsg_item *entry;
+	size_t name_size;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	/* Size of the name with its terminating NUL */
+	name_size = strlen(name) + 1;
+	if (name_size > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	entry = kzalloc(sizeof(*entry) + name_size, GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+	entry->nla_type = NLA_NUL_STRING;
+	entry->len = name_size;
+	memcpy(&entry->value, name, entry->len);
+	list_add_tail(&entry->list, &fmsg->item_list);
+
+	return 0;
+}
+
+/* Open a name/value pair in @fmsg: a PAIR_NEST_START marker followed by the
+ * pair's @name. Returns -EINVAL while a binary pair is being filled, or the
+ * error of the first step that fails.
+ */
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+	int err;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+	if (!err)
+		err = devlink_fmsg_put_name(fmsg, name);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+/* Close the pair opened by devlink_fmsg_pair_nest_start().
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	if (!fmsg->putting_binary)
+		return devlink_fmsg_nest_end(fmsg);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+/* Open a pair called @name whose value is an array: a pair nest start
+ * followed by an ARR_NEST_START marker. Returns -EINVAL while a binary pair
+ * is being filled, or the error of the first step that fails.
+ */
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+				     const char *name)
+{
+	int err;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_nest_common(fmsg,
+					       DEVLINK_ATTR_FMSG_ARR_NEST_START);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+/* Close what devlink_fmsg_arr_pair_nest_start() opened by appending two
+ * nest-end markers: one for the array and one for the enclosing pair.
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	err = devlink_fmsg_nest_end(fmsg);
+	if (!err)
+		err = devlink_fmsg_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+/* Open an array pair called @name that will carry binary chunks and, on
+ * success, switch @fmsg into binary mode (putting_binary). While in that
+ * mode only devlink_fmsg_binary_put() and
+ * devlink_fmsg_binary_pair_nest_end() are accepted.
+ */
+int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+					const char *name)
+{
+	int err = devlink_fmsg_arr_pair_nest_start(fmsg, name);
+
+	if (!err)
+		fmsg->putting_binary = true;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start);
+
+/* Leave binary mode and close the array pair opened by
+ * devlink_fmsg_binary_pair_nest_start(). Returns -EINVAL if @fmsg is not in
+ * binary mode.
+ */
+int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	if (fmsg->putting_binary) {
+		/* Must be cleared first: the nest-end helpers refuse to run
+		 * while putting_binary is set.
+		 */
+		fmsg->putting_binary = false;
+		return devlink_fmsg_arr_pair_nest_end(fmsg);
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end);
+
+/* Append a DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA item to @fmsg holding a copy of
+ * the @value_len bytes at @value, tagged with netlink type @value_nla_type.
+ *
+ * Returns 0 on success, -EMSGSIZE if @value_len exceeds
+ * DEVLINK_FMSG_MAX_SIZE, or -ENOMEM.
+ */
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+				  const void *value, u16 value_len,
+				  u8 value_nla_type)
+{
+	struct devlink_fmsg_item *entry;
+
+	if (value_len > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	/* The payload is stored inline, right after the item header */
+	entry = kzalloc(sizeof(*entry) + value_len, GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	entry->nla_type = value_nla_type;
+	entry->len = value_len;
+	memcpy(&entry->value, value, entry->len);
+	list_add_tail(&entry->list, &fmsg->item_list);
+
+	return 0;
+}
+
+/* Append a boolean value (stored as NLA_FLAG) to @fmsg.
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+	return fmsg->putting_binary ? -EINVAL :
+	       devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+
+/* Append a u8 value to @fmsg.
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+	if (!fmsg->putting_binary)
+		return devlink_fmsg_put_value(fmsg, &value, sizeof(value),
+					      NLA_U8);
+
+	return -EINVAL;
+}
+
+/* Append a u32 value to @fmsg.
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+	return fmsg->putting_binary ? -EINVAL :
+	       devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+/* Append a u64 value to @fmsg.
+ * Returns -EINVAL while a binary pair is being filled.
+ */
+static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+	if (!fmsg->putting_binary)
+		return devlink_fmsg_put_value(fmsg, &value, sizeof(value),
+					      NLA_U64);
+
+	return -EINVAL;
+}
+
+/* Append a NUL-terminated string value to @fmsg.
+ *
+ * Returns 0 on success, -EINVAL while a binary pair is being filled,
+ * -EMSGSIZE if the string (with its terminator) is larger than
+ * DEVLINK_FMSG_MAX_SIZE, or -ENOMEM.
+ */
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+	size_t value_size;
+
+	if (fmsg->putting_binary)
+		return -EINVAL;
+
+	/* devlink_fmsg_put_value() takes the length as u16. Check the real
+	 * size here, before the implicit narrowing, so an oversized string
+	 * cannot wrap to a small length and be stored without its NUL
+	 * terminator. Same bound as devlink_fmsg_put_name(); this assumes
+	 * DEVLINK_FMSG_MAX_SIZE itself fits in u16 - TODO confirm.
+	 */
+	value_size = strlen(value) + 1;
+	if (value_size > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	return devlink_fmsg_put_value(fmsg, value, value_size,
+				      NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+/* Append one chunk of @value_len raw bytes to @fmsg. Only valid in binary
+ * mode, i.e. after devlink_fmsg_binary_pair_nest_start(); otherwise -EINVAL
+ * is returned.
+ */
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+			    u16 value_len)
+{
+	if (fmsg->putting_binary)
+		return devlink_fmsg_put_value(fmsg, value, value_len,
+					      NLA_BINARY);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+/* Append a complete @name / boolean @value pair to @fmsg.
+ * Stops at, and returns the error of, the first step that fails.
+ */
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			       bool value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_bool_put(fmsg, value);
+	if (!err)
+		err = devlink_fmsg_pair_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+/* Append a complete @name / u8 @value pair to @fmsg.
+ * Stops at, and returns the error of, the first step that fails.
+ */
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     u8 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_u8_put(fmsg, value);
+	if (!err)
+		err = devlink_fmsg_pair_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+/* Append a complete @name / u32 @value pair to @fmsg.
+ * Stops at, and returns the error of, the first step that fails.
+ */
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u32 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_u32_put(fmsg, value);
+	if (!err)
+		err = devlink_fmsg_pair_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+/* Append a complete @name / u64 @value pair to @fmsg.
+ * Stops at, and returns the error of, the first step that fails.
+ */
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u64 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_u64_put(fmsg, value);
+	if (!err)
+		err = devlink_fmsg_pair_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+/* Append a complete @name / string @value pair to @fmsg.
+ * Stops at, and returns the error of, the first step that fails.
+ */
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const char *value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (!err)
+		err = devlink_fmsg_string_put(fmsg, value);
+	if (!err)
+		err = devlink_fmsg_pair_nest_end(fmsg);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+/* Append a @name / binary pair to @fmsg. The @value_len bytes at @value are
+ * split into chunks of at most DEVLINK_FMSG_MAX_SIZE bytes, each stored as
+ * its own binary item.
+ *
+ * Returns the error from closing the pair if that fails, otherwise the error
+ * of the first chunk that could not be put, otherwise 0.
+ */
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const void *value, u32 value_len)
+{
+	u32 offset = 0;
+	int end_err;
+	int err;
+
+	err = devlink_fmsg_binary_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	while (offset < value_len) {
+		u32 chunk = value_len - offset;
+
+		if (chunk > DEVLINK_FMSG_MAX_SIZE)
+			chunk = DEVLINK_FMSG_MAX_SIZE;
+
+		err = devlink_fmsg_binary_put(fmsg, value + offset, chunk);
+		if (err)
+			break;	/* not return: binary mode must still be left below */
+
+		offset += chunk;
+	}
+
+	/* Always close the pair so that putting_binary gets turned off */
+	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+
+	return end_err ? end_err : err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+/* Put the netlink type of value item @msg into @skb as a
+ * DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE attribute.
+ * Returns -EINVAL for a type that is not one of the supported value types.
+ */
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	u8 type = msg->nla_type;
+
+	if (type != NLA_FLAG && type != NLA_U8 && type != NLA_U32 &&
+	    type != NLA_U64 && type != NLA_NUL_STRING && type != NLA_BINARY)
+		return -EINVAL;
+
+	return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, type);
+}
+
+/* Put the payload of value item @msg into @skb as a
+ * DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA attribute, encoded according to the
+ * item's netlink type. Returns -EINVAL for an unsupported type, otherwise the
+ * result of the nla_put helper used.
+ */
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	int err;
+
+	switch (msg->nla_type) {
+	case NLA_FLAG:
+		/* A flag is always sent as an explicit u8, whatever its value */
+		err = nla_put_u8(skb, attrtype, *(bool *)msg->value);
+		break;
+	case NLA_U8:
+		err = nla_put_u8(skb, attrtype, *(u8 *)msg->value);
+		break;
+	case NLA_U32:
+		err = nla_put_u32(skb, attrtype, *(u32 *)msg->value);
+		break;
+	case NLA_U64:
+		err = nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value,
+					DEVLINK_ATTR_PAD);
+		break;
+	case NLA_NUL_STRING:
+		err = nla_put_string(skb, attrtype, (char *)&msg->value);
+		break;
+	case NLA_BINARY:
+		err = nla_put(skb, attrtype, msg->len, (void *)&msg->value);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
+
+/* Fill @skb with the items of @fmsg, wrapped in a DEVLINK_ATTR_FMSG nest,
+ * starting at item index *@start.
+ *
+ * *@start is advanced past every item that was put successfully, so a later
+ * call can resume where this one stopped.
+ *
+ * Returns 0 when all remaining items were put, -EMSGSIZE if the nest itself
+ * could not be opened, otherwise the error of the first item that failed.
+ */
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ int *start)
+{
+ struct devlink_fmsg_item *item;
+ struct nlattr *fmsg_nlattr;
+ int err = 0;
+ int i = 0;
+
+ fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
+ if (!fmsg_nlattr)
+ return -EMSGSIZE;
+
+ list_for_each_entry(item, &fmsg->item_list, list) {
+ /* Skip the items already handled by previous calls */
+ if (i < *start) {
+ i++;
+ continue;
+ }
+
+ switch (item->attrtype) {
+ case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+ case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+ case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+ case DEVLINK_ATTR_FMSG_NEST_END:
+ /* Nest markers carry no payload */
+ err = nla_put_flag(skb, item->attrtype);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+ /* A value is emitted as a type attribute followed by its data */
+ err = devlink_fmsg_item_fill_type(item, skb);
+ if (err)
+ break;
+ err = devlink_fmsg_item_fill_data(item, skb);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_NAME:
+ err = nla_put_string(skb, item->attrtype,
+ (char *)&item->value);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ /* Record progress only for items that made it into the skb */
+ if (!err)
+ *start = ++i;
+ else
+ break;
+ }
+
+ /* The nest is closed on the error path as well */
+ nla_nest_end(skb, fmsg_nlattr);
+ return err;
+}
+
+/* Send @fmsg to the requester described by @info as a series of NLM_F_MULTI
+ * replies for command @cmd, followed by an NLMSG_DONE message.
+ *
+ * Each loop iteration allocates a fresh skb and fills it from the current
+ * item index; -EMSGSIZE from the fill step means "skb full, continue in the
+ * next message" as long as at least one item was added.
+ *
+ * Returns the result of the final genlmsg_reply(), or a negative errno on
+ * the first failure.
+ */
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+ struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ bool last = false;
+ int index = 0;
+ void *hdr;
+ int err;
+
+ while (!last) {
+ /* Remember where this message started, to detect lack of progress */
+ int tmp_index = index;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if (!err)
+ last = true;
+ /* Give up on a real error, or when not even one item fit */
+ else if (err != -EMSGSIZE || tmp_index == index)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ err = genlmsg_reply(skb, info);
+ /* skb is not freed here; presumably genlmsg_reply() takes
+ * ownership of it even on failure - TODO confirm
+ */
+ if (err)
+ return err;
+ }
+
+ /* Terminate the multi-part reply */
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ nlmsg_free(skb);
+ return err;
+}
+
+/* Fill one dump message in @skb with the items of @fmsg, resuming from the
+ * item index saved in the dump state of @cb.
+ *
+ * Returns skb->len when at least one item was added (the saved index is then
+ * advanced). Otherwise the partially built message is cancelled and the fill
+ * status is returned: 0 when there was nothing left to add, or a negative
+ * errno.
+ */
+static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum devlink_command cmd)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ int index = state->idx;
+ int tmp_index = index;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ /* -EMSGSIZE with progress just means this skb is full. Bail out on
+ * any other error, or when no item was added at all (which includes
+ * the err == 0 case where every item had already been dumped).
+ */
+ if ((err && err != -EMSGSIZE) || tmp_index == index)
+ goto nla_put_failure;
+
+ state->idx = index;
+ genlmsg_end(skb, hdr);
+ return skb->len;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return err;
+}
+
+/* Netlink doit handler for DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE.
+ * Builds a fresh fmsg, lets the reporter's diagnose op fill it inside an
+ * object nest, and sends it back to the requester.
+ *
+ * Returns -EINVAL if no reporter matches the request, -EOPNOTSUPP if the
+ * reporter has no diagnose op, -ENOMEM if the fmsg cannot be allocated,
+ * otherwise the result of the first failing step or of the send.
+ */
+int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+						 struct genl_info *info)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_fmsg *fmsg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->diagnose)
+		return -EOPNOTSUPP;
+
+	fmsg = devlink_fmsg_alloc();
+	if (!fmsg)
+		return -ENOMEM;
+
+	/* Each step runs only if everything before it succeeded */
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (!err)
+		err = reporter->ops->diagnose(reporter, fmsg, info->extack);
+	if (!err)
+		err = devlink_fmsg_obj_nest_end(fmsg);
+	if (!err)
+		err = devlink_fmsg_snd(fmsg, info,
+				       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+	devlink_fmsg_free(fmsg);
+	return err;
+}
+
+/* Resolve the health reporter addressed by the attributes of dump request
+ * @cb. Returns NULL when the devlink instance lookup fails; otherwise
+ * returns whatever devlink_health_reporter_get_from_attrs() returns.
+ *
+ * NOTE(review): the reporter is returned after the devlink lock has been
+ * released and devlink_put() has been called - confirm what keeps the
+ * reporter alive for the caller.
+ */
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_health_reporter *reporter;
+ struct nlattr **attrs = info->attrs;
+ struct devlink *devlink;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ if (IS_ERR(devlink))
+ return NULL;
+ /* The lock taken by the lookup is dropped before the reporter lookup */
+ devl_unlock(devlink);
+
+ reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+ devlink_put(devlink);
+ return reporter;
+}
+
+/* Netlink dumpit handler for DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET.
+ *
+ * On the first call of a dump (saved index is 0) devlink_health_do_dump() is
+ * called and the reporter's dump timestamp is remembered in the dump state.
+ * Every call then verifies that the stored dump is still the one this dump
+ * started with before emitting the next part of it.
+ *
+ * Returns -EINVAL if no reporter matches the request, -EOPNOTSUPP if the
+ * reporter has no dump op, -EAGAIN if the dump was cleared or replaced in
+ * the meantime, otherwise the result of devlink_fmsg_dumpit().
+ */
+int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_health_reporter *reporter;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_cb(cb);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ /* First part of the dump: take the dump and remember its timestamp */
+ if (!state->idx) {
+ err = devlink_health_do_dump(reporter, NULL, cb->extack);
+ if (err)
+ goto unlock;
+ state->dump_ts = reporter->dump_ts;
+ }
+ /* The dump being read is gone or is no longer the one we started with */
+ if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) {
+ NL_SET_ERR_MSG(cb->extack, "Dump trampled, please retry");
+ err = -EAGAIN;
+ goto unlock;
+ }
+
+ err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
+unlock:
+ mutex_unlock(&reporter->dump_lock);
+ return err;
+}
+
+/* Netlink doit handler: clear the stored dump of the reporter selected by
+ * the request, under the reporter's dump_lock.
+ * Returns -EINVAL if no reporter matches, -EOPNOTSUPP if the reporter has no
+ * dump op, 0 otherwise.
+ */
+int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink = info->user_ptr[0];
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (reporter->ops->dump) {
+		mutex_lock(&reporter->dump_lock);
+		devlink_health_dump_clear(reporter);
+		mutex_unlock(&reporter->dump_lock);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/* Netlink doit handler: invoke the test op of the reporter selected by the
+ * request.
+ * Returns -EINVAL if no reporter matches, -EOPNOTSUPP if the reporter has no
+ * test op, otherwise the result of the op.
+ */
+int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+					     struct genl_info *info)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink = info->user_ptr[0];
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	return reporter->ops->test ?
+	       reporter->ops->test(reporter, info->extack) : -EOPNOTSUPP;
+}
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index f05ab093d231..dffca2f9bfa7 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -156,8 +156,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
return xa_load(&devlink->ports, port_index);
}
-static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
{
if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
@@ -810,13 +810,12 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
if (!devlink_port_fn_state_valid(state)) {
WARN_ON_ONCE(1);
- NL_SET_ERR_MSG_MOD(extack, "Invalid state read from driver");
+ NL_SET_ERR_MSG(extack, "Invalid state read from driver");
return -EINVAL;
}
if (!devlink_port_fn_opstate_valid(opstate)) {
WARN_ON_ONCE(1);
- NL_SET_ERR_MSG_MOD(extack,
- "Invalid operational state read from driver");
+ NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
return -EINVAL;
}
if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
@@ -1111,24 +1110,18 @@ devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_port *devlink_port;
unsigned long port_index;
- int idx = 0;
int err = 0;
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
+ xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
err = devlink_nl_port_fill(msg, devlink_port,
DEVLINK_CMD_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, cb->extack);
if (err) {
- state->idx = idx;
+ state->idx = port_index;
break;
}
- idx++;
}
return err;
@@ -1171,16 +1164,16 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
hw_addr = nla_data(attr);
hw_addr_len = nla_len(attr);
if (hw_addr_len > MAX_ADDR_LEN) {
- NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long");
+ NL_SET_ERR_MSG(extack, "Port function hardware address too long");
return -EINVAL;
}
if (port->type == DEVLINK_PORT_TYPE_ETH) {
if (hw_addr_len != ETH_ALEN) {
- NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device");
+ NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
return -EINVAL;
}
if (!is_unicast_ether_addr(hw_addr)) {
- NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported");
+ NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
return -EINVAL;
}
}
@@ -1256,7 +1249,7 @@ static int devlink_port_function_set(struct devlink_port *port,
err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
devlink_function_nl_policy, extack);
if (err < 0) {
- NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes");
+ NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
return err;
}
@@ -1335,14 +1328,14 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
if (!devlink_port->attrs.splittable) {
/* Split ports cannot be split. */
if (devlink_port->attrs.split)
- NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further");
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
else
- NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split");
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split");
return -EINVAL;
}
if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count");
+ NL_SET_ERR_MSG(info->extack, "Invalid split count");
return -EINVAL;
}
@@ -1406,7 +1399,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
!info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
- NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified");
+ NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
return -EINVAL;
}
new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
@@ -1454,7 +1447,7 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) {
- NL_SET_ERR_MSG_MOD(extack, "Port index is not specified");
+ NL_SET_ERR_MSG(extack, "Port index is not specified");
return -EINVAL;
}
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
@@ -1496,13 +1489,13 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
return -ENODEV;
if (parent == devlink_rate) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
+ NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
return -EINVAL;
}
if (devlink_rate_is_node(devlink_rate) &&
devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
+ NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
return -EEXIST;
}
@@ -1611,16 +1604,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
if (type == DEVLINK_RATE_TYPE_LEAF) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
!ops->rate_leaf_parent_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
@@ -1637,16 +1630,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
}
} else if (type == DEVLINK_RATE_TYPE_NODE) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
!ops->rate_node_parent_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
@@ -1697,7 +1690,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
ops = devlink->ops;
if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
- NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
+ NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
return -EOPNOTSUPP;
}
@@ -1753,7 +1746,7 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
int err;
if (refcount_read(&rate_node->refcnt) > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
+ NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
return -EBUSY;
}
@@ -1941,26 +1934,26 @@ static int devlink_linecard_type_set(struct devlink_linecard *linecard,
mutex_lock(&linecard->state_lock);
if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
err = -EBUSY;
goto out;
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
err = -EBUSY;
goto out;
}
linecard_type = devlink_linecard_type_lookup(linecard, type);
if (!linecard_type) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided");
+ NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
err = -EINVAL;
goto out;
}
if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned");
+ NL_SET_ERR_MSG(extack, "Line card already provisioned");
err = -EBUSY;
/* Check if the line card is provisioned in the same
* way the user asks. In case it is, make the operation
@@ -2004,12 +1997,12 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
mutex_lock(&linecard->state_lock);
if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
err = -EBUSY;
goto out;
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
err = -EBUSY;
goto out;
}
@@ -2022,7 +2015,7 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is not provisioned");
err = 0;
goto out;
}
@@ -2846,7 +2839,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
- NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
+ NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
return -EBUSY;
}
return 0;
@@ -3612,18 +3605,18 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
int err = 0;
if (size > resource->size_params.size_max) {
- NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ NL_SET_ERR_MSG(extack, "Size larger than maximum");
err = -EINVAL;
}
if (size < resource->size_params.size_min) {
- NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ NL_SET_ERR_MSG(extack, "Size smaller than minimum");
err = -EINVAL;
}
div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
if (reminder) {
- NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ NL_SET_ERR_MSG(extack, "Wrong granularity");
err = -EINVAL;
}
@@ -3960,26 +3953,22 @@ static int devlink_param_driver_verify(const struct devlink_param *param)
}
static struct devlink_param_item *
-devlink_param_find_by_name(struct list_head *param_list,
- const char *param_name)
+devlink_param_find_by_name(struct xarray *params, const char *param_name)
{
struct devlink_param_item *param_item;
+ unsigned long param_id;
- list_for_each_entry(param_item, param_list, list)
+ xa_for_each(params, param_id, param_item) {
if (!strcmp(param_item->param->name, param_name))
return param_item;
+ }
return NULL;
}
static struct devlink_param_item *
-devlink_param_find_by_id(struct list_head *param_list, u32 param_id)
+devlink_param_find_by_id(struct xarray *params, u32 param_id)
{
- struct devlink_param_item *param_item;
-
- list_for_each_entry(param_item, param_list, list)
- if (param_item->param->id == param_id)
- return param_item;
- return NULL;
+ return xa_load(params, param_id);
}
static bool
@@ -4098,9 +4087,12 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
if (!devlink_param_cmode_is_supported(param, i))
continue;
if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (!param_item->driverinit_value_valid)
+ if (param_item->driverinit_value_new_valid)
+ param_value[i] = param_item->driverinit_value_new;
+ else if (param_item->driverinit_value_valid)
+ param_value[i] = param_item->driverinit_value;
+ else
return -EOPNOTSUPP;
- param_value[i] = param_item->driverinit_value;
} else {
ctx.cmode = i;
err = devlink_param_get(devlink, param, &ctx);
@@ -4205,14 +4197,10 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_param_item *param_item;
- int idx = 0;
+ unsigned long param_id;
int err = 0;
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
+ xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
err = devlink_nl_param_fill(msg, devlink, 0, param_item,
DEVLINK_CMD_PARAM_GET,
NETLINK_CB(cb->skb).portid,
@@ -4221,10 +4209,9 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
if (err == -EOPNOTSUPP) {
err = 0;
} else if (err) {
- state->idx = idx;
+ state->idx = param_id;
break;
}
- idx++;
}
return err;
@@ -4310,8 +4297,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
}
static struct devlink_param_item *
-devlink_param_get_from_info(struct list_head *param_list,
- struct genl_info *info)
+devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
{
char *param_name;
@@ -4319,7 +4305,7 @@ devlink_param_get_from_info(struct list_head *param_list,
return NULL;
param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
- return devlink_param_find_by_name(param_list, param_name);
+ return devlink_param_find_by_name(params, param_name);
}
static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
@@ -4330,7 +4316,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
struct sk_buff *msg;
int err;
- param_item = devlink_param_get_from_info(&devlink->param_list, info);
+ param_item = devlink_param_get_from_info(&devlink->params, info);
if (!param_item)
return -EINVAL;
@@ -4351,7 +4337,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
unsigned int port_index,
- struct list_head *param_list,
+ struct xarray *params,
struct genl_info *info,
enum devlink_command cmd)
{
@@ -4363,7 +4349,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
union devlink_param_value value;
int err = 0;
- param_item = devlink_param_get_from_info(param_list, info);
+ param_item = devlink_param_get_from_info(params, info);
if (!param_item)
return -EINVAL;
param = param_item->param;
@@ -4388,11 +4374,8 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return -EOPNOTSUPP;
if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, value.vstr);
- else
- param_item->driverinit_value = value;
- param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new = value;
+ param_item->driverinit_value_new_valid = true;
} else {
if (!param->set)
return -EOPNOTSUPP;
@@ -4412,28 +4395,28 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
- return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list,
+ return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
info, DEVLINK_CMD_PARAM_NEW);
}
static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
- NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
return msg->len;
}
static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
}
static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
}
@@ -5002,7 +4985,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
int err;
if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
- NL_SET_ERR_MSG_MOD(info->extack, "No region name provided");
+ NL_SET_ERR_MSG(info->extack, "No region name provided");
return -EINVAL;
}
@@ -5022,19 +5005,19 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
region = devlink_region_get_by_name(devlink, region_name);
if (!region) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not exist");
+ NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
return -EINVAL;
}
if (!region->ops->snapshot) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not support taking an immediate snapshot");
+ NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
return -EOPNOTSUPP;
}
mutex_lock(&region->snapshot_lock);
if (region->cur_snapshots == region->max_snapshots) {
- NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots");
+ NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
err = -ENOSPC;
goto unlock;
}
@@ -5044,7 +5027,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
snapshot_id = nla_get_u32(snapshot_id_attr);
if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
+ NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
err = -EEXIST;
goto unlock;
}
@@ -5055,7 +5038,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
} else {
err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id");
+ NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
goto unlock;
}
}
@@ -5389,1332 +5372,6 @@ out_unlock:
return err;
}
-struct devlink_fmsg_item {
- struct list_head list;
- int attrtype;
- u8 nla_type;
- u16 len;
- int value[];
-};
-
-struct devlink_fmsg {
- struct list_head item_list;
- bool putting_binary; /* This flag forces enclosing of binary data
- * in an array brackets. It forces using
- * of designated API:
- * devlink_fmsg_binary_pair_nest_start()
- * devlink_fmsg_binary_pair_nest_end()
- */
-};
-
-static struct devlink_fmsg *devlink_fmsg_alloc(void)
-{
- struct devlink_fmsg *fmsg;
-
- fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
- if (!fmsg)
- return NULL;
-
- INIT_LIST_HEAD(&fmsg->item_list);
-
- return fmsg;
-}
-
-static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
-{
- struct devlink_fmsg_item *item, *tmp;
-
- list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
- list_del(&item->list);
- kfree(item);
- }
- kfree(fmsg);
-}
-
-static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
- int attrtype)
-{
- struct devlink_fmsg_item *item;
-
- item = kzalloc(sizeof(*item), GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->attrtype = attrtype;
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
-
-static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
-}
-
-int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
-
-#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
-
-static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
-{
- struct devlink_fmsg_item *item;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
-
- item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->nla_type = NLA_NUL_STRING;
- item->len = strlen(name) + 1;
- item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
- memcpy(&item->value, name, item->len);
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
- if (err)
- return err;
-
- err = devlink_fmsg_put_name(fmsg, name);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
-
-int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
-
-int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
-
-int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
-
-int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
-{
- int err;
-
- err = devlink_fmsg_arr_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- fmsg->putting_binary = true;
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start);
-
-int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- if (!fmsg->putting_binary)
- return -EINVAL;
-
- fmsg->putting_binary = false;
- return devlink_fmsg_arr_pair_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end);
-
-static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
- const void *value, u16 value_len,
- u8 value_nla_type)
-{
- struct devlink_fmsg_item *item;
-
- if (value_len > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
-
- item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->nla_type = value_nla_type;
- item->len = value_len;
- item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
- memcpy(&item->value, value, item->len);
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
-}
-
-static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
-}
-
-int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-
-static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
-}
-
-int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
- NLA_NUL_STRING);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
-
-int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len)
-{
- if (!fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
-
-int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_bool_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
-
-int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u8_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
-
-int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u32_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
-
-int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u64_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
-
-int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_string_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
-
-int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u32 value_len)
-{
- u32 data_size;
- int end_err;
- u32 offset;
- int err;
-
- err = devlink_fmsg_binary_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- for (offset = 0; offset < value_len; offset += data_size) {
- data_size = value_len - offset;
- if (data_size > DEVLINK_FMSG_MAX_SIZE)
- data_size = DEVLINK_FMSG_MAX_SIZE;
- err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
- if (err)
- break;
- /* Exit from loop with a break (instead of
- * return) to make sure putting_binary is turned off in
- * devlink_fmsg_binary_pair_nest_end
- */
- }
-
- end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
- if (end_err)
- err = end_err;
-
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
-
-static int
-devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
-{
- switch (msg->nla_type) {
- case NLA_FLAG:
- case NLA_U8:
- case NLA_U32:
- case NLA_U64:
- case NLA_NUL_STRING:
- case NLA_BINARY:
- return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
- msg->nla_type);
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
-{
- int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
- u8 tmp;
-
- switch (msg->nla_type) {
- case NLA_FLAG:
- /* Always provide flag data, regardless of its value */
- tmp = *(bool *) msg->value;
-
- return nla_put_u8(skb, attrtype, tmp);
- case NLA_U8:
- return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
- case NLA_U32:
- return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
- case NLA_U64:
- return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
- DEVLINK_ATTR_PAD);
- case NLA_NUL_STRING:
- return nla_put_string(skb, attrtype, (char *) &msg->value);
- case NLA_BINARY:
- return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
- int *start)
-{
- struct devlink_fmsg_item *item;
- struct nlattr *fmsg_nlattr;
- int err = 0;
- int i = 0;
-
- fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
- if (!fmsg_nlattr)
- return -EMSGSIZE;
-
- list_for_each_entry(item, &fmsg->item_list, list) {
- if (i < *start) {
- i++;
- continue;
- }
-
- switch (item->attrtype) {
- case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
- case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
- case DEVLINK_ATTR_FMSG_ARR_NEST_START:
- case DEVLINK_ATTR_FMSG_NEST_END:
- err = nla_put_flag(skb, item->attrtype);
- break;
- case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
- err = devlink_fmsg_item_fill_type(item, skb);
- if (err)
- break;
- err = devlink_fmsg_item_fill_data(item, skb);
- break;
- case DEVLINK_ATTR_FMSG_OBJ_NAME:
- err = nla_put_string(skb, item->attrtype,
- (char *) &item->value);
- break;
- default:
- err = -EINVAL;
- break;
- }
- if (!err)
- *start = ++i;
- else
- break;
- }
-
- nla_nest_end(skb, fmsg_nlattr);
- return err;
-}
-
-static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
- struct genl_info *info,
- enum devlink_command cmd, int flags)
-{
- struct nlmsghdr *nlh;
- struct sk_buff *skb;
- bool last = false;
- int index = 0;
- void *hdr;
- int err;
-
- while (!last) {
- int tmp_index = index;
-
- skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, flags | NLM_F_MULTI, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
- if (!err)
- last = true;
- else if (err != -EMSGSIZE || tmp_index == index)
- goto nla_put_failure;
-
- genlmsg_end(skb, hdr);
- err = genlmsg_reply(skb, info);
- if (err)
- return err;
- }
-
- skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
- struct netlink_callback *cb,
- enum devlink_command cmd)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- int index = state->idx;
- int tmp_index = index;
- void *hdr;
- int err;
-
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
- if ((err && err != -EMSGSIZE) || tmp_index == index)
- goto nla_put_failure;
-
- state->idx = index;
- genlmsg_end(skb, hdr);
- return skb->len;
-
-nla_put_failure:
- genlmsg_cancel(skb, hdr);
- return err;
-}
-
-struct devlink_health_reporter {
- struct list_head list;
- void *priv;
- const struct devlink_health_reporter_ops *ops;
- struct devlink *devlink;
- struct devlink_port *devlink_port;
- struct devlink_fmsg *dump_fmsg;
- struct mutex dump_lock; /* lock parallel read/write from dump buffers */
- u64 graceful_period;
- bool auto_recover;
- bool auto_dump;
- u8 health_state;
- u64 dump_ts;
- u64 dump_real_ts;
- u64 error_count;
- u64 recovery_count;
- u64 last_recovery_ts;
-};
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
- return reporter->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
-
-static struct devlink_health_reporter *
-__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
- const char *reporter_name)
-{
- struct devlink_health_reporter *reporter;
-
- list_for_each_entry(reporter, reporter_list, list)
- if (!strcmp(reporter->ops->name, reporter_name))
- return reporter;
- return NULL;
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
- const char *reporter_name)
-{
- return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
- reporter_name);
-}
-
-static struct devlink_health_reporter *
-devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
- const char *reporter_name)
-{
- return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
- reporter_name);
-}
-
-static struct devlink_health_reporter *
-__devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- if (WARN_ON(graceful_period && !ops->recover))
- return ERR_PTR(-EINVAL);
-
- reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
- if (!reporter)
- return ERR_PTR(-ENOMEM);
-
- reporter->priv = priv;
- reporter->ops = ops;
- reporter->devlink = devlink;
- reporter->graceful_period = graceful_period;
- reporter->auto_recover = !!ops->recover;
- reporter->auto_dump = !!ops->dump;
- mutex_init(&reporter->dump_lock);
- return reporter;
-}
-
-/**
- * devl_port_health_reporter_create - create devlink health reporter for
- * specified port instance
- *
- * @port: devlink_port which should contain the new reporter
- * @ops: ops
- * @graceful_period: to avoid recovery loops, in msecs
- * @priv: priv
- */
-struct devlink_health_reporter *
-devl_port_health_reporter_create(struct devlink_port *port,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- devl_assert_locked(port->devlink);
-
- if (__devlink_health_reporter_find_by_name(&port->reporter_list,
- ops->name))
- return ERR_PTR(-EEXIST);
-
- reporter = __devlink_health_reporter_create(port->devlink, ops,
- graceful_period, priv);
- if (IS_ERR(reporter))
- return reporter;
-
- reporter->devlink_port = port;
- list_add_tail(&reporter->list, &port->reporter_list);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devl_port_health_reporter_create);
-
-struct devlink_health_reporter *
-devlink_port_health_reporter_create(struct devlink_port *port,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
- struct devlink *devlink = port->devlink;
-
- devl_lock(devlink);
- reporter = devl_port_health_reporter_create(port, ops,
- graceful_period, priv);
- devl_unlock(devlink);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
-
-/**
- * devl_health_reporter_create - create devlink health reporter
- *
- * @devlink: devlink
- * @ops: ops
- * @graceful_period: to avoid recovery loops, in msecs
- * @priv: priv
- */
-struct devlink_health_reporter *
-devl_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- devl_assert_locked(devlink);
-
- if (devlink_health_reporter_find_by_name(devlink, ops->name))
- return ERR_PTR(-EEXIST);
-
- reporter = __devlink_health_reporter_create(devlink, ops,
- graceful_period, priv);
- if (IS_ERR(reporter))
- return reporter;
-
- list_add_tail(&reporter->list, &devlink->reporter_list);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devl_health_reporter_create);
-
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- devl_lock(devlink);
- reporter = devl_health_reporter_create(devlink, ops,
- graceful_period, priv);
- devl_unlock(devlink);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
-
-static void
-devlink_health_reporter_free(struct devlink_health_reporter *reporter)
-{
- mutex_destroy(&reporter->dump_lock);
- if (reporter->dump_fmsg)
- devlink_fmsg_free(reporter->dump_fmsg);
- kfree(reporter);
-}
-
-/**
- * devl_health_reporter_destroy - destroy devlink health reporter
- *
- * @reporter: devlink health reporter to destroy
- */
-void
-devl_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- devl_assert_locked(reporter->devlink);
-
- list_del(&reporter->list);
- devlink_health_reporter_free(reporter);
-}
-EXPORT_SYMBOL_GPL(devl_health_reporter_destroy);
-
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- struct devlink *devlink = reporter->devlink;
-
- devl_lock(devlink);
- devl_health_reporter_destroy(reporter);
- devl_unlock(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink_health_reporter *reporter,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct devlink *devlink = reporter->devlink;
- struct nlattr *reporter_attr;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- if (reporter->devlink_port) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index))
- goto genlmsg_cancel;
- }
- reporter_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_HEALTH_REPORTER);
- if (!reporter_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
- reporter->ops->name))
- goto reporter_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
- reporter->health_state))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
- reporter->auto_recover))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->dump &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
- reporter->auto_dump))
- goto reporter_nest_cancel;
-
- nla_nest_end(msg, reporter_attr);
- genlmsg_end(msg, hdr);
- return 0;
-
-reporter_nest_cancel:
- nla_nest_end(msg, reporter_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_recover_notify(struct devlink_health_reporter *reporter,
- enum devlink_command cmd)
-{
- struct devlink *devlink = reporter->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-void
-devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
-{
- reporter->recovery_count++;
- reporter->last_recovery_ts = jiffies;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
-
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
- void *priv_ctx, struct netlink_ext_ack *extack)
-{
- int err;
-
- if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
- return 0;
-
- if (!reporter->ops->recover)
- return -EOPNOTSUPP;
-
- err = reporter->ops->recover(reporter, priv_ctx, extack);
- if (err)
- return err;
-
- devlink_health_reporter_recovery_done(reporter);
- reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-
- return 0;
-}
-
-static void
-devlink_health_dump_clear(struct devlink_health_reporter *reporter)
-{
- if (!reporter->dump_fmsg)
- return;
- devlink_fmsg_free(reporter->dump_fmsg);
- reporter->dump_fmsg = NULL;
-}
-
-static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
- void *priv_ctx,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (!reporter->ops->dump)
- return 0;
-
- if (reporter->dump_fmsg)
- return 0;
-
- reporter->dump_fmsg = devlink_fmsg_alloc();
- if (!reporter->dump_fmsg) {
- err = -ENOMEM;
- return err;
- }
-
- err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
- if (err)
- goto dump_err;
-
- err = reporter->ops->dump(reporter, reporter->dump_fmsg,
- priv_ctx, extack);
- if (err)
- goto dump_err;
-
- err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
- if (err)
- goto dump_err;
-
- reporter->dump_ts = jiffies;
- reporter->dump_real_ts = ktime_get_real_ns();
-
- return 0;
-
-dump_err:
- devlink_health_dump_clear(reporter);
- return err;
-}
-
-int devlink_health_report(struct devlink_health_reporter *reporter,
- const char *msg, void *priv_ctx)
-{
- enum devlink_health_reporter_state prev_health_state;
- struct devlink *devlink = reporter->devlink;
- unsigned long recover_ts_threshold;
- int ret;
-
- /* write a log message of the current error */
- WARN_ON(!msg);
- trace_devlink_health_report(devlink, reporter->ops->name, msg);
- reporter->error_count++;
- prev_health_state = reporter->health_state;
- reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-
- /* abort if the previous error wasn't recovered */
- recover_ts_threshold = reporter->last_recovery_ts +
- msecs_to_jiffies(reporter->graceful_period);
- if (reporter->auto_recover &&
- (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
- (reporter->last_recovery_ts && reporter->recovery_count &&
- time_is_after_jiffies(recover_ts_threshold)))) {
- trace_devlink_health_recover_aborted(devlink,
- reporter->ops->name,
- reporter->health_state,
- jiffies -
- reporter->last_recovery_ts);
- return -ECANCELED;
- }
-
- if (reporter->auto_dump) {
- mutex_lock(&reporter->dump_lock);
- /* store current dump of current error, for later analysis */
- devlink_health_do_dump(reporter, priv_ctx, NULL);
- mutex_unlock(&reporter->dump_lock);
- }
-
- if (!reporter->auto_recover)
- return 0;
-
- devl_lock(devlink);
- ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
- devl_unlock(devlink);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(devlink_health_report);
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- struct devlink_port *devlink_port;
- char *reporter_name;
-
- if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
- return NULL;
-
- reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
- devlink_port = devlink_port_get_from_attrs(devlink, attrs);
- if (IS_ERR(devlink_port))
- return devlink_health_reporter_find_by_name(devlink,
- reporter_name);
- else
- return devlink_port_health_reporter_find_by_name(devlink_port,
- reporter_name);
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct devlink_health_reporter *reporter;
- struct nlattr **attrs = info->attrs;
- struct devlink *devlink;
-
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink))
- return NULL;
- devl_unlock(devlink);
-
- reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
- devlink_put(devlink);
- return reporter;
-}
-
-void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
-{
- if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
- state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
- return;
-
- if (reporter->health_state == state)
- return;
-
- reporter->health_state = state;
- trace_devlink_health_reporter_state_update(reporter->devlink,
- reporter->ops->name, state);
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
-
-static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- struct sk_buff *msg;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_health_reporter_fill(msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- info->snd_portid, info->snd_seq,
- 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
- struct devlink *devlink,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_health_reporter *reporter;
- struct devlink_port *port;
- unsigned long port_index;
- int idx = 0;
- int err;
-
- list_for_each_entry(reporter, &devlink->reporter_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_health_reporter_fill(msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- return err;
- }
- idx++;
- }
- xa_for_each(&devlink->ports, port_index, port) {
- list_for_each_entry(reporter, &port->reporter_list, list) {
- if (idx < state->idx) {
- idx++;
- continue;
- }
- err = devlink_nl_health_reporter_fill(msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- state->idx = idx;
- return err;
- }
- idx++;
- }
- }
-
- return 0;
-}
-
-const struct devlink_cmd devl_cmd_health_reporter_get = {
- .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
-};
-
-static int
-devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->recover &&
- (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
- return -EOPNOTSUPP;
-
- if (!reporter->ops->dump &&
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
- return -EOPNOTSUPP;
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
- reporter->graceful_period =
- nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
- reporter->auto_recover =
- nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
- reporter->auto_dump =
- nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
-
- return 0;
-}
-
-static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- return devlink_health_reporter_recover(reporter, NULL, info->extack);
-}
-
-static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- struct devlink_fmsg *fmsg;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->diagnose)
- return -EOPNOTSUPP;
-
- fmsg = devlink_fmsg_alloc();
- if (!fmsg)
- return -ENOMEM;
-
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- goto out;
-
- err = reporter->ops->diagnose(reporter, fmsg, info->extack);
- if (err)
- goto out;
-
- err = devlink_fmsg_obj_nest_end(fmsg);
- if (err)
- goto out;
-
- err = devlink_fmsg_snd(fmsg, info,
- DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
-
-out:
- devlink_fmsg_free(fmsg);
- return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- struct devlink_nl_dump_state *state = devlink_dump_state(cb);
- struct devlink_health_reporter *reporter;
- int err;
-
- reporter = devlink_health_reporter_get_from_cb(cb);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->dump)
- return -EOPNOTSUPP;
-
- mutex_lock(&reporter->dump_lock);
- if (!state->idx) {
- err = devlink_health_do_dump(reporter, NULL, cb->extack);
- if (err)
- goto unlock;
- state->dump_ts = reporter->dump_ts;
- }
- if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) {
- NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry");
- err = -EAGAIN;
- goto unlock;
- }
-
- err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
- DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
-unlock:
- mutex_unlock(&reporter->dump_lock);
- return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->dump)
- return -EOPNOTSUPP;
-
- mutex_lock(&reporter->dump_lock);
- devlink_health_dump_clear(reporter);
- mutex_unlock(&reporter->dump_lock);
- return 0;
-}
-
-static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->test)
- return -EOPNOTSUPP;
-
- return reporter->ops->test(reporter, info->extack);
-}
-
struct devlink_stats {
u64_stats_t rx_bytes;
u64_stats_t rx_packets;
@@ -7025,7 +5682,7 @@ static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
trap_item = devlink_trap_item_get_from_info(devlink, info);
if (!trap_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
return -ENOENT;
}
@@ -7088,7 +5745,7 @@ static int __devlink_trap_action_set(struct devlink *devlink,
if (trap_item->action != trap_action &&
trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+ NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
return 0;
}
@@ -7114,7 +5771,7 @@ static int devlink_trap_action_set(struct devlink *devlink,
err = devlink_trap_action_get_from_info(info, &trap_action);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
return -EINVAL;
}
@@ -7134,7 +5791,7 @@ static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
trap_item = devlink_trap_item_get_from_info(devlink, info);
if (!trap_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
return -ENOENT;
}
@@ -7236,7 +5893,7 @@ static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
group_item = devlink_trap_group_item_get_from_info(devlink, info);
if (!group_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
return -ENOENT;
}
@@ -7345,7 +6002,7 @@ devlink_trap_group_action_set(struct devlink *devlink,
err = devlink_trap_action_get_from_info(info, &trap_action);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
return -EINVAL;
}
@@ -7379,7 +6036,7 @@ static int devlink_trap_group_set(struct devlink *devlink,
policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
if (policer_id && !policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
return -ENOENT;
}
policer = policer_item ? policer_item->policer : NULL;
@@ -7408,7 +6065,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
group_item = devlink_trap_group_item_get_from_info(devlink, info);
if (!group_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
return -ENOENT;
}
@@ -7425,7 +6082,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
err_trap_group_set:
if (modified)
- NL_SET_ERR_MSG_MOD(extack, "Trap group set failed, but some changes were committed already");
+ NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
return err;
}
@@ -7530,7 +6187,7 @@ static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
if (!policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
return -ENOENT;
}
@@ -7605,22 +6262,22 @@ devlink_trap_policer_set(struct devlink *devlink,
burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
if (rate < policer_item->policer->min_rate) {
- NL_SET_ERR_MSG_MOD(extack, "Policer rate lower than limit");
+ NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
return -EINVAL;
}
if (rate > policer_item->policer->max_rate) {
- NL_SET_ERR_MSG_MOD(extack, "Policer rate higher than limit");
+ NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
return -EINVAL;
}
if (burst < policer_item->policer->min_burst) {
- NL_SET_ERR_MSG_MOD(extack, "Policer burst size lower than limit");
+ NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
return -EINVAL;
}
if (burst > policer_item->policer->max_burst) {
- NL_SET_ERR_MSG_MOD(extack, "Policer burst size higher than limit");
+ NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
return -EINVAL;
}
@@ -7650,7 +6307,7 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
if (!policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
return -ENOENT;
}
@@ -8044,6 +6701,7 @@ void devlink_notify_register(struct devlink *devlink)
struct devlink_rate *rate_node;
struct devlink_region *region;
unsigned long port_index;
+ unsigned long param_id;
devlink_notify(devlink, DEVLINK_CMD_NEW);
list_for_each_entry(linecard, &devlink->linecard_list, list)
@@ -8069,7 +6727,7 @@ void devlink_notify_register(struct devlink *devlink)
list_for_each_entry(region, &devlink->region_list, list)
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- list_for_each_entry(param_item, &devlink->param_list, list)
+ xa_for_each(&devlink->params, param_id, param_item)
devlink_param_notify(devlink, 0, param_item,
DEVLINK_CMD_PARAM_NEW);
}
@@ -8084,8 +6742,9 @@ void devlink_notify_unregister(struct devlink *devlink)
struct devlink_rate *rate_node;
struct devlink_region *region;
unsigned long port_index;
+ unsigned long param_id;
- list_for_each_entry_reverse(param_item, &devlink->param_list, list)
+ xa_for_each(&devlink->params, param_id, param_item)
devlink_param_notify(devlink, 0, param_item,
DEVLINK_CMD_PARAM_DEL);
@@ -9512,9 +8171,10 @@ static int devlink_param_register(struct devlink *devlink,
const struct devlink_param *param)
{
struct devlink_param_item *param_item;
+ int err;
WARN_ON(devlink_param_verify(param));
- WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
+ WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
WARN_ON(param->get || param->set);
@@ -9527,9 +8187,16 @@ static int devlink_param_register(struct devlink *devlink,
param_item->param = param;
- list_add_tail(&param_item->list, &devlink->param_list);
+ err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
return 0;
+
+err_xa_insert:
+ kfree(param_item);
+ return err;
}
static void devlink_param_unregister(struct devlink *devlink,
@@ -9537,12 +8204,11 @@ static void devlink_param_unregister(struct devlink *devlink,
{
struct devlink_param_item *param_item;
- param_item =
- devlink_param_find_by_name(&devlink->param_list, param->name);
+ param_item = devlink_param_find_by_id(&devlink->params, param->id);
if (WARN_ON(!param_item))
return;
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
- list_del(&param_item->list);
+ xa_erase(&devlink->params, param->id);
kfree(param_item);
}
@@ -9630,22 +8296,32 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister);
*
* @devlink: devlink
* @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
+ * @val: pointer to store the value of parameter in driverinit
+ * configuration mode
*
* This function should be used by the driver to get driverinit
* configuration for initialization after reload command.
+ *
+ * Note that a lockless call of this function relies on the
+ * driver to maintain the following basic sane behavior:
+ * 1) Driver ensures a call to this function cannot race with
+ * registering/unregistering the parameter with the same parameter ID.
+ * 2) Driver ensures a call to this function cannot race with
+ * devl_param_driverinit_value_set() call with the same parameter ID.
+ * 3) Driver ensures a call to this function cannot race with
+ * reload operation.
+ * If the driver is not able to comply, it has to take the devlink->lock
+ * while calling this.
*/
int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
+ union devlink_param_value *val)
{
struct devlink_param_item *param_item;
- lockdep_assert_held(&devlink->lock);
-
if (WARN_ON(!devlink_reload_supported(devlink->ops)))
return -EOPNOTSUPP;
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
if (!param_item)
return -EINVAL;
@@ -9656,10 +8332,7 @@ int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
DEVLINK_PARAM_CMODE_DRIVERINIT)))
return -EOPNOTSUPP;
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(init_val->vstr, param_item->driverinit_value.vstr);
- else
- *init_val = param_item->driverinit_value;
+ *val = param_item->driverinit_value;
return 0;
}
@@ -9682,7 +8355,9 @@ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
{
struct devlink_param_item *param_item;
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ devl_assert_locked(devlink);
+
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
if (WARN_ON(!param_item))
return;
@@ -9690,16 +8365,29 @@ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
DEVLINK_PARAM_CMODE_DRIVERINIT)))
return;
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, init_val.vstr);
- else
- param_item->driverinit_value = init_val;
+ param_item->driverinit_value = init_val;
param_item->driverinit_value_valid = true;
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
}
EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
+void devlink_params_driverinit_load_new(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item) {
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT) ||
+ !param_item->driverinit_value_new_valid)
+ continue;
+ param_item->driverinit_value = param_item->driverinit_value_new;
+ param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new_valid = false;
+ }
+}
+
/**
* devl_param_value_changed - notify devlink on a parameter's value
* change. Should be called by the driver
@@ -9716,7 +8404,7 @@ void devl_param_value_changed(struct devlink *devlink, u32 param_id)
{
struct devlink_param_item *param_item;
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
WARN_ON(!param_item);
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index ae0732460e88..f7b189ed96b2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -413,7 +413,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index 2a2d3539630c..f358cd57d094 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -56,7 +56,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */
nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */
- nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */
+ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */
+ nla_total_size(sizeof(u8))); /* _RINGS_RX_PUSH */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -96,7 +97,8 @@ static int rings_fill_reply(struct sk_buff *skb,
kr->tcp_data_split))) ||
(kr->cqe_size &&
(nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) ||
- nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push))
+ nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) ||
+ nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push))
return -EMSGSIZE;
return 0;
@@ -114,6 +116,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
};
static int
@@ -147,6 +150,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
}
+ if (tb[ETHTOOL_A_RINGS_RX_PUSH] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_PUSH)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_RX_PUSH],
+ "setting rx push not supported");
+ return -EOPNOTSUPP;
+ }
+
return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP;
}
@@ -176,6 +187,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ethnl_update_u8(&kernel_ringparam.tx_push,
tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
+ ethnl_update_u8(&kernel_ringparam.rx_push,
+ tb[ETHTOOL_A_RINGS_RX_PUSH], &mod);
if (!mod)
return 0;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2c778b013cb0..8db6747f892f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -156,7 +156,6 @@ void inet_sock_destruct(struct sock *sk)
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
- sk_refcnt_debug_dec(sk);
}
EXPORT_SYMBOL(inet_sock_destruct);
@@ -357,8 +356,6 @@ lookup_protocol:
inet->mc_list = NULL;
inet->rcv_tos = 0;
- sk_refcnt_debug_inc(sk);
-
if (inet->inet_num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7d206a10ad14..eedcf4146d29 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1199,8 +1199,6 @@ void inet_csk_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
-
this_cpu_dec(*sk->sk_prot->orphan_count);
sock_put(sk);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index beed32fff484..40052414c7c7 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -77,9 +77,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
{
struct module *owner = tw->tw_prot->owner;
twsk_destructor((struct sock *)tw);
-#ifdef SOCK_REFCNT_DEBUG
- pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
-#endif
kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
module_put(owner);
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 847934763868..38689bedfce7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -239,16 +239,6 @@ lookup_protocol:
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
- /*
- * Increment only the relevant sk_prot->socks debug field, this changes
- * the previous behaviour of incrementing both the equivalent to
- * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
- *
- * This allows better debug granularity as we'll know exactly how many
- * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
- * transport protocol socks. -acme
- */
- sk_refcnt_debug_inc(sk);
if (inet->inet_num) {
/* It assumes that any protocol which allows
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index c9346515e24d..f32bc98155bf 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -813,16 +813,19 @@ out_bh_enable:
local_bh_enable();
}
-void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
+ u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(skb->dev);
const struct inet6_protocol *ipprot;
+ enum skb_drop_reason reason;
int inner_offset;
__be16 frag_off;
u8 nexthdr;
- struct net *net = dev_net(skb->dev);
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
+ if (reason != SKB_NOT_DROPPED_YET)
goto out;
seg6_icmp_srh(skb, opt);
@@ -832,14 +835,17 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
/* now skip over extension headers */
inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
- if (inner_offset < 0)
+ if (inner_offset < 0) {
+ SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
goto out;
+ }
} else {
inner_offset = sizeof(struct ipv6hdr);
}
/* Checkin header including 8 bytes of inner protocol header. */
- if (!pskb_may_pull(skb, inner_offset+8))
+ reason = pskb_may_pull_reason(skb, inner_offset + 8);
+ if (reason != SKB_NOT_DROPPED_YET)
goto out;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
@@ -854,10 +860,11 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
ipprot->err_handler(skb, opt, type, code, inner_offset, info);
raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
- return;
+ return SKB_CONSUMED;
out:
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return reason;
}
/*
@@ -953,7 +960,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
- icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ reason = icmpv6_notify(skb, type, hdr->icmp6_code,
+ hdr->icmp6_mtu);
break;
case NDISC_ROUTER_SOLICITATION:
@@ -961,7 +969,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
case NDISC_NEIGHBOUR_SOLICITATION:
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_REDIRECT:
- ndisc_rcv(skb);
+ reason = ndisc_rcv(skb);
break;
case ICMPV6_MGM_QUERY:
@@ -995,7 +1003,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
* must pass to upper level
*/
- icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ reason = icmpv6_notify(skb, type, hdr->icmp6_code,
+ hdr->icmp6_mtu);
}
/* until the v6 path can be better sorted assume failure and
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9ce51680290b..2917dd8d198c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -464,13 +464,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
__ipv6_sock_mc_close(sk);
__ipv6_sock_ac_close(sk);
- /*
- * Sock is moving from IPv6 to IPv4 (sk_prot), so
- * remove it from the refcnt debug socks count in the
- * original family...
- */
- sk_refcnt_debug_dec(sk);
-
if (sk->sk_protocol == IPPROTO_TCP) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -507,11 +500,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
inet6_cleanup_sock(sk);
- /*
- * ... and add it to the refcnt debug socks count
- * in the new family. -acme
- */
- sk_refcnt_debug_inc(sk);
module_put(THIS_MODULE);
retv = 0;
break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3a553494ff16..9548b5a44714 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1804,15 +1804,16 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
return false;
}
-int ndisc_rcv(struct sk_buff *skb)
+enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
+ SKB_DR(reason);
if (ndisc_suppress_frag_ndisc(skb))
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_FRAG;
if (skb_linearize(skb))
- return 0;
+ return SKB_DROP_REASON_NOMEM;
msg = (struct nd_msg *)skb_transport_header(skb);
@@ -1821,13 +1822,13 @@ int ndisc_rcv(struct sk_buff *skb)
if (ipv6_hdr(skb)->hop_limit != 255) {
ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
ipv6_hdr(skb)->hop_limit);
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
}
if (msg->icmph.icmp6_code != 0) {
ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
msg->icmph.icmp6_code);
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
}
switch (msg->icmph.icmp6_type) {
@@ -1853,7 +1854,7 @@ int ndisc_rcv(struct sk_buff *skb)
break;
}
- return 0;
+ return reason;
}
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 487f8e98deaa..dd433cc265c8 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -109,8 +109,15 @@ struct bpf_lwt_prog {
#define next_csid_chk_lcnode_fn_bits(flen) \
next_csid_chk_lcblock_bits(flen)
+#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
+#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)
+
+/* Supported RFC8986 Flavor operations are reported in this bitmask */
+#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP
+
/* Supported Flavor operations are reported in this bitmask */
-#define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
+#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \
+ SEG6_LOCAL_FLV8986_SUPP_OPS)
struct seg6_flavors_info {
/* Flavor operations */
@@ -364,6 +371,14 @@ static void seg6_next_csid_advance_arg(struct in6_addr *addr,
memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
}
+static int input_action_end_finish(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ seg6_lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+}
+
static int input_action_end_core(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@@ -375,9 +390,7 @@ static int input_action_end_core(struct sk_buff *skb,
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- seg6_lookup_nexthop(skb, NULL, 0);
-
- return dst_input(skb);
+ return input_action_end_finish(skb, slwt);
drop:
kfree_skb(skb);
@@ -395,9 +408,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* update DA */
seg6_next_csid_advance_arg(daddr, finfo);
- seg6_lookup_nexthop(skb, NULL, 0);
-
- return dst_input(skb);
+ return input_action_end_finish(skb, slwt);
}
static bool seg6_next_csid_enabled(__u32 fops)
@@ -405,15 +416,331 @@ static bool seg6_next_csid_enabled(__u32 fops)
return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
}
+/* We describe the packet state in relation to the absence/presence of the SRH
+ * and the Segments Left (SL) field.
+ * For our purposes, it is not necessary to record the exact value of the SL
+ * when the SID List consists of two or more segments.
+ */
+enum seg6_local_pktinfo {
+ /* the order really matters! */
+ SEG6_LOCAL_PKTINFO_NOHDR = 0,
+ SEG6_LOCAL_PKTINFO_SL_ZERO,
+ SEG6_LOCAL_PKTINFO_SL_ONE,
+ SEG6_LOCAL_PKTINFO_SL_MORE,
+ __SEG6_LOCAL_PKTINFO_MAX,
+};
+
+#define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1)
+
+static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh)
+{
+ __u8 sgl;
+
+ if (!srh)
+ return SEG6_LOCAL_PKTINFO_NOHDR;
+
+ sgl = srh->segments_left;
+ if (sgl < 2)
+ return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl;
+
+ return SEG6_LOCAL_PKTINFO_SL_MORE;
+}
+
+enum seg6_local_flv_action {
+ SEG6_LOCAL_FLV_ACT_UNSPEC = 0,
+ SEG6_LOCAL_FLV_ACT_END,
+ SEG6_LOCAL_FLV_ACT_PSP,
+ SEG6_LOCAL_FLV_ACT_USP,
+ SEG6_LOCAL_FLV_ACT_USD,
+ __SEG6_LOCAL_FLV_ACT_MAX
+};
+
+#define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1)
+
+/* The action table for RFC8986 flavors (see the flv8986_act_tbl below)
+ * contains the actions (i.e. processing operations) to be applied on packets
+ * when flavors are configured for an End* behavior.
+ * By combining the pktinfo data and the flavors mask, the macro
+ * computes the index used to access the elements (actions) stored in the
+ * action table. The index is structured as follows:
+ *
+ * index
+ * _______________/\________________
+ * / \
+ * +----------------+----------------+
+ * | pf | afm |
+ * +----------------+----------------+
+ * ph-1 ... p1 p0 fk-1 ... f1 f0
+ * MSB LSB
+ *
+ * where:
+ * - 'afm' (adjusted flavor mask) is the mask containing a combination of the
+ * RFC8986 flavors currently supported. 'afm' corresponds to the @fm
+ * argument of the macro whose value is right-shifted by 1 bit. By doing so,
+ * we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is
+ * never used here;
+ * - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of
+ * the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as
+ * argument to the macro.
+ */
+#define flv8986_act_tbl_idx(pf, fm) \
+ ((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) | \
+ ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP)
+
+/* We compute the size of the action table by considering the RFC8986 flavors
+ * actually supported by the kernel. In this way, the size is automatically
+ * adjusted when new flavors are supported.
+ */
+#define FLV8986_ACT_TBL_SIZE \
+ roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX, \
+ SEG6_LOCAL_FLV8986_SUPP_OPS))
+
+/* tbl_cfg(act, pf, fm) macro is used to easily configure the action
+ * table; it accepts 3 arguments:
+ * i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing
+ * the action that should be applied on the packet;
+ * ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet
+ * info about the lack/presence of SRH, SRH with SL = 0, etc;
+ * iii) @fm, the mask of flavors.
+ */
+#define tbl_cfg(act, pf, fm) \
+ [flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf, \
+ (fm))] = SEG6_LOCAL_FLV_ACT_##act
+
+/* shorthand for improving readability */
+#define F_PSP SEG6_F_LOCAL_FLV_PSP
+
+/* The table contains, for each combination of the pktinfo data and
+ * flavors, the action that should be taken on a packet (e.g.
+ * "standard" Endpoint processing, Penultimate Segment Pop, etc).
+ *
+ * By default, table entries not explicitly configured are initialized with the
+ * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of
+ * discarding the processed packet.
+ */
+static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = {
+ /* PSP variant for packets carrying an SRH with SL = 1 */
+ tbl_cfg(PSP, SL_ONE, F_PSP),
+ /* End for packets carrying an SRH with SL > 1 */
+ tbl_cfg(END, SL_MORE, F_PSP),
+};
+
+#undef F_PSP
+#undef tbl_cfg
+
+/* For each flavor defined in RFC8986 (or a combination of them) an action is
+ * performed on the packet. The specific action depends on:
+ * - info extracted from the packet (i.e. pktinfo data) regarding the
+ * lack/presence of the SRH, and if the SRH is available, on the value of
+ * Segments Left field;
+ * - the mask of flavors configured for the specific SRv6 End* behavior.
+ *
+ * The function combines both the pktinfo and the flavors mask to evaluate the
+ * corresponding action to be taken on the packet.
+ */
+static enum seg6_local_flv_action
+seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask)
+{
+ unsigned long index;
+
+ /* check if the provided mask of flavors is supported */
+ if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS))
+ return SEG6_LOCAL_FLV_ACT_UNSPEC;
+
+ index = flv8986_act_tbl_idx(pinfo, flvmask);
+ if (unlikely(index >= FLV8986_ACT_TBL_SIZE))
+ return SEG6_LOCAL_FLV_ACT_UNSPEC;
+
+ return flv8986_act_tbl[index];
+}
+
+/* skb->data must be aligned with skb->network_header */
+static bool seg6_pop_srh(struct sk_buff *skb, int srhoff)
+{
+ struct ipv6_sr_hdr *srh;
+ struct ipv6hdr *iph;
+ __u8 srh_nexthdr;
+ int thoff = -1;
+ int srhlen;
+ int nhlen;
+
+ if (unlikely(srhoff < sizeof(*iph) ||
+ !pskb_may_pull(skb, srhoff + sizeof(*srh))))
+ return false;
+
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srhlen = ipv6_optlen(srh);
+
+ /* we are about to mangle the pkt, let's check if we can write on it */
+ if (unlikely(skb_ensure_writable(skb, srhoff + srhlen)))
+ return false;
+
+ /* skb_ensure_writable() may change skb pointers; evaluate srh again */
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh_nexthdr = srh->nexthdr;
+
+ if (unlikely(!skb_transport_header_was_set(skb)))
+ goto pull;
+
+ nhlen = skb_network_header_len(skb);
+ /* we have to deal with the transport header: it could be set before
+ * the SRH, after the SRH, or within it (which is considered wrong,
+ * however).
+ */
+ if (likely(nhlen <= srhoff))
+ thoff = nhlen;
+ else if (nhlen >= srhoff + srhlen)
+ /* transport_header is set after the SRH */
+ thoff = nhlen - srhlen;
+ else
+ /* transport_header falls inside the SRH; hence, we can't
+ * restore the transport_header pointer properly after
+ * the SRH has been removed.
+ */
+ return false;
+pull:
+ /* we need to pop the SRH:
+ * 1) first of all, we pull out everything from IPv6 header up to SRH
+ * (included) evaluating also the rcsum;
+ * 2) we overwrite (and then remove) the SRH by properly moving the
+ * IPv6 along with any extension header that precedes the SRH;
+ * 3) At the end, we push back the pulled headers (except for SRH,
+ * obviously).
+ */
+ skb_pull_rcsum(skb, srhoff + srhlen);
+ memmove(skb_network_header(skb) + srhlen, skb_network_header(skb),
+ srhoff);
+ skb_push(skb, srhoff);
+
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ if (likely(thoff >= 0))
+ skb_set_transport_header(skb, thoff);
+
+ iph = ipv6_hdr(skb);
+ if (iph->nexthdr == NEXTHDR_ROUTING) {
+ iph->nexthdr = srh_nexthdr;
+ } else {
+ /* we must look for the extension header (EXTH, for short) that
+ * immediately precedes the SRH we have just removed.
+ * Then, we update the value of the EXTH nexthdr with the one
+ * contained in the SRH nexthdr.
+ */
+ unsigned int off = sizeof(*iph);
+ struct ipv6_opt_hdr *hp, _hdr;
+ __u8 nexthdr = iph->nexthdr;
+
+ for (;;) {
+ if (unlikely(!ipv6_ext_hdr(nexthdr) ||
+ nexthdr == NEXTHDR_NONE))
+ return false;
+
+ hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr);
+ if (unlikely(!hp))
+ return false;
+
+ if (hp->nexthdr == NEXTHDR_ROUTING) {
+ hp->nexthdr = srh_nexthdr;
+ break;
+ }
+
+ switch (nexthdr) {
+ case NEXTHDR_FRAGMENT:
+ fallthrough;
+ case NEXTHDR_AUTH:
+ /* we expect SRH before FRAG and AUTH */
+ return false;
+ default:
+ off += ipv6_optlen(hp);
+ break;
+ }
+
+ nexthdr = hp->nexthdr;
+ }
+ }
+
+ iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+ skb_postpush_rcsum(skb, iph, srhoff);
+
+ return true;
+}
+
+/* process the packet on the basis of the RFC8986 flavors set for the given
+ * SRv6 End behavior instance.
+ */
+static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ enum seg6_local_flv_action action;
+ enum seg6_local_pktinfo pinfo;
+ struct ipv6_sr_hdr *srh;
+ __u32 flvmask;
+ int srhoff;
+
+ srh = seg6_get_srh(skb, 0);
+ srhoff = srh ? ((unsigned char *)srh - skb->data) : 0;
+ pinfo = seg6_get_srh_pktinfo(srh);
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (srh && !seg6_hmac_validate_skb(skb))
+ goto drop;
+#endif
+ flvmask = finfo->flv_ops;
+ if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) {
+ pr_warn_once("seg6local: invalid RFC8986 flavors\n");
+ goto drop;
+ }
+
+ /* retrieve the action triggered by the combination of pktinfo data and
+ * the flavors mask.
+ */
+ action = seg6_local_flv8986_act_lookup(pinfo, flvmask);
+ switch (action) {
+ case SEG6_LOCAL_FLV_ACT_END:
+ /* process the packet as the "standard" End behavior */
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+ break;
+ case SEG6_LOCAL_FLV_ACT_PSP:
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ if (unlikely(!seg6_pop_srh(skb, srhoff)))
+ goto drop;
+ break;
+ case SEG6_LOCAL_FLV_ACT_UNSPEC:
+ fallthrough;
+ default:
+ /* by default, we drop the packet since we could not find a
+ * suitable action.
+ */
+ goto drop;
+ }
+
+ return input_action_end_finish(skb, slwt);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
- if (seg6_next_csid_enabled(finfo->flv_ops))
+ if (!fops)
+ return input_action_end_core(skb, slwt);
+
+ /* check for the presence of NEXT-C-SID since it applies first */
+ if (seg6_next_csid_enabled(fops))
return end_next_csid_core(skb, slwt);
- return input_action_end_core(skb, slwt);
+ /* the specific processing function to be performed on the packet
+ * depends on the combination of flavors defined in RFC8986 and some
+ * information extracted from the packet, e.g. presence/absence of SRH,
+ * Segment Left = 0, etc.
+ */
+ return end_flv8986_core(skb, slwt);
}
/* regular endpoint, and forward to specified nexthop */
@@ -2300,6 +2627,13 @@ int __init seg6_local_init(void)
BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
+ /* To be memory efficient, we use 'u8' to represent the different
+ * actions related to RFC8986 flavors. If the kernel build stops here,
+ * it means that it is not possible to correctly encode these actions
+ * with the data type chosen for the action table.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U);
+
return lwtunnel_encap_add_ops(&seg6_local_ops,
LWTUNNEL_ENCAP_SEG6_LOCAL);
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index c9817aa0f413..3ad9c46202fc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2875,7 +2875,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
sock_put(sk);
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8ffb19c643ab..d4e76e2ae153 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1335,8 +1335,6 @@ static void packet_sock_destruct(struct sock *sk)
pr_err("Attempt to release alive packet socket: %p\n", sk);
return;
}
-
- sk_refcnt_debug_dec(sk);
}
static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
@@ -3174,7 +3172,6 @@ static int packet_release(struct socket *sock)
skb_queue_purge(&sk->sk_receive_queue);
packet_free_pending(po);
- sk_refcnt_debug_release(sk);
sock_put(sk);
return 0;
@@ -3364,7 +3361,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
packet_cached_dev_reset(po);
sk->sk_destruct = packet_sock_destruct;
- sk_refcnt_debug_inc(sk);
/*
* Attach a protocol block
diff --git a/net/rds/message.c b/net/rds/message.c
index c19c93561227..7af59d2443e5 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
ck = &info->zcookies;
memset(ck, 0, sizeof(*ck));
WARN_ON(!rds_zcookie_add(info, cookie));
- list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
+ list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
spin_unlock_irqrestore(&q->lock, flags);
/* caller invokes rds_wake_sk_sleep() */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 4f7b52f5a11c..4b95cb1ac435 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -45,23 +45,6 @@ if NET_SCHED
comment "Queueing/Scheduling"
-config NET_SCH_CBQ
- tristate "Class Based Queueing (CBQ)"
- help
- Say Y here if you want to use the Class-Based Queueing (CBQ) packet
- scheduling algorithm. This algorithm classifies the waiting packets
- into a tree-like hierarchy of classes; the leaves of this tree are
- in turn scheduled by separate algorithms.
-
- See the top of <file:net/sched/sch_cbq.c> for more details.
-
- CBQ is a commonly used scheduler, so if you're unsure, you should
- say Y here. Then say Y to all the queueing algorithms below that you
- want to use as leaf disciplines.
-
- To compile this code as a module, choose M here: the
- module will be called sch_cbq.
-
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
help
@@ -85,20 +68,6 @@ config NET_SCH_HFSC
To compile this code as a module, choose M here: the
module will be called sch_hfsc.
-config NET_SCH_ATM
- tristate "ATM Virtual Circuits (ATM)"
- depends on ATM
- help
- Say Y here if you want to use the ATM pseudo-scheduler. This
- provides a framework for invoking classifiers, which in turn
- select classes of this queuing discipline. Each class maps
- the flow(s) it is handling to a given virtual circuit.
-
- See the top of <file:net/sched/sch_atm.c> for more details.
-
- To compile this code as a module, choose M here: the
- module will be called sch_atm.
-
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
help
@@ -223,17 +192,6 @@ config NET_SCH_GRED
To compile this code as a module, choose M here: the
module will be called sch_gred.
-config NET_SCH_DSMARK
- tristate "Differentiated Services marker (DSMARK)"
- help
- Say Y if you want to schedule packets according to the
- Differentiated Services architecture proposed in RFC 2475.
- Technical information on this method, with pointers to associated
- RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
-
- To compile this code as a module, choose M here: the
- module will be called sch_dsmark.
-
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
help
@@ -510,17 +468,6 @@ config NET_CLS_BASIC
To compile this code as a module, choose M here: the
module will be called cls_basic.
-config NET_CLS_TCINDEX
- tristate "Traffic-Control Index (TCINDEX)"
- select NET_CLS
- help
- Say Y here if you want to be able to classify packets based on
- traffic control indices. You will want this feature if you want
- to implement Differentiated Services together with DSMARK.
-
- To compile this code as a module, choose M here: the
- module will be called cls_tcindex.
-
config NET_CLS_ROUTE4
tristate "Routing decision (ROUTE)"
depends on INET
@@ -566,34 +513,6 @@ config CLS_U32_MARK
help
Say Y here to be able to use netfilter marks as u32 key.
-config NET_CLS_RSVP
- tristate "IPv4 Resource Reservation Protocol (RSVP)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
- tristate "IPv6 Resource Reservation Protocol (RSVP6)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6 protocol.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp6.
-
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7911eec09837..b5fd49641d91 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,20 +33,17 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
-obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
-obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
-obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
@@ -70,9 +67,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
-obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
-obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cd09ef49df22..eda58b78da13 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -169,11 +169,6 @@ static bool tc_act_skip_sw(u32 flags)
return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false;
}
-static bool tc_act_in_hw(struct tc_action *act)
-{
- return !!act->in_hw_count;
-}
-
/* SKIP_HW and SKIP_SW are mutually exclusive flags. */
static bool tc_act_flags_valid(u32 flags)
{
@@ -192,6 +187,7 @@ static int offload_action_init(struct flow_offload_action *fl_action,
fl_action->extack = extack;
fl_action->command = cmd;
fl_action->index = act->tcfa_index;
+ fl_action->cookie = (unsigned long)act;
if (act->ops->offload_act_setup) {
spin_lock_bh(&act->tcfa_lock);
@@ -307,9 +303,6 @@ int tcf_action_update_hw_stats(struct tc_action *action)
struct flow_offload_action fl_act = {};
int err;
- if (!tc_act_in_hw(action))
- return -EOPNOTSUPP;
-
err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL);
if (err)
return err;
@@ -539,6 +532,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
(unsigned long)p->tcfa_tm.lastuse))
continue;
+ tcf_action_update_hw_stats(p);
+
nest = nla_nest_start_noflag(skb, n_i);
if (!nest) {
index--;
@@ -1539,9 +1534,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
if (p == NULL)
goto errout;
- /* update hw stats for this action */
- tcf_action_update_hw_stats(p);
-
/* compat_mode being true specifies a call that is supposed
* to add additional backward compatibility statistic TLVs.
*/
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 7e63ff7e3ed7..8dabfb52ea3d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = to_connmark(a);
+ struct tcf_connmark_parms *parms;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
- spin_lock(&ca->tcf_lock);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
+
+ parms = rcu_dereference_bh(ca->parms);
switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
@@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
c = nf_ct_get(skb, &ctinfo);
if (c) {
skb->mark = READ_ONCE(c->mark);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
- goto out;
+ goto count;
}
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- proto, ca->net, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net,
+ &tuple))
goto out;
- zone.id = ca->zone;
+ zone.id = parms->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
- thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
+ thash = nf_conntrack_find_get(parms->net, &zone, &tuple);
if (!thash)
goto out;
c = nf_ct_tuplehash_to_ctrack(thash);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
+count:
+ /* using overlimits stats to count how many packets marked */
+ tcf_action_inc_overlimit_qstats(&ca->common);
out:
- spin_unlock(&ca->tcf_lock);
- return ca->tcf_action;
+ return READ_ONCE(ca->tcf_action);
}
static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
@@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
+ struct tcf_connmark_parms *nparms, *oparms;
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
@@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_CONNMARK_PARMS])
return -EINVAL;
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms)
+ return -ENOMEM;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
index = parm->index;
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_connmark_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_connmark_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- return ret;
+ err = ret;
+ goto out_free;
}
ci = to_connmark(*a);
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
- goto release_idr;
- tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->net = net;
- ci->zone = parm->zone;
+
+ nparms->net = net;
+ nparms->zone = parm->zone;
ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
- if (bind)
- return 0;
- if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ if (bind) {
+ err = 0;
+ goto out_free;
}
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ err = -EEXIST;
goto release_idr;
- /* replacing action and zone */
- spin_lock_bh(&ci->tcf_lock);
- goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->zone = parm->zone;
- spin_unlock_bh(&ci->tcf_lock);
- if (goto_ch)
- tcf_chain_put_by_act(goto_ch);
+ }
+
+ nparms->net = rtnl_dereference(ci->parms)->net;
+ nparms->zone = parm->zone;
+
ret = 0;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ spin_lock_bh(&ci->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
+ spin_unlock_bh(&ci->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ if (oparms)
+ kfree_rcu(oparms, rcu);
+
return ret;
+
release_idr:
tcf_idr_release(*a, bind);
+out_free:
+ kfree(nparms);
return err;
}
@@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
+ struct tcf_connmark_parms *parms;
struct tcf_t t;
spin_lock_bh(&ci->tcf_lock);
+ parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));
+
opt.action = ci->tcf_action;
- opt.zone = ci->zone;
+ opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -201,6 +219,16 @@ nla_put_failure:
return -1;
}
+static void tcf_connmark_cleanup(struct tc_action *a)
+{
+ struct tcf_connmark_info *ci = to_connmark(a);
+ struct tcf_connmark_parms *parms;
+
+ parms = rcu_dereference_protected(ci->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
@@ -208,6 +236,7 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
+ .cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9b8def0be41e..c9a811f4c7ee 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
-
- spin_lock(&gact->tcf_lock);
+ int action = READ_ONCE(gact->tcf_action);
tcf_lastuse_update(&gact->tcf_tm);
- bstats_update(&gact->tcf_bstats, skb);
+ tcf_action_update_bstats(&gact->common, skb);
+ spin_lock(&gact->tcf_lock);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
+ return action;
}
- if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+ if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) {
+ spin_unlock(&gact->tcf_lock);
goto drop;
+ }
if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
- gact->tcf_qstats.overlimits++;
- goto drop;
+ spin_unlock(&gact->tcf_lock);
+ goto overlimit;
}
}
-
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
-drop:
- gact->tcf_qstats.drops++;
- spin_unlock(&gact->tcf_lock);
+ return action;
+overlimit:
+ tcf_action_inc_overlimit_qstats(&gact->common);
+drop:
+ tcf_action_inc_drop_qstats(&gact->common);
return TC_ACT_SHOT;
}
@@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return 0;
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_gate_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_gate_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 74c74be33048..4184af5abbf3 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,6 +38,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
+ struct tcf_nat_parms *nparm, *oparm;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
@@ -59,8 +60,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_nat_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops,
+ bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -79,19 +80,31 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
+
+ nparm = kzalloc(sizeof(*nparm), GFP_KERNEL);
+ if (!nparm) {
+ err = -ENOMEM;
+ goto release_idr;
+ }
+
+ nparm->old_addr = parm->old_addr;
+ nparm->new_addr = parm->new_addr;
+ nparm->mask = parm->mask;
+ nparm->flags = parm->flags;
+
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
- p->old_addr = parm->old_addr;
- p->new_addr = parm->new_addr;
- p->mask = parm->mask;
- p->flags = parm->flags;
-
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (oparm)
+ kfree_rcu(oparm, rcu);
+
return ret;
release_idr:
tcf_idr_release(*a, bind);
@@ -103,6 +116,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
struct iphdr *iph;
__be32 old_addr;
__be32 new_addr;
@@ -113,18 +127,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
int ihl;
int noff;
- spin_lock(&p->tcf_lock);
-
tcf_lastuse_update(&p->tcf_tm);
- old_addr = p->old_addr;
- new_addr = p->new_addr;
- mask = p->mask;
- egress = p->flags & TCA_NAT_FLAG_EGRESS;
- action = p->tcf_action;
+ tcf_action_update_bstats(&p->common, skb);
- bstats_update(&p->tcf_bstats, skb);
+ action = READ_ONCE(p->tcf_action);
- spin_unlock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
+ old_addr = parms->old_addr;
+ new_addr = parms->new_addr;
+ mask = parms->mask;
+ egress = parms->flags & TCA_NAT_FLAG_EGRESS;
if (unlikely(action == TC_ACT_SHOT))
goto drop;
@@ -248,9 +260,7 @@ out:
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
+ tcf_action_inc_drop_qstats(&p->common);
return TC_ACT_SHOT;
}
@@ -264,15 +274,20 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
+ struct tcf_nat_parms *parms;
struct tcf_t t;
spin_lock_bh(&p->tcf_lock);
- opt.old_addr = p->old_addr;
- opt.new_addr = p->new_addr;
- opt.mask = p->mask;
- opt.flags = p->flags;
+
opt.action = p->tcf_action;
+ parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
+
+ opt.old_addr = parms->old_addr;
+ opt.new_addr = parms->new_addr;
+ opt.mask = parms->mask;
+ opt.flags = parms->flags;
+
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -289,6 +304,16 @@ nla_put_failure:
return -1;
}
+static void tcf_nat_cleanup(struct tc_action *a)
+{
+ struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.id = TCA_ID_NAT,
@@ -296,6 +321,7 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat_act,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
+ .cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c42fcc47dd6d..77d288d384ae 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -443,9 +443,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
goto done;
bad:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.overlimits++;
- spin_unlock(&p->tcf_lock);
+ tcf_action_inc_overlimit_qstats(&p->common);
done:
return p->tcf_action;
}
@@ -545,7 +543,28 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
}
*index_inc = k;
} else {
- return -EOPNOTSUPP;
+ struct flow_offload_action *fl_action = entry_data;
+ u32 cmd = tcf_pedit_cmd(act, 0);
+ int k;
+
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ fl_action->id = FLOW_ACTION_MANGLE;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ fl_action->id = FLOW_ACTION_ADD;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
+ return -EOPNOTSUPP;
+ }
+
+ for (k = 1; k < tcf_pedit_nkeys(act); k++) {
+ if (cmd != tcf_pedit_cmd(act, k)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
+ return -EOPNOTSUPP;
+ }
+ }
}
return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5b4a95e8a1ee..bfabc9c95fa9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3577,6 +3577,7 @@ int tc_setup_action(struct flow_action *flow_action,
for (k = 0; k < index ; k++) {
entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
entry[k].hw_index = act->tcfa_index;
+ entry[k].act_cookie = (unsigned long)act;
}
j += index;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0b15698b3531..885c95191ccf 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -502,12 +502,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
rtnl_held);
- tcf_exts_hw_stats_update(&f->exts, cls_flower.stats.bytes,
- cls_flower.stats.pkts,
- cls_flower.stats.drops,
- cls_flower.stats.lastused,
- cls_flower.stats.used_hw_stats,
- cls_flower.stats.used_hw_stats_valid);
+ tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats, cls_flower.use_act_stats);
}
static void __fl_put(struct cls_fl_filter *f)
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 705f63da2c21..fa3bbd187eb9 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -331,11 +331,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
- tcf_exts_hw_stats_update(&head->exts, cls_mall.stats.bytes,
- cls_mall.stats.pkts, cls_mall.stats.drops,
- cls_mall.stats.lastused,
- cls_mall.stats.used_hw_stats,
- cls_mall.stats.used_hw_stats_valid);
+ tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats, cls_mall.use_act_stats);
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index 03d8619bd9c6..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN 1
-#define RSVP_ID "rsvp"
-#define RSVP_OPS cls_rsvp_ops
-#define RSVP_CLS rsvp_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index 869efba9f834..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,764 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-/*
- Comparing to general packet classification problem,
- RSVP needs only several relatively simple rules:
-
- * (dst, protocol) are always specified,
- so that we are able to hash them.
- * src may be exact, or may be wildcard, so that
- we can keep a hash table plus one wildcard entry.
- * source port (or flow label) is important only if src is given.
-
- IMPLEMENTATION.
-
- We use a two level hash table: The top level is keyed by
- destination address and protocol ID, every bucket contains a list
- of "rsvp sessions", identified by destination address, protocol and
- DPI(="Destination Port ID"): triple (key, mask, offset).
-
- Every bucket has a smaller hash table keyed by source address
- (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
- Every bucket is again a list of "RSVP flows", selected by
- source address and SPI(="Source Port ID" here rather than
- "security parameter index"): triple (key, mask, offset).
-
-
- NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
- and all fragmented packets go to the best-effort traffic class.
-
-
- NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
- only one "Generalized Port Identifier". So that for classic
- ah, esp (and udp,tcp) both *pi should coincide or one of them
- should be wildcard.
-
- At first sight, this redundancy is just a waste of CPU
- resources. But DPI and SPI add the possibility to assign different
- priorities to GPIs. Look also at note 4 about tunnels below.
-
-
- NOTE 3. One complication is the case of tunneled packets.
- We implement it as following: if the first lookup
- matches a special session with "tunnelhdr" value not zero,
- flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
- In this case, we pull tunnelhdr bytes and restart lookup
- with tunnel ID added to the list of keys. Simple and stupid 8)8)
- It's enough for PIMREG and IPIP.
-
-
- NOTE 4. Two GPIs make it possible to parse even GRE packets.
- F.e. DPI can select ETH_P_IP (and necessary flags to make
- tunnelhdr correct) in GRE protocol field and SPI matches
- GRE key. Is it not nice? 8)8)
-
-
- Well, as result, despite its simplicity, we get a pretty
- powerful classification engine. */
-
-
-struct rsvp_head {
- u32 tmap[256/32];
- u32 hgenerator;
- u8 tgenerator;
- struct rsvp_session __rcu *ht[256];
- struct rcu_head rcu;
-};
-
-struct rsvp_session {
- struct rsvp_session __rcu *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
- /* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter __rcu *ht[16 + 1];
- struct rcu_head rcu;
-};
-
-
-struct rsvp_filter {
- struct rsvp_filter __rcu *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
-
- struct tcf_result res;
- struct tcf_exts exts;
-
- u32 handle;
- struct rsvp_session *sess;
- struct rcu_work rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
- unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
- h ^= h>>16;
- h ^= h>>8;
- return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
- unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
- h ^= h>>16;
- h ^= h>>8;
- h ^= h>>4;
- return h & 0xF;
-}
-
-#define RSVP_APPLY_RESULT() \
-{ \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
-}
-
-TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct rsvp_head *head = rcu_dereference_bh(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1, h2;
- __be32 *dst, *src;
- u8 protocol;
- u8 tunnelid = 0;
- u8 *xprt;
-#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ipv6_hdr(skb);
-#else
- struct iphdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
- src = &nhptr->saddr.s6_addr32[0];
- dst = &nhptr->daddr.s6_addr32[0];
- protocol = nhptr->nexthdr;
- xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
- src = &nhptr->saddr;
- dst = &nhptr->daddr;
- protocol = nhptr->protocol;
- xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
- if (ip_is_fragment(nhptr))
- return -1;
-#endif
-
- h1 = hash_dst(dst, protocol, tunnelid);
- h2 = hash_src(src);
-
- for (s = rcu_dereference_bh(head->ht[h1]); s;
- s = rcu_dereference_bh(s->next)) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
- protocol == s->protocol &&
- !(s->dpi.mask &
- (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- tunnelid == s->tunnelid) {
-
- for (f = rcu_dereference_bh(s->ht[h2]); f;
- f = rcu_dereference_bh(f->next)) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
- !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
- &&
- src[0] == f->src[0] &&
- src[1] == f->src[1] &&
- src[2] == f->src[2]
-#endif
- ) {
- *res = f->res;
- RSVP_APPLY_RESULT();
-
-matched:
- if (f->tunnelhdr == 0)
- return 0;
-
- tunnelid = f->res.classid;
- nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
- }
-
- /* And wildcard bucket... */
- for (f = rcu_dereference_bh(s->ht[16]); f;
- f = rcu_dereference_bh(f->next)) {
- *res = f->res;
- RSVP_APPLY_RESULT();
- goto matched;
- }
- return -1;
- }
- }
- return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter __rcu **ins;
- struct rsvp_filter *pins;
- unsigned int h1 = h & 0xFF;
- unsigned int h2 = (h >> 8) & 0xFF;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
- ins = &pins->next, pins = rtnl_dereference(*ins)) {
- if (pins->handle == h) {
- RCU_INIT_POINTER(n->next, pins->next);
- rcu_assign_pointer(*ins, n);
- return;
- }
- }
- }
-
- /* Something went wrong if we are trying to replace a non-existent
- * node. Mind as well halt instead of silently failing.
- */
- BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1 = handle & 0xFF;
- unsigned int h2 = (handle >> 8) & 0xFF;
-
- if (h2 > 16)
- return NULL;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->handle == handle)
- return f;
- }
- }
- return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
- struct rsvp_head *data;
-
- data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
- if (data) {
- rcu_assign_pointer(tp->root, data);
- return 0;
- }
- return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
- tcf_exts_destroy(&f->exts);
- tcf_exts_put_net(&f->exts);
- kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
- struct rsvp_filter *f = container_of(to_rcu_work(work),
- struct rsvp_filter,
- rwork);
- rtnl_lock();
- __rsvp_delete_filter(f);
- rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
- tcf_unbind_filter(tp, &f->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (tcf_exts_get_net(&f->exts))
- tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
- else
- __rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int h1, h2;
-
- if (data == NULL)
- return;
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
-
- while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
- RCU_INIT_POINTER(data->ht[h1], s->next);
-
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
- rcu_assign_pointer(s->ht[h2], f->next);
- rsvp_delete_filter(tp, f);
- }
- }
- kfree_rcu(s, rcu);
- }
- }
- kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = arg;
- struct rsvp_filter __rcu **fp;
- unsigned int h = f->handle;
- struct rsvp_session __rcu **sp;
- struct rsvp_session *nsp, *s = f->sess;
- int i, h1;
-
- fp = &s->ht[(h >> 8) & 0xFF];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- if (nfp == f) {
- RCU_INIT_POINTER(*fp, f->next);
- rsvp_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 16; i++)
- if (s->ht[i])
- goto out;
-
- /* OK, session has no flows */
- sp = &head->ht[h & 0xFF];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if (nsp == s) {
- RCU_INIT_POINTER(*sp, s->next);
- kfree_rcu(s, rcu);
- goto out;
- }
- }
-
- break;
- }
- }
-
-out:
- *last = true;
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(head->ht[h1])) {
- *last = false;
- break;
- }
- }
-
- return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int i = 0xFFFF;
-
- while (i-- > 0) {
- u32 h;
-
- if ((data->hgenerator += 0x10000) == 0)
- data->hgenerator = 0x10000;
- h = data->hgenerator|salt;
- if (!rsvp_get(tp, h))
- return h;
- }
- return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
- int n = data->tgenerator >> 5;
- u32 b = 1 << (data->tgenerator & 0x1F);
-
- if (data->tmap[n] & b)
- return 0;
- data->tmap[n] |= b;
- return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
- struct rsvp_session __rcu **sht = data->ht;
- u32 tmap[256/32];
- int h1, h2;
-
- memset(tmap, 0, sizeof(tmap));
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
- for (s = rtnl_dereference(sht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->tunnelhdr == 0)
- continue;
- data->tgenerator = f->res.classid;
- tunnel_bts(data);
- }
- }
- }
- }
-
- memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
- int i, k;
-
- for (k = 0; k < 2; k++) {
- for (i = 255; i > 0; i--) {
- if (++data->tgenerator == 0)
- data->tgenerator = 1;
- if (tunnel_bts(data))
- return data->tgenerator;
- }
- tunnel_recycle(data);
- }
- return 0;
-}
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
- [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle, struct nlattr **tca,
- void **arg, u32 flags,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- struct rsvp_filter *f, *nfp;
- struct rsvp_filter __rcu **fp;
- struct rsvp_session *nsp, *s;
- struct rsvp_session __rcu **sp;
- struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_RSVP_MAX + 1];
- struct tcf_exts e;
- unsigned int h1, h2;
- __be32 *dst;
- int err;
-
- if (opt == NULL)
- return handle ? -EINVAL : 0;
-
- err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
- NULL);
- if (err < 0)
- return err;
-
- err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
- extack);
- if (err < 0)
- goto errout2;
-
- f = *arg;
- if (f) {
- /* Node exists: adjust only classid */
- struct rsvp_filter *n;
-
- if (f->handle != handle && handle)
- goto errout2;
-
- n = kmemdup(f, sizeof(*f), GFP_KERNEL);
- if (!n) {
- err = -ENOMEM;
- goto errout2;
- }
-
- err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
- TCA_RSVP_POLICE);
- if (err < 0) {
- kfree(n);
- goto errout2;
- }
-
- if (tb[TCA_RSVP_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
-
- tcf_exts_change(&n->exts, &e);
- rsvp_replace(tp, n, handle);
- return 0;
- }
-
- /* Now more serious part... */
- err = -EINVAL;
- if (handle)
- goto errout2;
- if (tb[TCA_RSVP_DST] == NULL)
- goto errout2;
-
- err = -ENOBUFS;
- f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
- if (f == NULL)
- goto errout2;
-
- err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- goto errout;
- h2 = 16;
- if (tb[TCA_RSVP_SRC]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
- h2 = hash_src(f->src);
- }
- if (tb[TCA_RSVP_PINFO]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO]);
- f->spi = pinfo->spi;
- f->tunnelhdr = pinfo->tunnelhdr;
- }
- if (tb[TCA_RSVP_CLASSID])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
- dst = nla_data(tb[TCA_RSVP_DST]);
- h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
- err = -ENOMEM;
- if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
- goto errout;
-
- if (f->tunnelhdr) {
- err = -EINVAL;
- if (f->res.classid > 255)
- goto errout;
-
- err = -ENOMEM;
- if (f->res.classid == 0 &&
- (f->res.classid = gen_tunnel(data)) == 0)
- goto errout;
- }
-
- for (sp = &data->ht[h1];
- (s = rtnl_dereference(*sp)) != NULL;
- sp = &s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
- pinfo && pinfo->protocol == s->protocol &&
- memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- pinfo->tunnelid == s->tunnelid) {
-
-insert:
- /* OK, we found appropriate session */
-
- fp = &s->ht[h2];
-
- f->sess = s;
- if (f->tunnelhdr == 0)
- tcf_bind_filter(tp, &f->res, base);
-
- tcf_exts_change(&f->exts, &e);
-
- fp = &s->ht[h2];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- __u32 mask = nfp->spi.mask & f->spi.mask;
-
- if (mask != f->spi.mask)
- break;
- }
- RCU_INIT_POINTER(f->next, nfp);
- rcu_assign_pointer(*fp, f);
-
- *arg = f;
- return 0;
- }
- }
-
- /* No session found. Create new one. */
-
- err = -ENOBUFS;
- s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
- if (s == NULL)
- goto errout;
- memcpy(s->dst, dst, sizeof(s->dst));
-
- if (pinfo) {
- s->dpi = pinfo->dpi;
- s->protocol = pinfo->protocol;
- s->tunnelid = pinfo->tunnelid;
- }
- sp = &data->ht[h1];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
- break;
- }
- RCU_INIT_POINTER(s->next, nsp);
- rcu_assign_pointer(*sp, s);
-
- goto insert;
-
-errout:
- tcf_exts_destroy(&f->exts);
- kfree(f);
-errout2:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
- bool rtnl_held)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- unsigned int h, h1;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < 256; h++) {
- struct rsvp_session *s;
-
- for (s = rtnl_dereference(head->ht[h]); s;
- s = rtnl_dereference(s->next)) {
- for (h1 = 0; h1 <= 16; h1++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h1]); f;
- f = rtnl_dereference(f->next)) {
- if (!tc_cls_stats_dump(tp, arg, f))
- return;
- }
- }
- }
- }
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
- struct rsvp_filter *f = fh;
- struct rsvp_session *s;
- struct nlattr *nest;
- struct tc_rsvp_pinfo pinfo;
-
- if (f == NULL)
- return skb->len;
- s = f->sess;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
- goto nla_put_failure;
- pinfo.dpi = s->dpi;
- pinfo.spi = f->spi;
- pinfo.protocol = s->protocol;
- pinfo.tunnelid = s->tunnelid;
- pinfo.tunnelhdr = f->tunnelhdr;
- pinfo.pad = 0;
- if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
- goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
- goto nla_put_failure;
- if (((f->handle >> 8) & 0xFF) != 16 &&
- nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &f->exts) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
- unsigned long base)
-{
- struct rsvp_filter *f = fh;
-
- tc_cls_bind_class(classid, cl, q, &f->res, base);
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
- .kind = RSVP_ID,
- .classify = RSVP_CLS,
- .init = rsvp_init,
- .destroy = rsvp_destroy,
- .get = rsvp_get,
- .change = rsvp_change,
- .delete = rsvp_delete,
- .walk = rsvp_walk,
- .dump = rsvp_dump,
- .bind_class = rsvp_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
- return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
- unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index e627cc32d633..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN 4
-#define RSVP_ID "rsvp6"
-#define RSVP_OPS cls_rsvp6_ops
-#define RSVP_CLS rsvp6_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
deleted file mode 100644
index ee2a050c887b..000000000000
--- a/net/sched/cls_tcindex.c
+++ /dev/null
@@ -1,716 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
- *
- * Written 1998,1999 by Werner Almesberger, EPFL ICA
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-#include <net/act_api.h>
-#include <net/netlink.h>
-#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
-#include <net/tc_wrapper.h>
-
-/*
- * Passing parameters to the root seems to be done more awkwardly than really
- * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
- * verified. FIXME.
- */
-
-#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
-#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
-
-
-struct tcindex_data;
-
-struct tcindex_filter_result {
- struct tcf_exts exts;
- struct tcf_result res;
- struct tcindex_data *p;
- struct rcu_work rwork;
-};
-
-struct tcindex_filter {
- u16 key;
- struct tcindex_filter_result result;
- struct tcindex_filter __rcu *next;
- struct rcu_work rwork;
-};
-
-
-struct tcindex_data {
- struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter __rcu **h; /* imperfect hash; */
- struct tcf_proto *tp;
- u16 mask; /* AND key with mask */
- u32 shift; /* shift ANDed key to the right */
- u32 hash; /* hash table size; 0 if undefined */
- u32 alloc_hash; /* allocated size */
- u32 fall_through; /* 0: only classify if explicit match */
- refcount_t refcnt; /* a temporary refcnt for perfect hash */
- struct rcu_work rwork;
-};
-
-static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
-{
- return tcf_exts_has_actions(&r->exts) || r->res.classid;
-}
-
-static void tcindex_data_get(struct tcindex_data *p)
-{
- refcount_inc(&p->refcnt);
-}
-
-static void tcindex_data_put(struct tcindex_data *p)
-{
- if (refcount_dec_and_test(&p->refcnt)) {
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
- }
-}
-
-static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
- u16 key)
-{
- if (p->perfect) {
- struct tcindex_filter_result *f = p->perfect + key;
-
- return tcindex_filter_is_set(f) ? f : NULL;
- } else if (p->h) {
- struct tcindex_filter __rcu **fp;
- struct tcindex_filter *f;
-
- fp = &p->h[key % p->hash];
- for (f = rcu_dereference_bh_rtnl(*fp);
- f;
- fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
- if (f->key == key)
- return &f->result;
- }
-
- return NULL;
-}
-
-TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb,
- const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct tcindex_data *p = rcu_dereference_bh(tp->root);
- struct tcindex_filter_result *f;
- int key = (skb->tc_index & p->mask) >> p->shift;
-
- pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
- skb, tp, res, p);
-
- f = tcindex_lookup(p, key);
- if (!f) {
- struct Qdisc *q = tcf_block_q(tp->chain->block);
-
- if (!p->fall_through)
- return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
- res->class = 0;
- pr_debug("alg 0x%x\n", res->classid);
- return 0;
- }
- *res = f->res;
- pr_debug("map 0x%x\n", res->classid);
-
- return tcf_exts_exec(skb, &f->exts, res);
-}
-
-
-static void *tcindex_get(struct tcf_proto *tp, u32 handle)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r;
-
- pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
- if (p->perfect && handle >= p->alloc_hash)
- return NULL;
- r = tcindex_lookup(p, handle);
- return r && tcindex_filter_is_set(r) ? r : NULL;
-}
-
-static int tcindex_init(struct tcf_proto *tp)
-{
- struct tcindex_data *p;
-
- pr_debug("tcindex_init(tp %p)\n", tp);
- p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- p->mask = 0xffff;
- p->hash = DEFAULT_HASH_SIZE;
- p->fall_through = 1;
- refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
- rcu_assign_pointer(tp->root, p);
- return 0;
-}
-
-static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
-{
- tcf_exts_destroy(&r->exts);
- tcf_exts_put_net(&r->exts);
- tcindex_data_put(r->p);
-}
-
-static void tcindex_destroy_rexts_work(struct work_struct *work)
-{
- struct tcindex_filter_result *r;
-
- r = container_of(to_rcu_work(work),
- struct tcindex_filter_result,
- rwork);
- rtnl_lock();
- __tcindex_destroy_rexts(r);
- rtnl_unlock();
-}
-
-static void __tcindex_destroy_fexts(struct tcindex_filter *f)
-{
- tcf_exts_destroy(&f->result.exts);
- tcf_exts_put_net(&f->result.exts);
- kfree(f);
-}
-
-static void tcindex_destroy_fexts_work(struct work_struct *work)
-{
- struct tcindex_filter *f = container_of(to_rcu_work(work),
- struct tcindex_filter,
- rwork);
-
- rtnl_lock();
- __tcindex_destroy_fexts(f);
- rtnl_unlock();
-}
-
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = arg;
- struct tcindex_filter __rcu **walk;
- struct tcindex_filter *f = NULL;
-
- pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
- if (p->perfect) {
- if (!r->res.class)
- return -ENOENT;
- } else {
- int i;
-
- for (i = 0; i < p->hash; i++) {
- walk = p->h + i;
- for (f = rtnl_dereference(*walk); f;
- walk = &f->next, f = rtnl_dereference(*walk)) {
- if (&f->result == r)
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- rcu_assign_pointer(*walk, rtnl_dereference(f->next));
- }
- tcf_unbind_filter(tp, &r->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (f) {
- if (tcf_exts_get_net(&f->result.exts))
- tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
- else
- __tcindex_destroy_fexts(f);
- } else {
- tcindex_data_get(p);
-
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
-
- *last = false;
- return 0;
-}
-
-static void tcindex_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- tcindex_data_put(p);
-}
-
-static inline int
-valid_perfect_hash(struct tcindex_data *p)
-{
- return p->hash > (p->mask >> p->shift);
-}
-
-static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
- [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
- [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
- [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
- [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
- [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
-};
-
-static int tcindex_filter_result_init(struct tcindex_filter_result *r,
- struct tcindex_data *p,
- struct net *net)
-{
- memset(r, 0, sizeof(*r));
- r->p = p;
- return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
- TCA_TCINDEX_POLICE);
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp);
-
-static void tcindex_partial_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- rtnl_lock();
- if (p->perfect)
- tcindex_free_perfect_hash(p);
- kfree(p);
- rtnl_unlock();
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp)
-{
- int i;
-
- for (i = 0; i < cp->hash; i++)
- tcf_exts_destroy(&cp->perfect[i].exts);
- kfree(cp->perfect);
-}
-
-static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
-{
- int i, err = 0;
-
- cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
- GFP_KERNEL | __GFP_NOWARN);
- if (!cp->perfect)
- return -ENOMEM;
-
- for (i = 0; i < cp->hash; i++) {
- err = tcf_exts_init(&cp->perfect[i].exts, net,
- TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- goto errout;
- cp->perfect[i].p = cp;
- }
-
- return 0;
-
-errout:
- tcindex_free_perfect_hash(cp);
- return err;
-}
-
-static int
-tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
- u32 handle, struct tcindex_data *p,
- struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
-{
- struct tcindex_filter_result new_filter_result;
- struct tcindex_data *cp = NULL, *oldp;
- struct tcindex_filter *f = NULL; /* make gcc behave */
- struct tcf_result cr = {};
- int err, balloc = 0;
- struct tcf_exts e;
-
- err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
- if (err < 0)
- goto errout;
-
- err = -ENOMEM;
- /* tcindex_data attributes must look atomic to classifier/lookup so
- * allocate new tcindex data and RCU assign it onto root. Keeping
- * perfect hash and hash pointers from old data.
- */
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp)
- goto errout;
-
- cp->mask = p->mask;
- cp->shift = p->shift;
- cp->hash = p->hash;
- cp->alloc_hash = p->alloc_hash;
- cp->fall_through = p->fall_through;
- cp->tp = tp;
- refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT]) {
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
- if (cp->shift > 16) {
- err = -EINVAL;
- goto errout;
- }
- }
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
- if (p->perfect) {
- int i;
-
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout;
- cp->alloc_hash = cp->hash;
- for (i = 0; i < min(cp->hash, p->hash); i++)
- cp->perfect[i].res = p->perfect[i].res;
- balloc = 1;
- }
- cp->h = p->h;
-
- err = tcindex_filter_result_init(&new_filter_result, cp, net);
- if (err < 0)
- goto errout_alloc;
- if (r)
- cr = r->res;
-
- err = -EBUSY;
-
- /* Hash already allocated, make sure that we still meet the
- * requirements for the allocated hash.
- */
- if (cp->perfect) {
- if (!valid_perfect_hash(cp) ||
- cp->hash > cp->alloc_hash)
- goto errout_alloc;
- } else if (cp->h && cp->hash != cp->alloc_hash) {
- goto errout_alloc;
- }
-
- err = -EINVAL;
- if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
-
- if (!cp->perfect && !cp->h)
- cp->alloc_hash = cp->hash;
-
- /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
- * but then, we'd fail handles that may become valid after some future
- * mask change. While this is extremely unlikely to ever matter,
- * the check below is safer (and also more backwards-compatible).
- */
- if (cp->perfect || valid_perfect_hash(cp))
- if (handle >= cp->alloc_hash)
- goto errout_alloc;
-
-
- err = -ENOMEM;
- if (!cp->perfect && !cp->h) {
- if (valid_perfect_hash(cp)) {
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout_alloc;
- balloc = 1;
- } else {
- struct tcindex_filter __rcu **hash;
-
- hash = kcalloc(cp->hash,
- sizeof(struct tcindex_filter *),
- GFP_KERNEL);
-
- if (!hash)
- goto errout_alloc;
-
- cp->h = hash;
- balloc = 2;
- }
- }
-
- if (cp->perfect)
- r = cp->perfect + handle;
- else
- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
-
- if (r == &new_filter_result) {
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
- goto errout_alloc;
- f->key = handle;
- f->next = NULL;
- err = tcindex_filter_result_init(&f->result, cp, net);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
- if (tb[TCA_TCINDEX_CLASSID]) {
- cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
- tcf_bind_filter(tp, &cr, base);
- }
-
- oldp = p;
- r->res = cr;
- tcf_exts_change(&r->exts, &e);
-
- rcu_assign_pointer(tp->root, cp);
-
- if (r == &new_filter_result) {
- struct tcindex_filter *nfp;
- struct tcindex_filter __rcu **fp;
-
- f->result.res = r->res;
- tcf_exts_change(&f->result.exts, &r->exts);
-
- fp = cp->h + (handle % cp->hash);
- for (nfp = rtnl_dereference(*fp);
- nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp))
- ; /* nothing */
-
- rcu_assign_pointer(*fp, f);
- } else {
- tcf_exts_destroy(&new_filter_result.exts);
- }
-
- if (oldp)
- tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
- return 0;
-
-errout_alloc:
- if (balloc == 1)
- tcindex_free_perfect_hash(cp);
- else if (balloc == 2)
- kfree(cp->h);
- tcf_exts_destroy(&new_filter_result.exts);
-errout:
- kfree(cp);
- tcf_exts_destroy(&e);
- return err;
-}
-
-static int
-tcindex_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, u32 flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = *arg;
- int err;
-
- pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
- "p %p,r %p,*arg %p\n",
- tp, handle, tca, arg, opt, p, r, *arg);
-
- if (!opt)
- return 0;
-
- err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
- tcindex_policy, NULL);
- if (err < 0)
- return err;
-
- return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], flags, extack);
-}
-
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
- bool rtnl_held)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter *f, *next;
- int i;
-
- pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- if (!p->perfect[i].res.class)
- continue;
- if (!tc_cls_stats_dump(tp, walker, p->perfect + i))
- return;
- }
- }
- if (!p->h)
- return;
- for (i = 0; i < p->hash; i++) {
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- if (!tc_cls_stats_dump(tp, walker, &f->result))
- return;
- }
- }
-}
-
-static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- int i;
-
- pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
-
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- struct tcindex_filter_result *r = p->perfect + i;
-
- /* tcf_queue_work() does not guarantee the ordering we
- * want, so we have to take this refcnt temporarily to
- * ensure 'p' is freed after all tcindex_filter_result
- * here. Imperfect hash does not need this, because it
- * uses linked lists rather than an array.
- */
- tcindex_data_get(p);
-
- tcf_unbind_filter(tp, &r->res);
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork,
- tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
- }
-
- for (i = 0; p->h && i < p->hash; i++) {
- struct tcindex_filter *f, *next;
- bool last;
-
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
- }
- }
-
- tcf_queue_work(&p->rwork, tcindex_destroy_work);
-}
-
-
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = fh;
- struct nlattr *nest;
-
- pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
- tp, fh, skb, t, p, r);
- pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (!fh) {
- t->tcm_handle = ~0; /* whatever ... */
- if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
- nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
- nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
- nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
- } else {
- if (p->perfect) {
- t->tcm_handle = r - p->perfect;
- } else {
- struct tcindex_filter *f;
- struct tcindex_filter __rcu **fp;
- int i;
-
- t->tcm_handle = 0;
- for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- fp = &p->h[i];
- for (f = rtnl_dereference(*fp);
- !t->tcm_handle && f;
- fp = &f->next, f = rtnl_dereference(*fp)) {
- if (&f->result == r)
- t->tcm_handle = f->key;
- }
- }
- }
- pr_debug("handle = %d\n", t->tcm_handle);
- if (r->res.class &&
- nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &r->exts) < 0)
- goto nla_put_failure;
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &r->exts) < 0)
- goto nla_put_failure;
- }
-
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
- void *q, unsigned long base)
-{
- struct tcindex_filter_result *r = fh;
-
- tc_cls_bind_class(classid, cl, q, &r->res, base);
-}
-
-static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
- .kind = "tcindex",
- .classify = tcindex_classify,
- .init = tcindex_init,
- .destroy = tcindex_destroy,
- .get = tcindex_get,
- .change = tcindex_change,
- .delete = tcindex_delete,
- .walk = tcindex_walk,
- .dump = tcindex_dump,
- .bind_class = tcindex_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_tcindex(void)
-{
- return register_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-static void __exit exit_tcindex(void)
-{
- unregister_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-module_init(init_tcindex)
-module_exit(exit_tcindex)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9780631b5b5..aba789c30a2e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1286,7 +1286,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
stab = qdisc_get_stab(tca[TCA_STAB], extack);
if (IS_ERR(stab)) {
err = PTR_ERR(stab);
- goto err_out4;
+ goto err_out3;
}
rcu_assign_pointer(sch->stab, stab);
}
@@ -1294,14 +1294,14 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (ops->init) {
err = ops->init(sch, tca[TCA_OPTIONS], extack);
if (err != 0)
- goto err_out5;
+ goto err_out4;
}
if (tca[TCA_RATE]) {
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
- goto err_out5;
+ goto err_out4;
}
err = gen_new_estimator(&sch->bstats,
@@ -1312,7 +1312,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
- goto err_out5;
+ goto err_out4;
}
}
@@ -1321,12 +1321,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
-err_out5:
- qdisc_put_stab(rtnl_dereference(sch->stab));
err_out4:
- /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+ /* Even if ops->init() failed, we call ops->destroy()
+ * like qdisc_create_dflt().
+ */
if (ops->destroy)
ops->destroy(sch);
+ qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
deleted file mode 100644
index 4a981ca90b0b..000000000000
--- a/net/sched/sch_atm.c
+++ /dev/null
@@ -1,706 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/atmdev.h>
-#include <linux/atmclip.h>
-#include <linux/rtnetlink.h>
-#include <linux/file.h> /* for fput */
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-/*
- * The ATM queuing discipline provides a framework for invoking classifiers
- * (aka "filters"), which in turn select classes of this queuing discipline.
- * Each class maps the flow(s) it is handling to a given VC. Multiple classes
- * may share the same VC.
- *
- * When creating a class, VCs are specified by passing the number of the open
- * socket descriptor by which the calling process references the VC. The kernel
- * keeps the VC open at least until all classes using it are removed.
- *
- * In this file, most functions are named atm_tc_* to avoid confusion with all
- * the atm_* in net/atm. This naming convention differs from what's used in the
- * rest of net/sched.
- *
- * Known bugs:
- * - sometimes messes up the IP stack
- * - any manipulations besides the few operations described in the README, are
- * untested and likely to crash the system
- * - should lock the flow while there is data in the queue (?)
- */
-
-#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
-
-struct atm_flow_data {
- struct Qdisc_class_common common;
- struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
- void (*old_pop)(struct atm_vcc *vcc,
- struct sk_buff *skb); /* chaining */
- struct atm_qdisc_data *parent; /* parent qdisc */
- struct socket *sock; /* for closing */
- int ref; /* reference count */
- struct gnet_stats_basic_sync bstats;
- struct gnet_stats_queue qstats;
- struct list_head list;
- struct atm_flow_data *excess; /* flow for excess traffic;
- NULL to set CLP instead */
- int hdr_len;
- unsigned char hdr[]; /* header data; MUST BE LAST */
-};
-
-struct atm_qdisc_data {
- struct atm_flow_data link; /* unclassified skbs go here */
- struct list_head flows; /* NB: "link" is also on this
- list */
- struct tasklet_struct task; /* dequeue tasklet */
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- list_for_each_entry(flow, &p->flows, list) {
- if (flow->common.classid == classid)
- return flow;
- }
- return NULL;
-}
-
-static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
- sch, p, flow, new, old);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (!new)
- new = &noop_qdisc;
- *old = flow->q;
- flow->q = new;
- if (*old)
- qdisc_reset(*old);
- return 0;
-}
-
-static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
- return flow ? flow->q : NULL;
-}
-
-static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- if (flow)
- flow->ref++;
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-/*
- * atm_tc_put handles all destructions, including the ones that are explicitly
- * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
- * anything that still seems to be in use.
- */
-static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (--flow->ref)
- return;
- pr_debug("atm_tc_put: destroying\n");
- list_del_init(&flow->list);
- pr_debug("atm_tc_put: qdisc %p\n", flow->q);
- qdisc_put(flow->q);
- tcf_block_put(flow->block);
- if (flow->sock) {
- pr_debug("atm_tc_put: f_count %ld\n",
- file_count(flow->sock->file));
- flow->vcc->pop = flow->old_pop;
- sockfd_put(flow->sock);
- }
- if (flow->excess)
- atm_tc_put(sch, (unsigned long)flow->excess);
- if (flow != &p->link)
- kfree(flow);
- /*
- * If flow == &p->link, the qdisc no longer works at this point and
- * needs to be removed. (By the caller of atm_tc_put.)
- */
-}
-
-static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
-{
- struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
-
- pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
- VCC2FLOW(vcc)->old_pop(vcc, skb);
- tasklet_schedule(&p->task);
-}
-
-static const u8 llc_oui_ip[] = {
- 0xaa, /* DSAP: non-ISO */
- 0xaa, /* SSAP: non-ISO */
- 0x03, /* Ctrl: Unnumbered Information Command PDU */
- 0x00, /* OUI: EtherType */
- 0x00, 0x00,
- 0x08, 0x00
-}; /* Ethertype IP (0800) */
-
-static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
- [TCA_ATM_FD] = { .type = NLA_U32 },
- [TCA_ATM_EXCESS] = { .type = NLA_U32 },
-};
-
-static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
- struct atm_flow_data *excess = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_ATM_MAX + 1];
- struct socket *sock;
- int fd, error, hdr_len;
- void *hdr;
-
- pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
- "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
- /*
- * The concept of parents doesn't apply for this qdisc.
- */
- if (parent && parent != TC_H_ROOT && parent != sch->handle)
- return -EINVAL;
- /*
- * ATM classes cannot be changed. In order to change properties of the
- * ATM connection, that socket needs to be modified directly (via the
- * native ATM API. In order to send a flow to a different VC, the old
- * class needs to be removed and a new one added. (This may be changed
- * later.)
- */
- if (flow)
- return -EBUSY;
- if (opt == NULL)
- return -EINVAL;
-
- error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
- NULL);
- if (error < 0)
- return error;
-
- if (!tb[TCA_ATM_FD])
- return -EINVAL;
- fd = nla_get_u32(tb[TCA_ATM_FD]);
- pr_debug("atm_tc_change: fd %d\n", fd);
- if (tb[TCA_ATM_HDR]) {
- hdr_len = nla_len(tb[TCA_ATM_HDR]);
- hdr = nla_data(tb[TCA_ATM_HDR]);
- } else {
- hdr_len = RFC1483LLC_LEN;
- hdr = NULL; /* default LLC/SNAP for IP */
- }
- if (!tb[TCA_ATM_EXCESS])
- excess = NULL;
- else {
- excess = (struct atm_flow_data *)
- atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
- if (!excess)
- return -ENOENT;
- }
- pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
- opt->nla_type, nla_len(opt), hdr_len);
- sock = sockfd_lookup(fd, &error);
- if (!sock)
- return error; /* f_count++ */
- pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
- if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
- error = -EPROTOTYPE;
- goto err_out;
- }
- /* @@@ should check if the socket is really operational or we'll crash
- on vcc->send */
- if (classid) {
- if (TC_H_MAJ(classid ^ sch->handle)) {
- pr_debug("atm_tc_change: classid mismatch\n");
- error = -EINVAL;
- goto err_out;
- }
- } else {
- int i;
- unsigned long cl;
-
- for (i = 1; i < 0x8000; i++) {
- classid = TC_H_MAKE(sch->handle, 0x8000 | i);
- cl = atm_tc_find(sch, classid);
- if (!cl)
- break;
- }
- }
- pr_debug("atm_tc_change: new id %x\n", classid);
- flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
- pr_debug("atm_tc_change: flow %p\n", flow);
- if (!flow) {
- error = -ENOBUFS;
- goto err_out;
- }
-
- error = tcf_block_get(&flow->block, &flow->filter_list, sch,
- extack);
- if (error) {
- kfree(flow);
- goto err_out;
- }
-
- flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- extack);
- if (!flow->q)
- flow->q = &noop_qdisc;
- pr_debug("atm_tc_change: qdisc %p\n", flow->q);
- flow->sock = sock;
- flow->vcc = ATM_SD(sock); /* speedup */
- flow->vcc->user_back = flow;
- pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
- flow->old_pop = flow->vcc->pop;
- flow->parent = p;
- flow->vcc->pop = sch_atm_pop;
- flow->common.classid = classid;
- flow->ref = 1;
- flow->excess = excess;
- list_add(&flow->list, &p->link.list);
- flow->hdr_len = hdr_len;
- if (hdr)
- memcpy(flow->hdr, hdr, hdr_len);
- else
- memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
- *arg = (unsigned long)flow;
- return 0;
-err_out:
- sockfd_put(sock);
- return error;
-}
-
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
- return -EBUSY;
- /*
- * Reference count must be 2: one for "keepalive" (set at class
- * creation), and one for the reference held when calling delete.
- */
- if (flow->ref < 2) {
- pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
- return -EINVAL;
- }
- if (flow->ref > 2)
- return -EBUSY; /* catch references via excess, etc. */
- atm_tc_put(sch, arg);
- return 0;
-}
-
-static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
- if (walker->stop)
- return;
- list_for_each_entry(flow, &p->flows, list) {
- if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker))
- break;
- }
-}
-
-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- return flow ? flow->block : p->link.block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- struct tcf_result res;
- int result;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-
- pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- result = TC_ACT_OK; /* be nice to gcc */
- flow = NULL;
- if (TC_H_MAJ(skb->priority) != sch->handle ||
- !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
- struct tcf_proto *fl;
-
- list_for_each_entry(flow, &p->flows, list) {
- fl = rcu_dereference_bh(flow->filter_list);
- if (fl) {
- result = tcf_classify(skb, NULL, fl, &res, true);
- if (result < 0)
- continue;
- if (result == TC_ACT_SHOT)
- goto done;
-
- flow = (struct atm_flow_data *)res.class;
- if (!flow)
- flow = lookup_flow(sch, res.classid);
- goto drop;
- }
- }
- flow = NULL;
-done:
- ;
- }
- if (!flow) {
- flow = &p->link;
- } else {
- if (flow->vcc)
- ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
- /*@@@ looks good ... but it's not supposed to work :-) */
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- __qdisc_drop(skb, to_free);
- goto drop;
- case TC_ACT_RECLASSIFY:
- if (flow->excess)
- flow = flow->excess;
- else
- ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
- break;
- }
-#endif
- }
-
- ret = qdisc_enqueue(skb, flow->q, to_free);
- if (ret != NET_XMIT_SUCCESS) {
-drop: __maybe_unused
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- if (flow)
- flow->qstats.drops++;
- }
- return ret;
- }
- /*
- * Okay, this may seem weird. We pretend we've dropped the packet if
- * it goes via ATM. The reason for this is that the outer qdisc
- * expects to be able to q->dequeue the packet later on if we return
- * success at this place. Also, sch->q.qdisc needs to reflect whether
- * there is a packet egligible for dequeuing or not. Note that the
- * statistics of the outer qdisc are necessarily wrong because of all
- * this. There's currently no correct solution for this.
- */
- if (flow == &p->link) {
- sch->q.qlen++;
- return NET_XMIT_SUCCESS;
- }
- tasklet_schedule(&p->task);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-/*
- * Dequeue packets and send them over ATM. Note that we quite deliberately
- * avoid checking net_device's flow control here, simply because sch_atm
- * uses its own channels, which have nothing to do with any CLIP/LANE/or
- * non-ATM interfaces.
- */
-
-static void sch_atm_dequeue(struct tasklet_struct *t)
-{
- struct atm_qdisc_data *p = from_tasklet(p, t, task);
- struct Qdisc *sch = qdisc_from_priv(p);
- struct atm_flow_data *flow;
- struct sk_buff *skb;
-
- pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- if (flow == &p->link)
- continue;
- /*
- * If traffic is properly shaped, this won't generate nasty
- * little bursts. Otherwise, it may ... (but that's okay)
- */
- while ((skb = flow->q->ops->peek(flow->q))) {
- if (!atm_may_send(flow->vcc, skb->truesize))
- break;
-
- skb = qdisc_dequeue_peeked(flow->q);
- if (unlikely(!skb))
- break;
-
- qdisc_bstats_update(sch, skb);
- bstats_update(&flow->bstats, skb);
- pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
- /* remove any LL header somebody else has attached */
- skb_pull(skb, skb_network_offset(skb));
- if (skb_headroom(skb) < flow->hdr_len) {
- struct sk_buff *new;
-
- new = skb_realloc_headroom(skb, flow->hdr_len);
- dev_kfree_skb(skb);
- if (!new)
- continue;
- skb = new;
- }
- pr_debug("sch_atm_dequeue: ip %p, data %p\n",
- skb_network_header(skb), skb->data);
- ATM_SKB(skb)->vcc = flow->vcc;
- memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
- flow->hdr_len);
- refcount_add(skb->truesize,
- &sk_atm(flow->vcc)->sk_wmem_alloc);
- /* atm.atm_options are already set by atm_tc_enqueue */
- flow->vcc->send(flow->vcc, skb);
- }
- }
-}
-
-static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
-
- pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
- tasklet_schedule(&p->task);
- skb = qdisc_dequeue_peeked(p->link.q);
- if (skb)
- sch->q.qlen--;
- return skb;
-}
-
-static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
-
- return p->link.q->ops->peek(p->link.q);
-}
-
-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
- INIT_LIST_HEAD(&p->flows);
- INIT_LIST_HEAD(&p->link.list);
- gnet_stats_basic_sync_init(&p->link.bstats);
- list_add(&p->link.list, &p->flows);
- p->link.q = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, sch->handle, extack);
- if (!p->link.q)
- p->link.q = &noop_qdisc;
- pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
-
- err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
- extack);
- if (err)
- return err;
-
- tasklet_setup(&p->task, sch_atm_dequeue);
- return 0;
-}
-
-static void atm_tc_reset(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list)
- qdisc_reset(flow->q);
-}
-
-static void atm_tc_destroy(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow, *tmp;
-
- pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- tcf_block_put(flow->block);
- flow->block = NULL;
- }
-
- list_for_each_entry_safe(flow, tmp, &p->flows, list) {
- if (flow->ref > 1)
- pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
- atm_tc_put(sch, (unsigned long)flow);
- }
- tasklet_kill(&p->task);
-}
-
-static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
- struct nlattr *nest;
-
- pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
- sch, p, flow, skb, tcm);
- if (list_empty(&flow->list))
- return -EINVAL;
- tcm->tcm_handle = flow->common.classid;
- tcm->tcm_info = flow->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
- goto nla_put_failure;
- if (flow->vcc) {
- struct sockaddr_atmpvc pvc;
- int state;
-
- memset(&pvc, 0, sizeof(pvc));
- pvc.sap_family = AF_ATMPVC;
- pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
- pvc.sap_addr.vpi = flow->vcc->vpi;
- pvc.sap_addr.vci = flow->vcc->vci;
- if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
- goto nla_put_failure;
- state = ATM_VF2VS(flow->vcc->flags);
- if (nla_put_u32(skb, TCA_ATM_STATE, state))
- goto nla_put_failure;
- }
- if (flow->excess) {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
- goto nla_put_failure;
- } else {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
- goto nla_put_failure;
- }
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-static int
-atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
- gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
- return -1;
-
- return 0;
-}
-
-static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- return 0;
-}
-
-static const struct Qdisc_class_ops atm_class_ops = {
- .graft = atm_tc_graft,
- .leaf = atm_tc_leaf,
- .find = atm_tc_find,
- .change = atm_tc_change,
- .delete = atm_tc_delete,
- .walk = atm_tc_walk,
- .tcf_block = atm_tc_tcf_block,
- .bind_tcf = atm_tc_bind_filter,
- .unbind_tcf = atm_tc_put,
- .dump = atm_tc_dump_class,
- .dump_stats = atm_tc_dump_class_stats,
-};
-
-static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
- .cl_ops = &atm_class_ops,
- .id = "atm",
- .priv_size = sizeof(struct atm_qdisc_data),
- .enqueue = atm_tc_enqueue,
- .dequeue = atm_tc_dequeue,
- .peek = atm_tc_peek,
- .init = atm_tc_init,
- .reset = atm_tc_reset,
- .destroy = atm_tc_destroy,
- .dump = atm_tc_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init atm_init(void)
-{
- return register_qdisc(&atm_qdisc_ops);
-}
-
-static void __exit atm_exit(void)
-{
- unregister_qdisc(&atm_qdisc_ops);
-}
-
-module_init(atm_init)
-module_exit(atm_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
deleted file mode 100644
index 36db5f6782f2..000000000000
--- a/net/sched/sch_cbq.c
+++ /dev/null
@@ -1,1727 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/sch_cbq.c Class-Based Queueing discipline.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-
-/* Class-Based Queueing (CBQ) algorithm.
- =======================================
-
- Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
- Management Models for Packet Networks",
- IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-
- [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
-
- [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
- Parameters", 1996
-
- [4] Sally Floyd and Michael Speer, "Experimental Results
- for Class-Based Queueing", 1998, not published.
-
- -----------------------------------------------------------------------
-
- Algorithm skeleton was taken from NS simulator cbq.cc.
- If someone wants to check this code against the LBL version,
- he should take into account that ONLY the skeleton was borrowed,
- the implementation is different. Particularly:
-
- --- The WRR algorithm is different. Our version looks more
- reasonable (I hope) and works when quanta are allowed to be
- less than MTU, which is always the case when real time classes
- have small rates. Note, that the statement of [3] is
- incomplete, delay may actually be estimated even if class
- per-round allotment is less than MTU. Namely, if per-round
- allotment is W*r_i, and r_1+...+r_k = r < 1
-
- delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
-
- In the worst case we have IntServ estimate with D = W*r+k*MTU
- and C = MTU*r. The proof (if correct at all) is trivial.
-
-
- --- It seems that cbq-2.0 is not very accurate. At least, I cannot
- interpret some places, which look like wrong translations
- from NS. Anyone is advised to find these differences
- and explain to me, why I am wrong 8).
-
- --- Linux has no EOI event, so that we cannot estimate true class
- idle time. Workaround is to consider the next dequeue event
- as sign that previous packet is finished. This is wrong because of
- internal device queueing, but on a permanently loaded link it is true.
- Moreover, combined with clock integrator, this scheme looks
- very close to an ideal solution. */
-
-struct cbq_sched_data;
-
-
-struct cbq_class {
- struct Qdisc_class_common common;
- struct cbq_class *next_alive; /* next class with backlog in this priority band */
-
-/* Parameters */
- unsigned char priority; /* class priority */
- unsigned char priority2; /* priority to be used after overlimit */
- unsigned char ewma_log; /* time constant for idle time calculation */
-
- u32 defmap;
-
- /* Link-sharing scheduler parameters */
- long maxidle; /* Class parameters: see below. */
- long offtime;
- long minidle;
- u32 avpkt;
- struct qdisc_rate_table *R_tab;
-
- /* General scheduler (WRR) parameters */
- long allot;
- long quantum; /* Allotment per WRR round */
- long weight; /* Relative allotment: see below */
-
- struct Qdisc *qdisc; /* Ptr to CBQ discipline */
- struct cbq_class *split; /* Ptr to split node */
- struct cbq_class *share; /* Ptr to LS parent in the class tree */
- struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
- struct cbq_class *borrow; /* NULL if class is bandwidth limited;
- parent otherwise */
- struct cbq_class *sibling; /* Sibling chain */
- struct cbq_class *children; /* Pointer to children chain */
-
- struct Qdisc *q; /* Elementary queueing discipline */
-
-
-/* Variables */
- unsigned char cpriority; /* Effective priority */
- unsigned char delayed;
- unsigned char level; /* level of the class in hierarchy:
- 0 for leaf classes, and maximal
- level of children + 1 for nodes.
- */
-
- psched_time_t last; /* Last end of service */
- psched_time_t undertime;
- long avgidle;
- long deficit; /* Saved deficit for WRR */
- psched_time_t penalized;
- struct gnet_stats_basic_sync bstats;
- struct gnet_stats_queue qstats;
- struct net_rate_estimator __rcu *rate_est;
- struct tc_cbq_xstats xstats;
-
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
-
- int filters;
-
- struct cbq_class *defaults[TC_PRIO_MAX + 1];
-};
-
-struct cbq_sched_data {
- struct Qdisc_class_hash clhash; /* Hash table of all classes */
- int nclasses[TC_CBQ_MAXPRIO + 1];
- unsigned int quanta[TC_CBQ_MAXPRIO + 1];
-
- struct cbq_class link;
-
- unsigned int activemask;
- struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
- with backlog */
-
-#ifdef CONFIG_NET_CLS_ACT
- struct cbq_class *rx_class;
-#endif
- struct cbq_class *tx_class;
- struct cbq_class *tx_borrowed;
- int tx_len;
- psched_time_t now; /* Cached timestamp */
- unsigned int pmask;
-
- struct qdisc_watchdog watchdog; /* Watchdog timer,
- started when CBQ has
- backlog, but cannot
- transmit just now */
- psched_tdiff_t wd_expires;
- int toplevel;
- u32 hgenerator;
-};
-
-
-#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
-
-static inline struct cbq_class *
-cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
-{
- struct Qdisc_class_common *clc;
-
- clc = qdisc_class_find(&q->clhash, classid);
- if (clc == NULL)
- return NULL;
- return container_of(clc, struct cbq_class, common);
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-
-static struct cbq_class *
-cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
-{
- struct cbq_class *cl;
-
- for (cl = this->tparent; cl; cl = cl->tparent) {
- struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
-
- if (new != NULL && new != this)
- return new;
- }
- return NULL;
-}
-
-#endif
-
-/* Classify packet. The procedure is pretty complicated, but
- * it allows us to combine link sharing and priority scheduling
- * transparently.
- *
- * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
- * so that it resolves to split nodes. Then packets are classified
- * by logical priority, or a more specific classifier may be attached
- * to the split node.
- */
-
-static struct cbq_class *
-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *head = &q->link;
- struct cbq_class **defmap;
- struct cbq_class *cl = NULL;
- u32 prio = skb->priority;
- struct tcf_proto *fl;
- struct tcf_result res;
-
- /*
- * Step 1. If skb->priority points to one of our classes, use it.
- */
- if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
- (cl = cbq_class_lookup(q, prio)) != NULL)
- return cl;
-
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- for (;;) {
- int result = 0;
- defmap = head->defaults;
-
- fl = rcu_dereference_bh(head->filter_list);
- /*
- * Step 2+n. Apply classifier.
- */
- result = tcf_classify(skb, NULL, fl, &res, true);
- if (!fl || result < 0)
- goto fallback;
- if (result == TC_ACT_SHOT)
- return NULL;
-
- cl = (void *)res.class;
- if (!cl) {
- if (TC_H_MAJ(res.classid))
- cl = cbq_class_lookup(q, res.classid);
- else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
- cl = defmap[TC_PRIO_BESTEFFORT];
-
- if (cl == NULL)
- goto fallback;
- }
- if (cl->level >= head->level)
- goto fallback;
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- fallthrough;
- case TC_ACT_RECLASSIFY:
- return cbq_reclassify(skb, cl);
- }
-#endif
- if (cl->level == 0)
- return cl;
-
- /*
- * Step 3+n. If classifier selected a link sharing class,
- * apply agency specific classifier.
- * Repeat this procedure until we hit a leaf node.
- */
- head = cl;
- }
-
-fallback:
- cl = head;
-
- /*
- * Step 4. No success...
- */
- if (TC_H_MAJ(prio) == 0 &&
- !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
- !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
- return head;
-
- return cl;
-}
-
-/*
- * A packet has just been enqueued on the empty class.
- * cbq_activate_class adds it to the tail of active class list
- * of its priority band.
- */
-
-static inline void cbq_activate_class(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- int prio = cl->cpriority;
- struct cbq_class *cl_tail;
-
- cl_tail = q->active[prio];
- q->active[prio] = cl;
-
- if (cl_tail != NULL) {
- cl->next_alive = cl_tail->next_alive;
- cl_tail->next_alive = cl;
- } else {
- cl->next_alive = cl;
- q->activemask |= (1<<prio);
- }
-}
-
-/*
- * Unlink class from active chain.
- * Note that this same procedure is done directly in cbq_dequeue*
- * during round-robin procedure.
- */
-
-static void cbq_deactivate_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- int prio = this->cpriority;
- struct cbq_class *cl;
- struct cbq_class *cl_prev = q->active[prio];
-
- do {
- cl = cl_prev->next_alive;
- if (cl == this) {
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- if (cl == q->active[prio]) {
- q->active[prio] = cl_prev;
- if (cl == q->active[prio]) {
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- return;
- }
- }
- return;
- }
- } while ((cl_prev = cl) != q->active[prio]);
-}
-
-static void
-cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- int toplevel = q->toplevel;
-
- if (toplevel > cl->level) {
- psched_time_t now = psched_get_time();
-
- do {
- if (cl->undertime < now) {
- q->toplevel = cl->level;
- return;
- }
- } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
- }
-}
-
-static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- int ret;
- struct cbq_class *cl = cbq_classify(skb, sch, &ret);
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = cl;
-#endif
- if (cl == NULL) {
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
- }
-
- ret = qdisc_enqueue(skb, cl->q, to_free);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- cbq_mark_toplevel(q, cl);
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return ret;
- }
-
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
- }
- return ret;
-}
-
-/* Overlimit action: penalize leaf class by adding offtime */
-static void cbq_overlimit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = cl->undertime - q->now;
-
- if (!cl->delayed) {
- delay += cl->offtime;
-
- /*
- * Class goes to sleep, so that it will have no
- * chance to work avgidle. Let's forgive it 8)
- *
- * BTW cbq-2.0 has a crap in this
- * place, apparently they forgot to shift it by cl->ewma_log.
- */
- if (cl->avgidle < 0)
- delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
- if (cl->avgidle < cl->minidle)
- cl->avgidle = cl->minidle;
- if (delay <= 0)
- delay = 1;
- cl->undertime = q->now + delay;
-
- cl->xstats.overactions++;
- cl->delayed = 1;
- }
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
-
- /* Dirty work! We must schedule wakeups based on
- * real available rate, rather than leaf rate,
- * which may be tiny (even zero).
- */
- if (q->toplevel == TC_CBQ_MAXLEVEL) {
- struct cbq_class *b;
- psched_tdiff_t base_delay = q->wd_expires;
-
- for (b = cl->borrow; b; b = b->borrow) {
- delay = b->undertime - q->now;
- if (delay < base_delay) {
- if (delay <= 0)
- delay = 1;
- base_delay = delay;
- }
- }
-
- q->wd_expires = base_delay;
- }
-}
-
-/*
- * It is mission critical procedure.
- *
- * We "regenerate" toplevel cutoff, if transmitting class
- * has backlog and it is not regulated. It is not part of
- * original CBQ description, but looks more reasonable.
- * Probably, it is wrong. This question needs further investigation.
- */
-
-static inline void
-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
- struct cbq_class *borrowed)
-{
- if (cl && q->toplevel >= borrowed->level) {
- if (cl->q->q.qlen > 1) {
- do {
- if (borrowed->undertime == PSCHED_PASTPERFECT) {
- q->toplevel = borrowed->level;
- return;
- }
- } while ((borrowed = borrowed->borrow) != NULL);
- }
-#if 0
- /* It is not necessary now. Uncommenting it
- will save CPU cycles, but decrease fairness.
- */
- q->toplevel = TC_CBQ_MAXLEVEL;
-#endif
- }
-}
-
-static void
-cbq_update(struct cbq_sched_data *q)
-{
- struct cbq_class *this = q->tx_class;
- struct cbq_class *cl = this;
- int len = q->tx_len;
- psched_time_t now;
-
- q->tx_class = NULL;
- /* Time integrator. We calculate EOS time
- * by adding expected packet transmission time.
- */
- now = q->now + L2T(&q->link, len);
-
- for ( ; cl; cl = cl->share) {
- long avgidle = cl->avgidle;
- long idle;
-
- _bstats_update(&cl->bstats, len, 1);
-
- /*
- * (now - last) is total time between packet right edges.
- * (last_pktlen/rate) is "virtual" busy time, so that
- *
- * idle = (now - last) - last_pktlen/rate
- */
-
- idle = now - cl->last;
- if ((unsigned long)idle > 128*1024*1024) {
- avgidle = cl->maxidle;
- } else {
- idle -= L2T(cl, len);
-
- /* true_avgidle := (1-W)*true_avgidle + W*idle,
- * where W=2^{-ewma_log}. But cl->avgidle is scaled:
- * cl->avgidle == true_avgidle/W,
- * hence:
- */
- avgidle += idle - (avgidle>>cl->ewma_log);
- }
-
- if (avgidle <= 0) {
- /* Overlimit or at-limit */
-
- if (avgidle < cl->minidle)
- avgidle = cl->minidle;
-
- cl->avgidle = avgidle;
-
- /* Calculate expected time, when this class
- * will be allowed to send.
- * It will occur, when:
- * (1-W)*true_avgidle + W*delay = 0, i.e.
- * idle = (1/W - 1)*(-true_avgidle)
- * or
- * idle = (1 - W)*(-cl->avgidle);
- */
- idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
-
- /*
- * That is not all.
- * To maintain the rate allocated to the class,
- * we add to undertime virtual clock,
- * necessary to complete transmitted packet.
- * (len/phys_bandwidth has been already passed
- * to the moment of cbq_update)
- */
-
- idle -= L2T(&q->link, len);
- idle += L2T(cl, len);
-
- cl->undertime = now + idle;
- } else {
- /* Underlimit */
-
- cl->undertime = PSCHED_PASTPERFECT;
- if (avgidle > cl->maxidle)
- cl->avgidle = cl->maxidle;
- else
- cl->avgidle = avgidle;
- }
- if ((s64)(now - cl->last) > 0)
- cl->last = now;
- }
-
- cbq_update_toplevel(q, this, q->tx_borrowed);
-}
-
-static inline struct cbq_class *
-cbq_under_limit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *this_cl = cl;
-
- if (cl->tparent == NULL)
- return cl;
-
- if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
- cl->delayed = 0;
- return cl;
- }
-
- do {
- /* It is very suspicious place. Now overlimit
- * action is generated for not bounded classes
- * only if link is completely congested.
- * Though it is in agree with ancestor-only paradigm,
- * it looks very stupid. Particularly,
- * it means that this chunk of code will either
- * never be called or result in strong amplification
- * of burstiness. Dangerous, silly, and, however,
- * no another solution exists.
- */
- cl = cl->borrow;
- if (!cl) {
- this_cl->qstats.overlimits++;
- cbq_overlimit(this_cl);
- return NULL;
- }
- if (cl->level > q->toplevel)
- return NULL;
- } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
-
- cl->delayed = 0;
- return cl;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_prio(struct Qdisc *sch, int prio)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl_tail, *cl_prev, *cl;
- struct sk_buff *skb;
- int deficit;
-
- cl_tail = cl_prev = q->active[prio];
- cl = cl_prev->next_alive;
-
- do {
- deficit = 0;
-
- /* Start round */
- do {
- struct cbq_class *borrow = cl;
-
- if (cl->q->q.qlen &&
- (borrow = cbq_under_limit(cl)) == NULL)
- goto skip_class;
-
- if (cl->deficit <= 0) {
- /* Class exhausted its allotment per
- * this round. Switch to the next one.
- */
- deficit = 1;
- cl->deficit += cl->quantum;
- goto next_class;
- }
-
- skb = cl->q->dequeue(cl->q);
-
- /* Class did not give us any skb :-(
- * It could occur even if cl->q->q.qlen != 0
- * f.e. if cl->q == "tbf"
- */
- if (skb == NULL)
- goto skip_class;
-
- cl->deficit -= qdisc_pkt_len(skb);
- q->tx_class = cl;
- q->tx_borrowed = borrow;
- if (borrow != cl) {
-#ifndef CBQ_XSTATS_BORROWS_BYTES
- borrow->xstats.borrows++;
- cl->xstats.borrows++;
-#else
- borrow->xstats.borrows += qdisc_pkt_len(skb);
- cl->xstats.borrows += qdisc_pkt_len(skb);
-#endif
- }
- q->tx_len = qdisc_pkt_len(skb);
-
- if (cl->deficit <= 0) {
- q->active[prio] = cl;
- cl = cl->next_alive;
- cl->deficit += cl->quantum;
- }
- return skb;
-
-skip_class:
- if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
- /* Class is empty or penalized.
- * Unlink it from active chain.
- */
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- /* Did cl_tail point to it? */
- if (cl == cl_tail) {
- /* Repair it! */
- cl_tail = cl_prev;
-
- /* Was it the last class in this band? */
- if (cl == cl_tail) {
- /* Kill the band! */
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
- return NULL;
- }
-
- q->active[prio] = cl_tail;
- }
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- cl = cl_prev;
- }
-
-next_class:
- cl_prev = cl;
- cl = cl->next_alive;
- } while (cl_prev != cl_tail);
- } while (deficit);
-
- q->active[prio] = cl_prev;
-
- return NULL;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_1(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct sk_buff *skb;
- unsigned int activemask;
-
- activemask = q->activemask & 0xFF;
- while (activemask) {
- int prio = ffz(~activemask);
- activemask &= ~(1<<prio);
- skb = cbq_dequeue_prio(sch, prio);
- if (skb)
- return skb;
- }
- return NULL;
-}
-
-static struct sk_buff *
-cbq_dequeue(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- struct cbq_sched_data *q = qdisc_priv(sch);
- psched_time_t now;
-
- now = psched_get_time();
-
- if (q->tx_class)
- cbq_update(q);
-
- q->now = now;
-
- for (;;) {
- q->wd_expires = 0;
-
- skb = cbq_dequeue_1(sch);
- if (skb) {
- qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
- return skb;
- }
-
- /* All the classes are overlimit.
- *
- * It is possible, if:
- *
- * 1. Scheduler is empty.
- * 2. Toplevel cutoff inhibited borrowing.
- * 3. Root class is overlimit.
- *
- * Reset 2d and 3d conditions and retry.
- *
- * Note, that NS and cbq-2.0 are buggy, peeking
- * an arbitrary class is appropriate for ancestor-only
- * sharing, but not for toplevel algorithm.
- *
- * Our version is better, but slower, because it requires
- * two passes, but it is unavoidable with top-level sharing.
- */
-
- if (q->toplevel == TC_CBQ_MAXLEVEL &&
- q->link.undertime == PSCHED_PASTPERFECT)
- break;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->link.undertime = PSCHED_PASTPERFECT;
- }
-
- /* No packets in scheduler or nobody wants to give them to us :-(
- * Sigh... start watchdog timer in the last case.
- */
-
- if (sch->q.qlen) {
- qdisc_qstats_overlimit(sch);
- if (q->wd_expires)
- qdisc_watchdog_schedule(&q->watchdog,
- now + q->wd_expires);
- }
- return NULL;
-}
-
-/* CBQ class maintenance routines */
-
-static void cbq_adjust_levels(struct cbq_class *this)
-{
- if (this == NULL)
- return;
-
- do {
- int level = 0;
- struct cbq_class *cl;
-
- cl = this->children;
- if (cl) {
- do {
- if (cl->level > level)
- level = cl->level;
- } while ((cl = cl->sibling) != this->children);
- }
- this->level = level + 1;
- } while ((this = this->tparent) != NULL);
-}
-
-static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
-{
- struct cbq_class *cl;
- unsigned int h;
-
- if (q->quanta[prio] == 0)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- /* BUGGGG... Beware! This expression suffer of
- * arithmetic overflows!
- */
- if (cl->priority == prio) {
- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
- q->quanta[prio];
- }
- if (cl->quantum <= 0 ||
- cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
- pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
- cl->common.classid, cl->quantum);
- cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
- }
- }
- }
-}
-
-static void cbq_sync_defmap(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *split = cl->split;
- unsigned int h;
- int i;
-
- if (split == NULL)
- return;
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
- split->defaults[i] = NULL;
- }
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- int level = split->level;
-
- if (split->defaults[i])
- continue;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- struct cbq_class *c;
-
- hlist_for_each_entry(c, &q->clhash.hash[h],
- common.hnode) {
- if (c->split == split && c->level < level &&
- c->defmap & (1<<i)) {
- split->defaults[i] = c;
- level = c->level;
- }
- }
- }
- }
-}
-
-static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
-{
- struct cbq_class *split = NULL;
-
- if (splitid == 0) {
- split = cl->split;
- if (!split)
- return;
- splitid = split->common.classid;
- }
-
- if (split == NULL || split->common.classid != splitid) {
- for (split = cl->tparent; split; split = split->tparent)
- if (split->common.classid == splitid)
- break;
- }
-
- if (split == NULL)
- return;
-
- if (cl->split != split) {
- cl->defmap = 0;
- cbq_sync_defmap(cl);
- cl->split = split;
- cl->defmap = def & mask;
- } else
- cl->defmap = (cl->defmap & ~mask) | (def & mask);
-
- cbq_sync_defmap(cl);
-}
-
-static void cbq_unlink_class(struct cbq_class *this)
-{
- struct cbq_class *cl, **clp;
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-
- qdisc_class_hash_remove(&q->clhash, &this->common);
-
- if (this->tparent) {
- clp = &this->sibling;
- cl = *clp;
- do {
- if (cl == this) {
- *clp = cl->sibling;
- break;
- }
- clp = &cl->sibling;
- } while ((cl = *clp) != this->sibling);
-
- if (this->tparent->children == this) {
- this->tparent->children = this->sibling;
- if (this->sibling == this)
- this->tparent->children = NULL;
- }
- } else {
- WARN_ON(this->sibling != this);
- }
-}
-
-static void cbq_link_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- struct cbq_class *parent = this->tparent;
-
- this->sibling = this;
- qdisc_class_hash_insert(&q->clhash, &this->common);
-
- if (parent == NULL)
- return;
-
- if (parent->children == NULL) {
- parent->children = this;
- } else {
- this->sibling = parent->children->sibling;
- parent->children->sibling = this;
- }
-}
-
-static void
-cbq_reset(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- int prio;
- unsigned int h;
-
- q->activemask = 0;
- q->pmask = 0;
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- qdisc_watchdog_cancel(&q->watchdog);
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
- q->active[prio] = NULL;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- qdisc_reset(cl->q);
-
- cl->next_alive = NULL;
- cl->undertime = PSCHED_PASTPERFECT;
- cl->avgidle = cl->maxidle;
- cl->deficit = cl->quantum;
- cl->cpriority = cl->priority;
- }
- }
-}
-
-
-static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
-{
- if (lss->change & TCF_CBQ_LSS_FLAGS) {
- cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
- cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
- }
- if (lss->change & TCF_CBQ_LSS_EWMA)
- cl->ewma_log = lss->ewma_log;
- if (lss->change & TCF_CBQ_LSS_AVPKT)
- cl->avpkt = lss->avpkt;
- if (lss->change & TCF_CBQ_LSS_MINIDLE)
- cl->minidle = -(long)lss->minidle;
- if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
- cl->maxidle = lss->maxidle;
- cl->avgidle = lss->maxidle;
- }
- if (lss->change & TCF_CBQ_LSS_OFFTIME)
- cl->offtime = lss->offtime;
-}
-
-static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]--;
- q->quanta[cl->priority] -= cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]++;
- q->quanta[cl->priority] += cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
- if (wrr->allot)
- cl->allot = wrr->allot;
- if (wrr->weight)
- cl->weight = wrr->weight;
- if (wrr->priority) {
- cl->priority = wrr->priority - 1;
- cl->cpriority = cl->priority;
- if (cl->priority >= cl->priority2)
- cl->priority2 = TC_CBQ_MAXPRIO - 1;
- }
-
- cbq_addprio(q, cl);
- return 0;
-}
-
-static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
-{
- cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
- return 0;
-}
-
-static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
- [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
- [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
- [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
- [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
- [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
- [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
- [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
-};
-
-static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
- struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (!opt) {
- NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
- return -EINVAL;
- }
-
- err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
- cbq_policy, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_WRROPT]) {
- const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
-
- if (wrr->priority > TC_CBQ_MAXPRIO) {
- NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
- err = -EINVAL;
- }
- }
- return err;
-}
-
-static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct tc_ratespec *r;
- int err;
-
- qdisc_watchdog_init(&q->watchdog, sch);
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
- NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
- return -EINVAL;
- }
-
- r = nla_data(tb[TCA_CBQ_RATE]);
-
- q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
- if (!q->link.R_tab)
- return -EINVAL;
-
- err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
- if (err)
- goto put_rtab;
-
- err = qdisc_class_hash_init(&q->clhash);
- if (err < 0)
- goto put_block;
-
- q->link.sibling = &q->link;
- q->link.common.classid = sch->handle;
- q->link.qdisc = sch;
- q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (!q->link.q)
- q->link.q = &noop_qdisc;
- else
- qdisc_hash_add(q->link.q, true);
-
- q->link.priority = TC_CBQ_MAXPRIO - 1;
- q->link.priority2 = TC_CBQ_MAXPRIO - 1;
- q->link.cpriority = TC_CBQ_MAXPRIO - 1;
- q->link.allot = psched_mtu(qdisc_dev(sch));
- q->link.quantum = q->link.allot;
- q->link.weight = q->link.R_tab->rate.rate;
-
- q->link.ewma_log = TC_CBQ_DEF_EWMA;
- q->link.avpkt = q->link.allot/2;
- q->link.minidle = -0x7FFFFFFF;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- cbq_link_class(&q->link);
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- cbq_addprio(q, &q->link);
- return 0;
-
-put_block:
- tcf_block_put(q->link.block);
-
-put_rtab:
- qdisc_put_rtab(q->link.R_tab);
- return err;
-}
-
-static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
-
- if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_lssopt opt;
-
- opt.flags = 0;
- if (cl->borrow == NULL)
- opt.flags |= TCF_CBQ_LSS_BOUNDED;
- if (cl->share == NULL)
- opt.flags |= TCF_CBQ_LSS_ISOLATED;
- opt.ewma_log = cl->ewma_log;
- opt.level = cl->level;
- opt.avpkt = cl->avpkt;
- opt.maxidle = cl->maxidle;
- opt.minidle = (u32)(-cl->minidle);
- opt.offtime = cl->offtime;
- opt.change = ~0;
- if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_wrropt opt;
-
- memset(&opt, 0, sizeof(opt));
- opt.flags = 0;
- opt.allot = cl->allot;
- opt.priority = cl->priority + 1;
- opt.cpriority = cl->cpriority + 1;
- opt.weight = cl->weight;
- if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_fopt opt;
-
- if (cl->split || cl->defmap) {
- opt.split = cl->split ? cl->split->common.classid : 0;
- opt.defmap = cl->defmap;
- opt.defchange = ~0;
- if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- }
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
-{
- if (cbq_dump_lss(skb, cl) < 0 ||
- cbq_dump_rate(skb, cl) < 0 ||
- cbq_dump_wrr(skb, cl) < 0 ||
- cbq_dump_fopt(skb, cl) < 0)
- return -1;
- return 0;
-}
-
-static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *nest;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, &q->link) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- q->link.xstats.avgidle = q->link.avgidle;
- return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
-}
-
-static int
-cbq_dump_class(struct Qdisc *sch, unsigned long arg,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
- struct nlattr *nest;
-
- if (cl->tparent)
- tcm->tcm_parent = cl->tparent->common.classid;
- else
- tcm->tcm_parent = TC_H_ROOT;
- tcm->tcm_handle = cl->common.classid;
- tcm->tcm_info = cl->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, cl) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
- __u32 qlen;
-
- cl->xstats.avgidle = cl->avgidle;
- cl->xstats.undertime = 0;
- qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
-
- if (cl->undertime != PSCHED_PASTPERFECT)
- cl->xstats.undertime = cl->undertime - q->now;
-
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
- return -1;
-
- return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
-}
-
-static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old, struct netlink_ext_ack *extack)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->common.classid, extack);
- if (new == NULL)
- return -ENOBUFS;
- }
-
- *old = qdisc_replace(sch, new, &cl->q);
- return 0;
-}
-
-static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- return cl->q;
-}
-
-static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cbq_deactivate_class(cl);
-}
-
-static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- return (unsigned long)cbq_class_lookup(q, classid);
-}
-
-static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- WARN_ON(cl->filters);
-
- tcf_block_put(cl->block);
- qdisc_put(cl->q);
- qdisc_put_rtab(cl->R_tab);
- gen_kill_estimator(&cl->rate_est);
- if (cl != &q->link)
- kfree(cl);
-}
-
-static void cbq_destroy(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct hlist_node *next;
- struct cbq_class *cl;
- unsigned int h;
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = NULL;
-#endif
- /*
- * Filters must be destroyed first because we don't destroy the
- * classes from root to leafs which means that filters can still
- * be bound to classes which have been destroyed already. --TGR '04
- */
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- tcf_block_put(cl->block);
- cl->block = NULL;
- }
- }
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
- common.hnode)
- cbq_destroy_class(sch, cl);
- }
- qdisc_class_hash_destroy(&q->clhash);
-}
-
-static int
-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
- unsigned long *arg, struct netlink_ext_ack *extack)
-{
- int err;
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)*arg;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct cbq_class *parent;
- struct qdisc_rate_table *rtab = NULL;
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
- NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
- return -EOPNOTSUPP;
- }
-
- if (cl) {
- /* Check parent */
- if (parentid) {
- if (cl->tparent &&
- cl->tparent->common.classid != parentid) {
- NL_SET_ERR_MSG(extack, "Invalid parent id");
- return -EINVAL;
- }
- if (!cl->tparent && parentid != TC_H_ROOT) {
- NL_SET_ERR_MSG(extack, "Parent must be root");
- return -EINVAL;
- }
- }
-
- if (tb[TCA_CBQ_RATE]) {
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
- tb[TCA_CBQ_RTAB], extack);
- if (rtab == NULL)
- return -EINVAL;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
- NULL,
- true,
- tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
- qdisc_put_rtab(rtab);
- return err;
- }
- }
-
- /* Change class parameters */
- sch_tree_lock(sch);
-
- if (cl->next_alive != NULL)
- cbq_deactivate_class(cl);
-
- if (rtab) {
- qdisc_put_rtab(cl->R_tab);
- cl->R_tab = rtab;
- }
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- if (tb[TCA_CBQ_WRROPT]) {
- cbq_rmprio(q, cl);
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- }
-
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
-
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- sch_tree_unlock(sch);
-
- return 0;
- }
-
- if (parentid == TC_H_ROOT)
- return -EINVAL;
-
- if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
- NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
- return -EINVAL;
- }
-
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
- extack);
- if (rtab == NULL)
- return -EINVAL;
-
- if (classid) {
- err = -EINVAL;
- if (TC_H_MAJ(classid ^ sch->handle) ||
- cbq_class_lookup(q, classid)) {
- NL_SET_ERR_MSG(extack, "Specified class not found");
- goto failure;
- }
- } else {
- int i;
- classid = TC_H_MAKE(sch->handle, 0x8000);
-
- for (i = 0; i < 0x8000; i++) {
- if (++q->hgenerator >= 0x8000)
- q->hgenerator = 1;
- if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
- break;
- }
- err = -ENOSR;
- if (i >= 0x8000) {
- NL_SET_ERR_MSG(extack, "Unable to generate classid");
- goto failure;
- }
- classid = classid|q->hgenerator;
- }
-
- parent = &q->link;
- if (parentid) {
- parent = cbq_class_lookup(q, parentid);
- err = -EINVAL;
- if (!parent) {
- NL_SET_ERR_MSG(extack, "Failed to find parentid");
- goto failure;
- }
- }
-
- err = -ENOBUFS;
- cl = kzalloc(sizeof(*cl), GFP_KERNEL);
- if (cl == NULL)
- goto failure;
-
- gnet_stats_basic_sync_init(&cl->bstats);
- err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
- if (err) {
- kfree(cl);
- goto failure;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL, true, tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
- tcf_block_put(cl->block);
- kfree(cl);
- goto failure;
- }
- }
-
- cl->R_tab = rtab;
- rtab = NULL;
- cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- NULL);
- if (!cl->q)
- cl->q = &noop_qdisc;
- else
- qdisc_hash_add(cl->q, true);
-
- cl->common.classid = classid;
- cl->tparent = parent;
- cl->qdisc = sch;
- cl->allot = parent->allot;
- cl->quantum = cl->allot;
- cl->weight = cl->R_tab->rate.rate;
-
- sch_tree_lock(sch);
- cbq_link_class(cl);
- cl->borrow = cl->tparent;
- if (cl->tparent != &q->link)
- cl->share = cl->tparent;
- cbq_adjust_levels(parent);
- cl->minidle = -0x7FFFFFFF;
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- if (cl->ewma_log == 0)
- cl->ewma_log = q->link.ewma_log;
- if (cl->maxidle == 0)
- cl->maxidle = q->link.maxidle;
- if (cl->avpkt == 0)
- cl->avpkt = q->link.avpkt;
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
-
- *arg = (unsigned long)cl;
- return 0;
-
-failure:
- qdisc_put_rtab(rtab);
- return err;
-}
-
-static int cbq_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl->filters || cl->children || cl == &q->link)
- return -EBUSY;
-
- sch_tree_lock(sch);
-
- qdisc_purge_queue(cl->q);
-
- if (cl->next_alive)
- cbq_deactivate_class(cl);
-
- if (q->tx_borrowed == cl)
- q->tx_borrowed = q->tx_class;
- if (q->tx_class == cl) {
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- }
-#ifdef CONFIG_NET_CLS_ACT
- if (q->rx_class == cl)
- q->rx_class = NULL;
-#endif
-
- cbq_unlink_class(cl);
- cbq_adjust_levels(cl->tparent);
- cl->defmap = 0;
- cbq_sync_defmap(cl);
-
- cbq_rmprio(q, cl);
- sch_tree_unlock(sch);
-
- cbq_destroy_class(sch, cl);
- return 0;
-}
-
-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl == NULL)
- cl = &q->link;
-
- return cl->block;
-}
-
-static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
- u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *p = (struct cbq_class *)parent;
- struct cbq_class *cl = cbq_class_lookup(q, classid);
-
- if (cl) {
- if (p && p->level <= cl->level)
- return 0;
- cl->filters++;
- return (unsigned long)cl;
- }
- return 0;
-}
-
-static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cl->filters--;
-}
-
-static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- unsigned int h;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
- return;
- }
- }
-}
-
-static const struct Qdisc_class_ops cbq_class_ops = {
- .graft = cbq_graft,
- .leaf = cbq_leaf,
- .qlen_notify = cbq_qlen_notify,
- .find = cbq_find,
- .change = cbq_change_class,
- .delete = cbq_delete,
- .walk = cbq_walk,
- .tcf_block = cbq_tcf_block,
- .bind_tcf = cbq_bind_filter,
- .unbind_tcf = cbq_unbind_filter,
- .dump = cbq_dump_class,
- .dump_stats = cbq_dump_class_stats,
-};
-
-static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &cbq_class_ops,
- .id = "cbq",
- .priv_size = sizeof(struct cbq_sched_data),
- .enqueue = cbq_enqueue,
- .dequeue = cbq_dequeue,
- .peek = qdisc_peek_dequeued,
- .init = cbq_init,
- .reset = cbq_reset,
- .destroy = cbq_destroy,
- .change = NULL,
- .dump = cbq_dump,
- .dump_stats = cbq_dump_stats,
- .owner = THIS_MODULE,
-};
-
-static int __init cbq_module_init(void)
-{
- return register_qdisc(&cbq_qdisc_ops);
-}
-static void __exit cbq_module_exit(void)
-{
- unregister_qdisc(&cbq_qdisc_ops);
-}
-module_init(cbq_module_init)
-module_exit(cbq_module_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
deleted file mode 100644
index 401ffaf87d62..000000000000
--- a/net/sched/sch_dsmark.c
+++ /dev/null
@@ -1,518 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_dsmark.c - Differentiated Services field marker */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/bitops.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <asm/byteorder.h>
-
-/*
- * classid class marking
- * ------- ----- -------
- * n/a 0 n/a
- * x:0 1 use entry [0]
- * ... ... ...
- * x:y y>0 y+1 use entry [y]
- * ... ... ...
- * x:indices-1 indices use entry [indices-1]
- * ... ... ...
- * x:y y+1 use entry [y & (indices-1)]
- * ... ... ...
- * 0xffff 0x10000 use entry [indices-1]
- */
-
-
-#define NO_DEFAULT_INDEX (1 << 16)
-
-struct mask_value {
- u8 mask;
- u8 value;
-};
-
-struct dsmark_qdisc_data {
- struct Qdisc *q;
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct mask_value *mv;
- u16 indices;
- u8 set_tc_index;
- u32 default_index; /* index range is 0...0xffff */
-#define DSMARK_EMBEDDED_SZ 16
- struct mask_value embedded[DSMARK_EMBEDDED_SZ];
-};
-
-static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
-{
- return index <= p->indices && index > 0;
-}
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
- __func__, sch, p, new, old);
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (new == NULL)
- new = &noop_qdisc;
- }
-
- *old = qdisc_replace(sch, new, &p->q);
- return 0;
-}
-
-static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- return p->q;
-}
-
-static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
-{
- return TC_H_MIN(classid) + 1;
-}
-
-static unsigned long dsmark_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
- __func__, sch, qdisc_priv(sch), classid);
-
- return dsmark_find(sch, classid);
-}
-
-static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
-{
-}
-
-static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
- [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
- [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
- [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
- [TCA_DSMARK_MASK] = { .type = NLA_U8 },
- [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
-};
-
-static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
- __func__, sch, p, classid, parent, *arg);
-
- if (!dsmark_valid_index(p, *arg)) {
- err = -ENOENT;
- goto errout;
- }
-
- if (!opt)
- goto errout;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- if (tb[TCA_DSMARK_VALUE])
- p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
-
- if (tb[TCA_DSMARK_MASK])
- p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
- err = 0;
-
-errout:
- return err;
-}
-
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- if (!dsmark_valid_index(p, arg))
- return -EINVAL;
-
- p->mv[arg - 1].mask = 0xff;
- p->mv[arg - 1].value = 0;
-
- return 0;
-}
-
-static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
- __func__, sch, p, walker);
-
- if (walker->stop)
- return;
-
- for (i = 0; i < p->indices; i++) {
- if (p->mv[i].mask == 0xff && !p->mv[i].value) {
- walker->count++;
- continue;
- }
- if (!tc_qdisc_stats_dump(sch, i + 1, walker))
- break;
- }
-}
-
-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- return p->block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- unsigned int len = qdisc_pkt_len(skb);
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
-
- if (p->set_tc_index) {
- int wlen = skb_network_offset(skb);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
- & ~INET_ECN_MASK;
- break;
-
- case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
- & ~INET_ECN_MASK;
- break;
- default:
- skb->tc_index = 0;
- break;
- }
- }
-
- if (TC_H_MAJ(skb->priority) == sch->handle)
- skb->tc_index = TC_H_MIN(skb->priority);
- else {
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tcf_classify(skb, NULL, fl, &res, false);
-
- pr_debug("result %d class 0x%04x\n", result, res.classid);
-
- switch (result) {
-#ifdef CONFIG_NET_CLS_ACT
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-
- case TC_ACT_SHOT:
- goto drop;
-#endif
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- break;
-
- default:
- if (p->default_index != NO_DEFAULT_INDEX)
- skb->tc_index = p->default_index;
- break;
- }
- }
-
- err = qdisc_enqueue(skb, p->q, to_free);
- if (err != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(err))
- qdisc_qstats_drop(sch);
- return err;
- }
-
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
- return NET_XMIT_SUCCESS;
-
-drop:
- qdisc_drop(skb, sch, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
- u32 index;
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- skb = qdisc_dequeue_peeked(p->q);
- if (skb == NULL)
- return NULL;
-
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- index = skb->tc_index & (p->indices - 1);
- pr_debug("index %d->%d\n", skb->tc_index, index);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- default:
- /*
- * Only complain if a change was actually attempted.
- * This way, we can send non-IP traffic through dsmark
- * and don't need yet another qdisc as a bypass.
- */
- if (p->mv[index].mask != 0xff || p->mv[index].value)
- pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb_protocol(skb, true)));
- break;
- }
-
- return skb;
-}
-
-static struct sk_buff *dsmark_peek(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- return p->q->ops->peek(p->q);
-}
-
-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
- u32 default_index = NO_DEFAULT_INDEX;
- u16 indices;
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
-
- if (!opt)
- goto errout;
-
- err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
- if (err)
- return err;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- err = -EINVAL;
- if (!tb[TCA_DSMARK_INDICES])
- goto errout;
- indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
-
- if (hweight32(indices) != 1)
- goto errout;
-
- if (tb[TCA_DSMARK_DEFAULT_INDEX])
- default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
-
- if (indices <= DSMARK_EMBEDDED_SZ)
- p->mv = p->embedded;
- else
- p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
- if (!p->mv) {
- err = -ENOMEM;
- goto errout;
- }
- for (i = 0; i < indices; i++) {
- p->mv[i].mask = 0xff;
- p->mv[i].value = 0;
- }
- p->indices = indices;
- p->default_index = default_index;
- p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
-
- p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
- NULL);
- if (p->q == NULL)
- p->q = &noop_qdisc;
- else
- qdisc_hash_add(p->q, true);
-
- pr_debug("%s: qdisc %p\n", __func__, p->q);
-
- err = 0;
-errout:
- return err;
-}
-
-static void dsmark_reset(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- if (p->q)
- qdisc_reset(p->q);
-}
-
-static void dsmark_destroy(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- tcf_block_put(p->block);
- qdisc_put(p->q);
- if (p->mv != p->embedded)
- kfree(p->mv);
-}
-
-static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
-
- if (!dsmark_valid_index(p, cl))
- return -EINVAL;
-
- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
- tcm->tcm_info = p->q->handle;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
- nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
- goto nla_put_failure;
-
- if (p->default_index != NO_DEFAULT_INDEX &&
- nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index))
- goto nla_put_failure;
-
- if (p->set_tc_index &&
- nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static const struct Qdisc_class_ops dsmark_class_ops = {
- .graft = dsmark_graft,
- .leaf = dsmark_leaf,
- .find = dsmark_find,
- .change = dsmark_change,
- .delete = dsmark_delete,
- .walk = dsmark_walk,
- .tcf_block = dsmark_tcf_block,
- .bind_tcf = dsmark_bind_filter,
- .unbind_tcf = dsmark_unbind_filter,
- .dump = dsmark_dump_class,
-};
-
-static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &dsmark_class_ops,
- .id = "dsmark",
- .priv_size = sizeof(struct dsmark_qdisc_data),
- .enqueue = dsmark_enqueue,
- .dequeue = dsmark_dequeue,
- .peek = dsmark_peek,
- .init = dsmark_init,
- .reset = dsmark_reset,
- .destroy = dsmark_destroy,
- .change = NULL,
- .dump = dsmark_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init dsmark_module_init(void)
-{
- return register_qdisc(&dsmark_qdisc_ops);
-}
-
-static void __exit dsmark_module_exit(void)
-{
- unregister_qdisc(&dsmark_qdisc_ops);
-}
-
-module_init(dsmark_module_init)
-module_exit(dsmark_module_exit)
-
-MODULE_LICENSE("GPL");
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 097bd60ce964..62b436a2c8fe 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,8 +807,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- sk_refcnt_debug_inc(newsk);
-
if (newsk->sk_prot->init(newsk)) {
sk_common_release(newsk);
newsk = NULL;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 909a89a1cff4..c365df24ad33 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -601,8 +601,6 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
- sk_refcnt_debug_inc(newsk);
-
if (newsk->sk_prot->init(newsk)) {
sk_common_release(newsk);
newsk = NULL;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b163266e581a..d7a7420e81ec 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -360,8 +360,6 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
-
- sk_refcnt_debug_dec(sk);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
@@ -390,7 +388,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
sk->sk_prot->hash(sk);
- sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
smc_init_saved_callbacks(smc);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f0561b67c12..a245c1b4a21b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -845,7 +845,6 @@ static int xsk_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
- sk_refcnt_debug_release(sk);
sock_put(sk);
return 0;
@@ -1396,8 +1395,6 @@ static void xsk_destruct(struct sock *sk)
if (!xp_put_pool(xs->pool))
xdp_put_umem(xs->umem, !xs->pool);
-
- sk_refcnt_debug_dec(sk);
}
static int xsk_create(struct net *net, struct socket *sock, int protocol,
@@ -1427,7 +1424,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_family = PF_XDP;
sk->sk_destruct = xsk_destruct;
- sk_refcnt_debug_inc(sk);
sock_set_flag(sk, SOCK_RCU_FREE);