diff options
Diffstat (limited to 'net')
46 files changed, 2076 insertions, 6237 deletions
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ad13b48e3e08..24f01ff113f0 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -269,7 +269,7 @@ static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t net_ns_get_ownership(dev_net(p->dev), uid, gid); } -static struct kobj_type brport_ktype = { +static const struct kobj_type brport_ktype = { #ifdef CONFIG_SYSFS .sysfs_ops = &brport_sysfs_ops, #endif diff --git a/net/core/dev.c b/net/core/dev.c index 7307a0c15c9f..357081b0113c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8321,9 +8321,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) } } if (dev->flags != old_flags) { - pr_info("device %s %s promiscuous mode\n", - dev->name, - dev->flags & IFF_PROMISC ? "entered" : "left"); + netdev_info(dev, "%s promiscuous mode\n", + dev->flags & IFF_PROMISC ? "entered" : "left"); if (audit_enabled) { current_uid_gid(&uid, &gid); audit_log(audit_context(), GFP_ATOMIC, @@ -8391,6 +8390,8 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) } } if (dev->flags ^ old_flags) { + netdev_info(dev, "%s allmulticast mode\n", + dev->flags & IFF_ALLMULTI ? 
"entered" : "left"); dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); if (notify) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4b361ac6a252..e20784b6f873 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1052,7 +1052,7 @@ static void rx_queue_get_ownership(const struct kobject *kobj, net_ns_get_ownership(net, uid, gid); } -static struct kobj_type rx_queue_ktype __ro_after_init = { +static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, .default_groups = rx_queue_default_groups, @@ -1662,7 +1662,7 @@ static void netdev_queue_get_ownership(const struct kobject *kobj, net_ns_get_ownership(net, uid, gid); } -static struct kobj_type netdev_queue_ktype __ro_after_init = { +static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, .default_groups = netdev_queue_default_groups, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 13ea10cf8544..98ebce9f6a51 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1406,14 +1406,18 @@ EXPORT_SYMBOL_GPL(skb_morph); int mm_account_pinned_pages(struct mmpin *mmp, size_t size) { - unsigned long max_pg, num_pg, new_pg, old_pg; + unsigned long max_pg, num_pg, new_pg, old_pg, rlim; struct user_struct *user; if (capable(CAP_IPC_LOCK) || !size) return 0; + rlim = rlimit(RLIMIT_MEMLOCK); + if (rlim == RLIM_INFINITY) + return 0; + num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ - max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + max_pg = rlim >> PAGE_SHIFT; user = mmp->user ? 
: current_user(); old_pg = atomic_long_read(&user->locked_vm); diff --git a/net/core/sock.c b/net/core/sock.c index afbb02984d5f..341c565dbc26 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2340,17 +2340,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) smp_wmb(); refcount_set(&newsk->sk_refcnt, 2); - /* Increment the counter in the same struct proto as the master - * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that - * is the same as sk->sk_prot->socks, as this field was copied - * with memcpy). - * - * This _changes_ the previous behaviour, where - * tcp_create_openreq_child always was incrementing the - * equivalent to tcp_prot->socks (inet_sock_nr), so this have - * to be taken into account in all callers. -acme - */ - sk_refcnt_debug_inc(newsk); sk_set_socket(newsk, NULL); sk_tx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); @@ -3710,8 +3699,6 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); - sk_refcnt_debug_release(sk); - sock_put(sk); } EXPORT_SYMBOL(sk_common_release); diff --git a/net/devlink/Makefile b/net/devlink/Makefile index daad4521c61e..ef91a76646a3 100644 --- a/net/devlink/Makefile +++ b/net/devlink/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := leftover.o core.o netlink.o dev.o +obj-y := leftover.o core.o netlink.o dev.o health.o diff --git a/net/devlink/core.c b/net/devlink/core.c index a4f47dafb864..777b091ef74d 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -212,6 +212,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); + xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); INIT_LIST_HEAD(&devlink->rate_list); @@ -219,7 +220,6 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, INIT_LIST_HEAD(&devlink->sb_list); 
INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list); INIT_LIST_HEAD(&devlink->resource_list); - INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); INIT_LIST_HEAD(&devlink->reporter_list); INIT_LIST_HEAD(&devlink->trap_list); @@ -255,7 +255,6 @@ void devlink_free(struct devlink *devlink) WARN_ON(!list_empty(&devlink->trap_list)); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); - WARN_ON(!list_empty(&devlink->param_list)); WARN_ON(!list_empty(&devlink->resource_list)); WARN_ON(!list_empty(&devlink->dpipe_table_list)); WARN_ON(!list_empty(&devlink->sb_list)); @@ -264,6 +263,7 @@ void devlink_free(struct devlink *devlink) WARN_ON(!xa_empty(&devlink->ports)); xa_destroy(&devlink->snapshot_ids); + xa_destroy(&devlink->params); xa_destroy(&devlink->ports); WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb)); diff --git a/net/devlink/dev.c b/net/devlink/dev.c index 78d824eda5ec..b40153fa2680 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -305,7 +305,7 @@ static struct net *devlink_netns_get(struct sk_buff *skb, struct net *net; if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) { - NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified"); + NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified"); return ERR_PTR(-EINVAL); } @@ -323,7 +323,7 @@ static struct net *devlink_netns_get(struct sk_buff *skb, net = ERR_PTR(-EINVAL); } if (IS_ERR(net)) { - NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace"); + NL_SET_ERR_MSG(info->extack, "Unknown network namespace"); return ERR_PTR(-EINVAL); } if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { @@ -369,6 +369,9 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, if (dest_net && !net_eq(dest_net, curr_net)) devlink_reload_netns_change(devlink, curr_net, dest_net); + if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + 
devlink_params_driverinit_load_new(devlink); + err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); if (err) @@ -425,7 +428,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) err = devlink_resources_validate(devlink, NULL, info); if (err) { - NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); + NL_SET_ERR_MSG(info->extack, "resources size validation failed"); return err; } @@ -435,8 +438,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT; if (!devlink_reload_action_is_supported(devlink, action)) { - NL_SET_ERR_MSG_MOD(info->extack, - "Requested reload action is not supported by the driver"); + NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver"); return -EOPNOTSUPP; } @@ -448,7 +450,7 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]); limits_selected = limits.value & limits.selector; if (!limits_selected) { - NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected"); + NL_SET_ERR_MSG(info->extack, "Invalid limit selected"); return -EINVAL; } for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++) @@ -456,18 +458,15 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) break; /* UAPI enables multiselection, but currently it is not used */ if (limits_selected != BIT(limit)) { - NL_SET_ERR_MSG_MOD(info->extack, - "Multiselection of limit is not supported"); + NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported"); return -EOPNOTSUPP; } if (!devlink_reload_limit_is_supported(devlink, limit)) { - NL_SET_ERR_MSG_MOD(info->extack, - "Requested limit is not supported by the driver"); + NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver"); return -EOPNOTSUPP; } if 
(devlink_reload_combination_is_invalid(action, limit)) { - NL_SET_ERR_MSG_MOD(info->extack, - "Requested limit is invalid for this action"); + NL_SET_ERR_MSG(info->extack, "Requested limit is invalid for this action"); return -EINVAL; } } @@ -477,6 +476,12 @@ int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) dest_net = devlink_netns_get(skb, info); if (IS_ERR(dest_net)) return PTR_ERR(dest_net); + if (!net_eq(dest_net, devlink_net(devlink)) && + action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) { + NL_SET_ERR_MSG_MOD(info->extack, + "Changing namespace is only supported for reinit action"); + return -EOPNOTSUPP; + } } err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 941174e157d4..e133f423294a 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -29,7 +29,7 @@ struct devlink { struct list_head sb_list; struct list_head dpipe_table_list; struct list_head resource_list; - struct list_head param_list; + struct xarray params; struct list_head region_list; struct list_head reporter_list; struct devlink_dpipe_headers *dpipe_headers; @@ -176,6 +176,8 @@ int devlink_port_netdevice_event(struct notifier_block *nb, struct devlink_port * devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info); +struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs); /* Reload */ bool devlink_reload_actions_valid(const struct devlink_ops *ops); @@ -189,6 +191,9 @@ static inline bool devlink_reload_supported(const struct devlink_ops *ops) return ops->reload_down && ops->reload_up; } +/* Params */ +void devlink_params_driverinit_load_new(struct devlink *devlink); + /* Resources */ struct devlink_resource; int devlink_resources_validate(struct devlink *devlink, @@ -218,3 +223,17 @@ int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info); int 
devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info); int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, + struct genl_info *info); +int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, + struct genl_info *info); diff --git a/net/devlink/health.c b/net/devlink/health.c new file mode 100644 index 000000000000..0839706d5741 --- /dev/null +++ b/net/devlink/health.c @@ -0,0 +1,1333 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> + */ + +#include <net/genetlink.h> +#include <net/sock.h> +#include <trace/events/devlink.h> +#include "devl_internal.h" + +struct devlink_fmsg_item { + struct list_head list; + int attrtype; + u8 nla_type; + u16 len; + int value[]; +}; + +struct devlink_fmsg { + struct list_head item_list; + bool putting_binary; /* This flag forces enclosing of binary data + * in an array brackets. 
It forces using + * of designated API: + * devlink_fmsg_binary_pair_nest_start() + * devlink_fmsg_binary_pair_nest_end() + */ +}; + +static struct devlink_fmsg *devlink_fmsg_alloc(void) +{ + struct devlink_fmsg *fmsg; + + fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL); + if (!fmsg) + return NULL; + + INIT_LIST_HEAD(&fmsg->item_list); + + return fmsg; +} + +static void devlink_fmsg_free(struct devlink_fmsg *fmsg) +{ + struct devlink_fmsg_item *item, *tmp; + + list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) { + list_del(&item->list); + kfree(item); + } + kfree(fmsg); +} + +struct devlink_health_reporter { + struct list_head list; + void *priv; + const struct devlink_health_reporter_ops *ops; + struct devlink *devlink; + struct devlink_port *devlink_port; + struct devlink_fmsg *dump_fmsg; + struct mutex dump_lock; /* lock parallel read/write from dump buffers */ + u64 graceful_period; + bool auto_recover; + bool auto_dump; + u8 health_state; + u64 dump_ts; + u64 dump_real_ts; + u64 error_count; + u64 recovery_count; + u64 last_recovery_ts; +}; + +void * +devlink_health_reporter_priv(struct devlink_health_reporter *reporter) +{ + return reporter->priv; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); + +static struct devlink_health_reporter * +__devlink_health_reporter_find_by_name(struct list_head *reporter_list, + const char *reporter_name) +{ + struct devlink_health_reporter *reporter; + + list_for_each_entry(reporter, reporter_list, list) + if (!strcmp(reporter->ops->name, reporter_name)) + return reporter; + return NULL; +} + +static struct devlink_health_reporter * +devlink_health_reporter_find_by_name(struct devlink *devlink, + const char *reporter_name) +{ + return __devlink_health_reporter_find_by_name(&devlink->reporter_list, + reporter_name); +} + +static struct devlink_health_reporter * +devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, + const char *reporter_name) +{ + return 
__devlink_health_reporter_find_by_name(&devlink_port->reporter_list, + reporter_name); +} + +static struct devlink_health_reporter * +__devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + if (WARN_ON(graceful_period && !ops->recover)) + return ERR_PTR(-EINVAL); + + reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); + if (!reporter) + return ERR_PTR(-ENOMEM); + + reporter->priv = priv; + reporter->ops = ops; + reporter->devlink = devlink; + reporter->graceful_period = graceful_period; + reporter->auto_recover = !!ops->recover; + reporter->auto_dump = !!ops->dump; + mutex_init(&reporter->dump_lock); + return reporter; +} + +/** + * devl_port_health_reporter_create() - create devlink health reporter for + * specified port instance + * + * @port: devlink_port to which health reports will relate + * @ops: devlink health reporter ops + * @graceful_period: min time (in msec) between recovery attempts + * @priv: driver priv pointer + */ +struct devlink_health_reporter * +devl_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + devl_assert_locked(port->devlink); + + if (__devlink_health_reporter_find_by_name(&port->reporter_list, + ops->name)) + return ERR_PTR(-EEXIST); + + reporter = __devlink_health_reporter_create(port->devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) + return reporter; + + reporter->devlink_port = port; + list_add_tail(&reporter->list, &port->reporter_list); + return reporter; +} +EXPORT_SYMBOL_GPL(devl_port_health_reporter_create); + +struct devlink_health_reporter * +devlink_port_health_reporter_create(struct devlink_port *port, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + 
struct devlink *devlink = port->devlink; + + devl_lock(devlink); + reporter = devl_port_health_reporter_create(port, ops, + graceful_period, priv); + devl_unlock(devlink); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); + +/** + * devl_health_reporter_create - create devlink health reporter + * + * @devlink: devlink instance which the health reports will relate + * @ops: devlink health reporter ops + * @graceful_period: min time (in msec) between recovery attempts + * @priv: driver priv pointer + */ +struct devlink_health_reporter * +devl_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + devl_assert_locked(devlink); + + if (devlink_health_reporter_find_by_name(devlink, ops->name)) + return ERR_PTR(-EEXIST); + + reporter = __devlink_health_reporter_create(devlink, ops, + graceful_period, priv); + if (IS_ERR(reporter)) + return reporter; + + list_add_tail(&reporter->list, &devlink->reporter_list); + return reporter; +} +EXPORT_SYMBOL_GPL(devl_health_reporter_create); + +struct devlink_health_reporter * +devlink_health_reporter_create(struct devlink *devlink, + const struct devlink_health_reporter_ops *ops, + u64 graceful_period, void *priv) +{ + struct devlink_health_reporter *reporter; + + devl_lock(devlink); + reporter = devl_health_reporter_create(devlink, ops, + graceful_period, priv); + devl_unlock(devlink); + return reporter; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_create); + +static void +devlink_health_reporter_free(struct devlink_health_reporter *reporter) +{ + mutex_destroy(&reporter->dump_lock); + if (reporter->dump_fmsg) + devlink_fmsg_free(reporter->dump_fmsg); + kfree(reporter); +} + +/** + * devl_health_reporter_destroy() - destroy devlink health reporter + * + * @reporter: devlink health reporter to destroy + */ +void +devl_health_reporter_destroy(struct devlink_health_reporter 
*reporter) +{ + devl_assert_locked(reporter->devlink); + + list_del(&reporter->list); + devlink_health_reporter_free(reporter); +} +EXPORT_SYMBOL_GPL(devl_health_reporter_destroy); + +void +devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) +{ + struct devlink *devlink = reporter->devlink; + + devl_lock(devlink); + devl_health_reporter_destroy(reporter); + devl_unlock(devlink); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); + +static int +devlink_nl_health_reporter_fill(struct sk_buff *msg, + struct devlink_health_reporter *reporter, + enum devlink_command cmd, u32 portid, + u32 seq, int flags) +{ + struct devlink *devlink = reporter->devlink; + struct nlattr *reporter_attr; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto genlmsg_cancel; + + if (reporter->devlink_port) { + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index)) + goto genlmsg_cancel; + } + reporter_attr = nla_nest_start_noflag(msg, + DEVLINK_ATTR_HEALTH_REPORTER); + if (!reporter_attr) + goto genlmsg_cancel; + if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, + reporter->ops->name)) + goto reporter_nest_cancel; + if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, + reporter->health_state)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, + reporter->error_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, + reporter->recovery_count, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->recover && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, + reporter->graceful_period, + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->recover && + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, + reporter->auto_recover)) + goto 
reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, + jiffies_to_msecs(reporter->dump_ts), + DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->dump_fmsg && + nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, + reporter->dump_real_ts, DEVLINK_ATTR_PAD)) + goto reporter_nest_cancel; + if (reporter->ops->dump && + nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, + reporter->auto_dump)) + goto reporter_nest_cancel; + + nla_nest_end(msg, reporter_attr); + genlmsg_end(msg, hdr); + return 0; + +reporter_nest_cancel: + nla_nest_cancel(msg, reporter_attr); +genlmsg_cancel: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs) +{ + struct devlink_port *devlink_port; + char *reporter_name; + + if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) + return NULL; + + reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); + devlink_port = devlink_port_get_from_attrs(devlink, attrs); + if (IS_ERR(devlink_port)) + return devlink_health_reporter_find_by_name(devlink, + reporter_name); + else + return devlink_port_health_reporter_find_by_name(devlink_port, + reporter_name); +} + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + return devlink_health_reporter_get_from_attrs(devlink, info->attrs); +} + +int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct sk_buff *msg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_health_reporter_fill(msg, reporter, + 
DEVLINK_CMD_HEALTH_REPORTER_GET, + info->snd_portid, info->snd_seq, + 0); + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int +devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg, + struct devlink *devlink, + struct netlink_callback *cb) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + struct devlink_health_reporter *reporter; + struct devlink_port *port; + unsigned long port_index; + int idx = 0; + int err; + + list_for_each_entry(reporter, &devlink->reporter_list, list) { + if (idx < state->idx) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + return err; + } + idx++; + } + xa_for_each(&devlink->ports, port_index, port) { + list_for_each_entry(reporter, &port->reporter_list, list) { + if (idx < state->idx) { + idx++; + continue; + } + err = devlink_nl_health_reporter_fill(msg, reporter, + DEVLINK_CMD_HEALTH_REPORTER_GET, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + state->idx = idx; + return err; + } + idx++; + } + } + + return 0; +} + +const struct devlink_cmd devl_cmd_health_reporter_get = { + .dump_one = devlink_nl_cmd_health_reporter_get_dump_one, +}; + +int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->recover && + (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) + return -EOPNOTSUPP; + + if (!reporter->ops->dump && + info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) + return -EOPNOTSUPP; + + if 
(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) + reporter->graceful_period = + nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) + reporter->auto_recover = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); + + if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) + reporter->auto_dump = + nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]); + + return 0; +} + +static void devlink_recover_notify(struct devlink_health_reporter *reporter, + enum devlink_command cmd) +{ + struct devlink *devlink = reporter->devlink; + struct sk_buff *msg; + int err; + + WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, + 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +void +devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) +{ + reporter->recovery_count++; + reporter->last_recovery_ts = jiffies; +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); + +static int +devlink_health_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx, struct netlink_ext_ack *extack) +{ + int err; + + if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) + return 0; + + if (!reporter->ops->recover) + return -EOPNOTSUPP; + + err = reporter->ops->recover(reporter, priv_ctx, extack); + if (err) + return err; + + devlink_health_reporter_recovery_done(reporter); + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + + return 0; +} + +static void +devlink_health_dump_clear(struct 
devlink_health_reporter *reporter) +{ + if (!reporter->dump_fmsg) + return; + devlink_fmsg_free(reporter->dump_fmsg); + reporter->dump_fmsg = NULL; +} + +static int devlink_health_do_dump(struct devlink_health_reporter *reporter, + void *priv_ctx, + struct netlink_ext_ack *extack) +{ + int err; + + if (!reporter->ops->dump) + return 0; + + if (reporter->dump_fmsg) + return 0; + + reporter->dump_fmsg = devlink_fmsg_alloc(); + if (!reporter->dump_fmsg) { + err = -ENOMEM; + return err; + } + + err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); + if (err) + goto dump_err; + + err = reporter->ops->dump(reporter, reporter->dump_fmsg, + priv_ctx, extack); + if (err) + goto dump_err; + + err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); + if (err) + goto dump_err; + + reporter->dump_ts = jiffies; + reporter->dump_real_ts = ktime_get_real_ns(); + + return 0; + +dump_err: + devlink_health_dump_clear(reporter); + return err; +} + +int devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + enum devlink_health_reporter_state prev_health_state; + struct devlink *devlink = reporter->devlink; + unsigned long recover_ts_threshold; + int ret; + + /* write a log message of the current error */ + WARN_ON(!msg); + trace_devlink_health_report(devlink, reporter->ops->name, msg); + reporter->error_count++; + prev_health_state = reporter->health_state; + reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); + + /* abort if the previous error wasn't recovered */ + recover_ts_threshold = reporter->last_recovery_ts + + msecs_to_jiffies(reporter->graceful_period); + if (reporter->auto_recover && + (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || + (reporter->last_recovery_ts && reporter->recovery_count && + time_is_after_jiffies(recover_ts_threshold)))) { + trace_devlink_health_recover_aborted(devlink, + reporter->ops->name, + 
reporter->health_state, + jiffies - + reporter->last_recovery_ts); + return -ECANCELED; + } + + if (reporter->auto_dump) { + mutex_lock(&reporter->dump_lock); + /* store current dump of current error, for later analysis */ + devlink_health_do_dump(reporter, priv_ctx, NULL); + mutex_unlock(&reporter->dump_lock); + } + + if (!reporter->auto_recover) + return 0; + + devl_lock(devlink); + ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL); + devl_unlock(devlink); + + return ret; +} +EXPORT_SYMBOL_GPL(devlink_health_report); + +void +devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, + enum devlink_health_reporter_state state) +{ + if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && + state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) + return; + + if (reporter->health_state == state) + return; + + reporter->health_state = state; + trace_devlink_health_reporter_state_update(reporter->devlink, + reporter->ops->name, state); + devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); +} +EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); + +int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + return devlink_health_reporter_recover(reporter, NULL, info->extack); +} + +static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, + int attrtype) +{ + struct devlink_fmsg_item *item; + + item = kzalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->attrtype = attrtype; + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); +} 
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); + +static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); +} + +int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); + +#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) + +static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) +{ + struct devlink_fmsg_item *item; + + if (fmsg->putting_binary) + return -EINVAL; + + if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = NLA_NUL_STRING; + item->len = strlen(name) + 1; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; + memcpy(&item->value, name, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) +{ + int err; + + if (fmsg->putting_binary) + return -EINVAL; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); + if (err) + return err; + + err = devlink_fmsg_put_name(fmsg, name); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); + +int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); + +int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + int err; + + if (fmsg->putting_binary) + return -EINVAL; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); + if (err) + return err; + + return 0; +} 
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); + +int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) +{ + int err; + + if (fmsg->putting_binary) + return -EINVAL; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); + +int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, + const char *name) +{ + int err; + + err = devlink_fmsg_arr_pair_nest_start(fmsg, name); + if (err) + return err; + + fmsg->putting_binary = true; + return err; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); + +int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) +{ + if (!fmsg->putting_binary) + return -EINVAL; + + fmsg->putting_binary = false; + return devlink_fmsg_arr_pair_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end); + +static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, + const void *value, u16 value_len, + u8 value_nla_type) +{ + struct devlink_fmsg_item *item; + + if (value_len > DEVLINK_FMSG_MAX_SIZE) + return -EMSGSIZE; + + item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); + if (!item) + return -ENOMEM; + + item->nla_type = value_nla_type; + item->len = value_len; + item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + memcpy(&item->value, value, item->len); + list_add_tail(&item->list, &fmsg->item_list); + + return 0; +} + +static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); +} + +static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); +} + +int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return 
devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); + +static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); +} + +int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) +{ + if (fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, + NLA_NUL_STRING); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); + +int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, + u16 value_len) +{ + if (!fmsg->putting_binary) + return -EINVAL; + + return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); + +int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name, + bool value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_bool_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); + +int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, + u8 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u8_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); + +int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, + u32 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u32_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); + +int 
devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, + u64 value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_u64_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); + +int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, + const char *value) +{ + int err; + + err = devlink_fmsg_pair_nest_start(fmsg, name); + if (err) + return err; + + err = devlink_fmsg_string_put(fmsg, value); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); + +int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, + const void *value, u32 value_len) +{ + u32 data_size; + int end_err; + u32 offset; + int err; + + err = devlink_fmsg_binary_pair_nest_start(fmsg, name); + if (err) + return err; + + for (offset = 0; offset < value_len; offset += data_size) { + data_size = value_len - offset; + if (data_size > DEVLINK_FMSG_MAX_SIZE) + data_size = DEVLINK_FMSG_MAX_SIZE; + err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); + if (err) + break; + /* Exit from loop with a break (instead of + * return) to make sure putting_binary is turned off in + * devlink_fmsg_binary_pair_nest_end + */ + } + + end_err = devlink_fmsg_binary_pair_nest_end(fmsg); + if (end_err) + err = end_err; + + return err; +} +EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); + +static int +devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + switch (msg->nla_type) { + case NLA_FLAG: + case NLA_U8: + case NLA_U32: + case NLA_U64: + case NLA_NUL_STRING: + case NLA_BINARY: + return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, + msg->nla_type); + default: + return -EINVAL; + } +} + +static int 
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) +{ + int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; + u8 tmp; + + switch (msg->nla_type) { + case NLA_FLAG: + /* Always provide flag data, regardless of its value */ + tmp = *(bool *)msg->value; + + return nla_put_u8(skb, attrtype, tmp); + case NLA_U8: + return nla_put_u8(skb, attrtype, *(u8 *)msg->value); + case NLA_U32: + return nla_put_u32(skb, attrtype, *(u32 *)msg->value); + case NLA_U64: + return nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value, + DEVLINK_ATTR_PAD); + case NLA_NUL_STRING: + return nla_put_string(skb, attrtype, (char *)&msg->value); + case NLA_BINARY: + return nla_put(skb, attrtype, msg->len, (void *)&msg->value); + default: + return -EINVAL; + } +} + +static int +devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, + int *start) +{ + struct devlink_fmsg_item *item; + struct nlattr *fmsg_nlattr; + int err = 0; + int i = 0; + + fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG); + if (!fmsg_nlattr) + return -EMSGSIZE; + + list_for_each_entry(item, &fmsg->item_list, list) { + if (i < *start) { + i++; + continue; + } + + switch (item->attrtype) { + case DEVLINK_ATTR_FMSG_OBJ_NEST_START: + case DEVLINK_ATTR_FMSG_PAIR_NEST_START: + case DEVLINK_ATTR_FMSG_ARR_NEST_START: + case DEVLINK_ATTR_FMSG_NEST_END: + err = nla_put_flag(skb, item->attrtype); + break; + case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA: + err = devlink_fmsg_item_fill_type(item, skb); + if (err) + break; + err = devlink_fmsg_item_fill_data(item, skb); + break; + case DEVLINK_ATTR_FMSG_OBJ_NAME: + err = nla_put_string(skb, item->attrtype, + (char *)&item->value); + break; + default: + err = -EINVAL; + break; + } + if (!err) + *start = ++i; + else + break; + } + + nla_nest_end(skb, fmsg_nlattr); + return err; +} + +static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, + struct genl_info *info, + enum devlink_command cmd, int flags) +{ + struct nlmsghdr *nlh; + struct sk_buff 
*skb; + bool last = false; + int index = 0; + void *hdr; + int err; + + while (!last) { + int tmp_index = index; + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, + &devlink_nl_family, flags | NLM_F_MULTI, cmd); + if (!hdr) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + err = devlink_fmsg_prepare_skb(fmsg, skb, &index); + if (!err) + last = true; + else if (err != -EMSGSIZE || tmp_index == index) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + err = genlmsg_reply(skb, info); + if (err) + return err; + } + + skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, + NLMSG_DONE, 0, flags | NLM_F_MULTI); + if (!nlh) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + return genlmsg_reply(skb, info); + +nla_put_failure: + nlmsg_free(skb); + return err; +} + +static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, + struct netlink_callback *cb, + enum devlink_command cmd) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + int index = state->idx; + int tmp_index = index; + void *hdr; + int err; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd); + if (!hdr) { + err = -EMSGSIZE; + goto nla_put_failure; + } + + err = devlink_fmsg_prepare_skb(fmsg, skb, &index); + if ((err && err != -EMSGSIZE) || tmp_index == index) + goto nla_put_failure; + + state->idx = index; + genlmsg_end(skb, hdr); + return skb->len; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return err; +} + +int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + struct devlink_fmsg *fmsg; + int err; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if 
(!reporter) + return -EINVAL; + + if (!reporter->ops->diagnose) + return -EOPNOTSUPP; + + fmsg = devlink_fmsg_alloc(); + if (!fmsg) + return -ENOMEM; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + goto out; + + err = reporter->ops->diagnose(reporter, fmsg, info->extack); + if (err) + goto out; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + goto out; + + err = devlink_fmsg_snd(fmsg, info, + DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); + +out: + devlink_fmsg_free(fmsg); + return err; +} + +static struct devlink_health_reporter * +devlink_health_reporter_get_from_cb(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct devlink_health_reporter *reporter; + struct nlattr **attrs = info->attrs; + struct devlink *devlink; + + devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); + if (IS_ERR(devlink)) + return NULL; + devl_unlock(devlink); + + reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); + devlink_put(devlink); + return reporter; +} + +int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + struct devlink_health_reporter *reporter; + int err; + + reporter = devlink_health_reporter_get_from_cb(cb); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + if (!state->idx) { + err = devlink_health_do_dump(reporter, NULL, cb->extack); + if (err) + goto unlock; + state->dump_ts = reporter->dump_ts; + } + if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) { + NL_SET_ERR_MSG(cb->extack, "Dump trampled, please retry"); + err = -EAGAIN; + goto unlock; + } + + err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb, + DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET); +unlock: + mutex_unlock(&reporter->dump_lock); + return err; +} + +int devlink_nl_cmd_health_reporter_dump_clear_doit(struct 
sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->dump) + return -EOPNOTSUPP; + + mutex_lock(&reporter->dump_lock); + devlink_health_dump_clear(reporter); + mutex_unlock(&reporter->dump_lock); + return 0; +} + +int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + struct devlink_health_reporter *reporter; + + reporter = devlink_health_reporter_get_from_info(devlink, info); + if (!reporter) + return -EINVAL; + + if (!reporter->ops->test) + return -EOPNOTSUPP; + + return reporter->ops->test(reporter, info->extack); +} diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index f05ab093d231..dffca2f9bfa7 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -156,8 +156,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, return xa_load(&devlink->ports, port_index); } -static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, - struct nlattr **attrs) +struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink, + struct nlattr **attrs) { if (attrs[DEVLINK_ATTR_PORT_INDEX]) { u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); @@ -810,13 +810,12 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, } if (!devlink_port_fn_state_valid(state)) { WARN_ON_ONCE(1); - NL_SET_ERR_MSG_MOD(extack, "Invalid state read from driver"); + NL_SET_ERR_MSG(extack, "Invalid state read from driver"); return -EINVAL; } if (!devlink_port_fn_opstate_valid(opstate)) { WARN_ON_ONCE(1); - NL_SET_ERR_MSG_MOD(extack, - "Invalid operational state read from driver"); + NL_SET_ERR_MSG(extack, "Invalid operational state read from driver"); return -EINVAL; } if (nla_put_u8(msg, 
DEVLINK_PORT_FN_ATTR_STATE, state) || @@ -1111,24 +1110,18 @@ devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_port *devlink_port; unsigned long port_index; - int idx = 0; int err = 0; - xa_for_each(&devlink->ports, port_index, devlink_port) { - if (idx < state->idx) { - idx++; - continue; - } + xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) { err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); if (err) { - state->idx = idx; + state->idx = port_index; break; } - idx++; } return err; @@ -1171,16 +1164,16 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, hw_addr = nla_data(attr); hw_addr_len = nla_len(attr); if (hw_addr_len > MAX_ADDR_LEN) { - NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long"); + NL_SET_ERR_MSG(extack, "Port function hardware address too long"); return -EINVAL; } if (port->type == DEVLINK_PORT_TYPE_ETH) { if (hw_addr_len != ETH_ALEN) { - NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device"); + NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device"); return -EINVAL; } if (!is_unicast_ether_addr(hw_addr)) { - NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported"); + NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported"); return -EINVAL; } } @@ -1256,7 +1249,7 @@ static int devlink_port_function_set(struct devlink_port *port, err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr, devlink_function_nl_policy, extack); if (err < 0) { - NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes"); + NL_SET_ERR_MSG(extack, "Fail to parse port function attributes"); return err; } @@ -1335,14 +1328,14 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, if (!devlink_port->attrs.splittable) { /* Split ports 
cannot be split. */ if (devlink_port->attrs.split) - NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further"); + NL_SET_ERR_MSG(info->extack, "Port cannot be split further"); else - NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split"); + NL_SET_ERR_MSG(info->extack, "Port cannot be split"); return -EINVAL; } if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) { - NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count"); + NL_SET_ERR_MSG(info->extack, "Invalid split count"); return -EINVAL; } @@ -1406,7 +1399,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] || !info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) { - NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified"); + NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified"); return -EINVAL; } new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]); @@ -1454,7 +1447,7 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, return -EOPNOTSUPP; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) { - NL_SET_ERR_MSG_MOD(extack, "Port index is not specified"); + NL_SET_ERR_MSG(extack, "Port index is not specified"); return -EINVAL; } port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); @@ -1496,13 +1489,13 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate, return -ENODEV; if (parent == devlink_rate) { - NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed"); + NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed"); return -EINVAL; } if (devlink_rate_is_node(devlink_rate) && devlink_rate_is_parent_node(devlink_rate, parent->parent)) { - NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node."); + NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node."); return -EEXIST; } @@ -1611,16 +1604,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, if (type == 
DEVLINK_RATE_TYPE_LEAF) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) { - NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs"); + NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) { - NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs"); + NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && !ops->rate_leaf_parent_set) { - NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs"); + NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) { @@ -1637,16 +1630,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops, } } else if (type == DEVLINK_RATE_TYPE_NODE) { if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) { - NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes"); + NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) { - NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes"); + NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] && !ops->rate_node_parent_set) { - NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes"); + NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes"); return false; } if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) { @@ -1697,7 +1690,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb, ops = devlink->ops; if (!ops || !ops->rate_node_new || !ops->rate_node_del) { - 
NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported"); + NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported"); return -EOPNOTSUPP; } @@ -1753,7 +1746,7 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb, int err; if (refcount_read(&rate_node->refcnt) > 1) { - NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node."); + NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node."); return -EBUSY; } @@ -1941,26 +1934,26 @@ static int devlink_linecard_type_set(struct devlink_linecard *linecard, mutex_lock(&linecard->state_lock); if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { - NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + NL_SET_ERR_MSG(extack, "Line card is currently being provisioned"); err = -EBUSY; goto out; } if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { - NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned"); err = -EBUSY; goto out; } linecard_type = devlink_linecard_type_lookup(linecard, type); if (!linecard_type) { - NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided"); + NL_SET_ERR_MSG(extack, "Unsupported line card type provided"); err = -EINVAL; goto out; } if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED && linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) { - NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned"); + NL_SET_ERR_MSG(extack, "Line card already provisioned"); err = -EBUSY; /* Check if the line card is provisioned in the same * way the user asks. 
In case it is, make the operation @@ -2004,12 +1997,12 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard, mutex_lock(&linecard->state_lock); if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) { - NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned"); + NL_SET_ERR_MSG(extack, "Line card is currently being provisioned"); err = -EBUSY; goto out; } if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) { - NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned"); + NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned"); err = -EBUSY; goto out; } @@ -2022,7 +2015,7 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard, } if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) { - NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned"); + NL_SET_ERR_MSG(extack, "Line card is not provisioned"); err = 0; goto out; } @@ -2846,7 +2839,7 @@ int devlink_rate_nodes_check(struct devlink *devlink, u16 mode, list_for_each_entry(devlink_rate, &devlink->rate_list, list) if (devlink_rate_is_node(devlink_rate)) { - NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists."); + NL_SET_ERR_MSG(extack, "Rate node(s) exists."); return -EBUSY; } return 0; @@ -3612,18 +3605,18 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size, int err = 0; if (size > resource->size_params.size_max) { - NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum"); + NL_SET_ERR_MSG(extack, "Size larger than maximum"); err = -EINVAL; } if (size < resource->size_params.size_min) { - NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum"); + NL_SET_ERR_MSG(extack, "Size smaller than minimum"); err = -EINVAL; } div64_u64_rem(size, resource->size_params.size_granularity, &reminder); if (reminder) { - NL_SET_ERR_MSG_MOD(extack, "Wrong granularity"); + NL_SET_ERR_MSG(extack, "Wrong granularity"); err = -EINVAL; } @@ -3960,26 +3953,22 @@ static int devlink_param_driver_verify(const 
struct devlink_param *param) } static struct devlink_param_item * -devlink_param_find_by_name(struct list_head *param_list, - const char *param_name) +devlink_param_find_by_name(struct xarray *params, const char *param_name) { struct devlink_param_item *param_item; + unsigned long param_id; - list_for_each_entry(param_item, param_list, list) + xa_for_each(params, param_id, param_item) { if (!strcmp(param_item->param->name, param_name)) return param_item; + } return NULL; } static struct devlink_param_item * -devlink_param_find_by_id(struct list_head *param_list, u32 param_id) +devlink_param_find_by_id(struct xarray *params, u32 param_id) { - struct devlink_param_item *param_item; - - list_for_each_entry(param_item, param_list, list) - if (param_item->param->id == param_id) - return param_item; - return NULL; + return xa_load(params, param_id); } static bool @@ -4098,9 +4087,12 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink, if (!devlink_param_cmode_is_supported(param, i)) continue; if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) { - if (!param_item->driverinit_value_valid) + if (param_item->driverinit_value_new_valid) + param_value[i] = param_item->driverinit_value_new; + else if (param_item->driverinit_value_valid) + param_value[i] = param_item->driverinit_value; + else return -EOPNOTSUPP; - param_value[i] = param_item->driverinit_value; } else { ctx.cmode = i; err = devlink_param_get(devlink, param, &ctx); @@ -4205,14 +4197,10 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink, { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_param_item *param_item; - int idx = 0; + unsigned long param_id; int err = 0; - list_for_each_entry(param_item, &devlink->param_list, list) { - if (idx < state->idx) { - idx++; - continue; - } + xa_for_each_start(&devlink->params, param_id, param_item, state->idx) { err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, 
NETLINK_CB(cb->skb).portid, @@ -4221,10 +4209,9 @@ devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink, if (err == -EOPNOTSUPP) { err = 0; } else if (err) { - state->idx = idx; + state->idx = param_id; break; } - idx++; } return err; @@ -4310,8 +4297,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param, } static struct devlink_param_item * -devlink_param_get_from_info(struct list_head *param_list, - struct genl_info *info) +devlink_param_get_from_info(struct xarray *params, struct genl_info *info) { char *param_name; @@ -4319,7 +4305,7 @@ devlink_param_get_from_info(struct list_head *param_list, return NULL; param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]); - return devlink_param_find_by_name(param_list, param_name); + return devlink_param_find_by_name(params, param_name); } static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, @@ -4330,7 +4316,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - param_item = devlink_param_get_from_info(&devlink->param_list, info); + param_item = devlink_param_get_from_info(&devlink->params, info); if (!param_item) return -EINVAL; @@ -4351,7 +4337,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb, static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, unsigned int port_index, - struct list_head *param_list, + struct xarray *params, struct genl_info *info, enum devlink_command cmd) { @@ -4363,7 +4349,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, union devlink_param_value value; int err = 0; - param_item = devlink_param_get_from_info(param_list, info); + param_item = devlink_param_get_from_info(params, info); if (!param_item) return -EINVAL; param = param_item->param; @@ -4388,11 +4374,8 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { - if (param->type == 
DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, value.vstr); - else - param_item->driverinit_value = value; - param_item->driverinit_value_valid = true; + param_item->driverinit_value_new = value; + param_item->driverinit_value_new_valid = true; } else { if (!param->set) return -EOPNOTSUPP; @@ -4412,28 +4395,28 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb, { struct devlink *devlink = info->user_ptr[0]; - return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list, + return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params, info, DEVLINK_CMD_PARAM_NEW); } static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { - NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported"); + NL_SET_ERR_MSG(cb->extack, "Port params are not supported"); return msg->len; } static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb, struct genl_info *info) { - NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; } static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { - NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported"); + NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; } @@ -5002,7 +4985,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) int err; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) { - NL_SET_ERR_MSG_MOD(info->extack, "No region name provided"); + NL_SET_ERR_MSG(info->extack, "No region name provided"); return -EINVAL; } @@ -5022,19 +5005,19 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) region = devlink_region_get_by_name(devlink, region_name); if (!region) { - NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not exist"); + NL_SET_ERR_MSG(info->extack, "The requested region does not exist"); return -EINVAL; } 
if (!region->ops->snapshot) { - NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not support taking an immediate snapshot"); + NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot"); return -EOPNOTSUPP; } mutex_lock(&region->snapshot_lock); if (region->cur_snapshots == region->max_snapshots) { - NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots"); + NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots"); err = -ENOSPC; goto unlock; } @@ -5044,7 +5027,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) snapshot_id = nla_get_u32(snapshot_id_attr); if (devlink_region_snapshot_get_by_id(region, snapshot_id)) { - NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use"); + NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use"); err = -EEXIST; goto unlock; } @@ -5055,7 +5038,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info) } else { err = __devlink_region_snapshot_id_get(devlink, &snapshot_id); if (err) { - NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id"); + NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id"); goto unlock; } } @@ -5389,1332 +5372,6 @@ out_unlock: return err; } -struct devlink_fmsg_item { - struct list_head list; - int attrtype; - u8 nla_type; - u16 len; - int value[]; -}; - -struct devlink_fmsg { - struct list_head item_list; - bool putting_binary; /* This flag forces enclosing of binary data - * in an array brackets. 
It forces using - * of designated API: - * devlink_fmsg_binary_pair_nest_start() - * devlink_fmsg_binary_pair_nest_end() - */ -}; - -static struct devlink_fmsg *devlink_fmsg_alloc(void) -{ - struct devlink_fmsg *fmsg; - - fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL); - if (!fmsg) - return NULL; - - INIT_LIST_HEAD(&fmsg->item_list); - - return fmsg; -} - -static void devlink_fmsg_free(struct devlink_fmsg *fmsg) -{ - struct devlink_fmsg_item *item, *tmp; - - list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) { - list_del(&item->list); - kfree(item); - } - kfree(fmsg); -} - -static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg, - int attrtype) -{ - struct devlink_fmsg_item *item; - - item = kzalloc(sizeof(*item), GFP_KERNEL); - if (!item) - return -ENOMEM; - - item->attrtype = attrtype; - list_add_tail(&item->list, &fmsg->item_list); - - return 0; -} - -int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start); - -static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END); -} - -int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_end(fmsg); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end); - -#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN) - -static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name) -{ - struct devlink_fmsg_item *item; - - if (fmsg->putting_binary) - return -EINVAL; - - if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; - - item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL); - if (!item) - return -ENOMEM; - - item->nla_type = NLA_NUL_STRING; - item->len = strlen(name) + 1; - 
item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME; - memcpy(&item->value, name, item->len); - list_add_tail(&item->list, &fmsg->item_list); - - return 0; -} - -int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name) -{ - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START); - if (err) - return err; - - err = devlink_fmsg_put_name(fmsg, name); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start); - -int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_nest_end(fmsg); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end); - -int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) -{ - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start); - -int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg) -{ - int err; - - if (fmsg->putting_binary) - return -EINVAL; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - err = devlink_fmsg_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end); - -int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg, - const char *name) -{ - int err; - - err = devlink_fmsg_arr_pair_nest_start(fmsg, name); - if (err) - return err; - - fmsg->putting_binary = true; - return err; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start); - -int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg) -{ - if (!fmsg->putting_binary) - return -EINVAL; - - fmsg->putting_binary = false; - return devlink_fmsg_arr_pair_nest_end(fmsg); -} 
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end); - -static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg, - const void *value, u16 value_len, - u8 value_nla_type) -{ - struct devlink_fmsg_item *item; - - if (value_len > DEVLINK_FMSG_MAX_SIZE) - return -EMSGSIZE; - - item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL); - if (!item) - return -ENOMEM; - - item->nla_type = value_nla_type; - item->len = value_len; - item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; - memcpy(&item->value, value, item->len); - list_add_tail(&item->list, &fmsg->item_list); - - return 0; -} - -static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG); -} - -static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8); -} - -int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put); - -static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64); -} - -int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value) -{ - if (fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1, - NLA_NUL_STRING); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_string_put); - -int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value, - u16 value_len) -{ - if (!fmsg->putting_binary) - return -EINVAL; - - return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY); -} -EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put); - -int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, 
const char *name, - bool value) -{ - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_bool_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put); - -int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name, - u8 value) -{ - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u8_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put); - -int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name, - u32 value) -{ - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u32_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put); - -int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name, - u64 value) -{ - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_u64_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put); - -int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name, - const char *value) -{ - int err; - - err = devlink_fmsg_pair_nest_start(fmsg, name); - if (err) - return err; - - err = devlink_fmsg_string_put(fmsg, value); - if (err) - return err; - - err = devlink_fmsg_pair_nest_end(fmsg); - if (err) - return err; - - return 0; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put); - -int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name, - const void *value, u32 value_len) 
-{ - u32 data_size; - int end_err; - u32 offset; - int err; - - err = devlink_fmsg_binary_pair_nest_start(fmsg, name); - if (err) - return err; - - for (offset = 0; offset < value_len; offset += data_size) { - data_size = value_len - offset; - if (data_size > DEVLINK_FMSG_MAX_SIZE) - data_size = DEVLINK_FMSG_MAX_SIZE; - err = devlink_fmsg_binary_put(fmsg, value + offset, data_size); - if (err) - break; - /* Exit from loop with a break (instead of - * return) to make sure putting_binary is turned off in - * devlink_fmsg_binary_pair_nest_end - */ - } - - end_err = devlink_fmsg_binary_pair_nest_end(fmsg); - if (end_err) - err = end_err; - - return err; -} -EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put); - -static int -devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb) -{ - switch (msg->nla_type) { - case NLA_FLAG: - case NLA_U8: - case NLA_U32: - case NLA_U64: - case NLA_NUL_STRING: - case NLA_BINARY: - return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE, - msg->nla_type); - default: - return -EINVAL; - } -} - -static int -devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb) -{ - int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA; - u8 tmp; - - switch (msg->nla_type) { - case NLA_FLAG: - /* Always provide flag data, regardless of its value */ - tmp = *(bool *) msg->value; - - return nla_put_u8(skb, attrtype, tmp); - case NLA_U8: - return nla_put_u8(skb, attrtype, *(u8 *) msg->value); - case NLA_U32: - return nla_put_u32(skb, attrtype, *(u32 *) msg->value); - case NLA_U64: - return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value, - DEVLINK_ATTR_PAD); - case NLA_NUL_STRING: - return nla_put_string(skb, attrtype, (char *) &msg->value); - case NLA_BINARY: - return nla_put(skb, attrtype, msg->len, (void *) &msg->value); - default: - return -EINVAL; - } -} - -static int -devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb, - int *start) -{ - struct devlink_fmsg_item *item; - struct nlattr 
*fmsg_nlattr; - int err = 0; - int i = 0; - - fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG); - if (!fmsg_nlattr) - return -EMSGSIZE; - - list_for_each_entry(item, &fmsg->item_list, list) { - if (i < *start) { - i++; - continue; - } - - switch (item->attrtype) { - case DEVLINK_ATTR_FMSG_OBJ_NEST_START: - case DEVLINK_ATTR_FMSG_PAIR_NEST_START: - case DEVLINK_ATTR_FMSG_ARR_NEST_START: - case DEVLINK_ATTR_FMSG_NEST_END: - err = nla_put_flag(skb, item->attrtype); - break; - case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA: - err = devlink_fmsg_item_fill_type(item, skb); - if (err) - break; - err = devlink_fmsg_item_fill_data(item, skb); - break; - case DEVLINK_ATTR_FMSG_OBJ_NAME: - err = nla_put_string(skb, item->attrtype, - (char *) &item->value); - break; - default: - err = -EINVAL; - break; - } - if (!err) - *start = ++i; - else - break; - } - - nla_nest_end(skb, fmsg_nlattr); - return err; -} - -static int devlink_fmsg_snd(struct devlink_fmsg *fmsg, - struct genl_info *info, - enum devlink_command cmd, int flags) -{ - struct nlmsghdr *nlh; - struct sk_buff *skb; - bool last = false; - int index = 0; - void *hdr; - int err; - - while (!last) { - int tmp_index = index; - - skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, - &devlink_nl_family, flags | NLM_F_MULTI, cmd); - if (!hdr) { - err = -EMSGSIZE; - goto nla_put_failure; - } - - err = devlink_fmsg_prepare_skb(fmsg, skb, &index); - if (!err) - last = true; - else if (err != -EMSGSIZE || tmp_index == index) - goto nla_put_failure; - - genlmsg_end(skb, hdr); - err = genlmsg_reply(skb, info); - if (err) - return err; - } - - skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOMEM; - nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq, - NLMSG_DONE, 0, flags | NLM_F_MULTI); - if (!nlh) { - err = -EMSGSIZE; - goto nla_put_failure; - } - - return genlmsg_reply(skb, info); - -nla_put_failure: - 
nlmsg_free(skb); - return err; -} - -static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb, - struct netlink_callback *cb, - enum devlink_command cmd) -{ - struct devlink_nl_dump_state *state = devlink_dump_state(cb); - int index = state->idx; - int tmp_index = index; - void *hdr; - int err; - - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd); - if (!hdr) { - err = -EMSGSIZE; - goto nla_put_failure; - } - - err = devlink_fmsg_prepare_skb(fmsg, skb, &index); - if ((err && err != -EMSGSIZE) || tmp_index == index) - goto nla_put_failure; - - state->idx = index; - genlmsg_end(skb, hdr); - return skb->len; - -nla_put_failure: - genlmsg_cancel(skb, hdr); - return err; -} - -struct devlink_health_reporter { - struct list_head list; - void *priv; - const struct devlink_health_reporter_ops *ops; - struct devlink *devlink; - struct devlink_port *devlink_port; - struct devlink_fmsg *dump_fmsg; - struct mutex dump_lock; /* lock parallel read/write from dump buffers */ - u64 graceful_period; - bool auto_recover; - bool auto_dump; - u8 health_state; - u64 dump_ts; - u64 dump_real_ts; - u64 error_count; - u64 recovery_count; - u64 last_recovery_ts; -}; - -void * -devlink_health_reporter_priv(struct devlink_health_reporter *reporter) -{ - return reporter->priv; -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_priv); - -static struct devlink_health_reporter * -__devlink_health_reporter_find_by_name(struct list_head *reporter_list, - const char *reporter_name) -{ - struct devlink_health_reporter *reporter; - - list_for_each_entry(reporter, reporter_list, list) - if (!strcmp(reporter->ops->name, reporter_name)) - return reporter; - return NULL; -} - -static struct devlink_health_reporter * -devlink_health_reporter_find_by_name(struct devlink *devlink, - const char *reporter_name) -{ - return __devlink_health_reporter_find_by_name(&devlink->reporter_list, - reporter_name); -} - -static 
struct devlink_health_reporter * -devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port, - const char *reporter_name) -{ - return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list, - reporter_name); -} - -static struct devlink_health_reporter * -__devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) -{ - struct devlink_health_reporter *reporter; - - if (WARN_ON(graceful_period && !ops->recover)) - return ERR_PTR(-EINVAL); - - reporter = kzalloc(sizeof(*reporter), GFP_KERNEL); - if (!reporter) - return ERR_PTR(-ENOMEM); - - reporter->priv = priv; - reporter->ops = ops; - reporter->devlink = devlink; - reporter->graceful_period = graceful_period; - reporter->auto_recover = !!ops->recover; - reporter->auto_dump = !!ops->dump; - mutex_init(&reporter->dump_lock); - return reporter; -} - -/** - * devl_port_health_reporter_create - create devlink health reporter for - * specified port instance - * - * @port: devlink_port which should contain the new reporter - * @ops: ops - * @graceful_period: to avoid recovery loops, in msecs - * @priv: priv - */ -struct devlink_health_reporter * -devl_port_health_reporter_create(struct devlink_port *port, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) -{ - struct devlink_health_reporter *reporter; - - devl_assert_locked(port->devlink); - - if (__devlink_health_reporter_find_by_name(&port->reporter_list, - ops->name)) - return ERR_PTR(-EEXIST); - - reporter = __devlink_health_reporter_create(port->devlink, ops, - graceful_period, priv); - if (IS_ERR(reporter)) - return reporter; - - reporter->devlink_port = port; - list_add_tail(&reporter->list, &port->reporter_list); - return reporter; -} -EXPORT_SYMBOL_GPL(devl_port_health_reporter_create); - -struct devlink_health_reporter * -devlink_port_health_reporter_create(struct devlink_port *port, - const struct 
devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) -{ - struct devlink_health_reporter *reporter; - struct devlink *devlink = port->devlink; - - devl_lock(devlink); - reporter = devl_port_health_reporter_create(port, ops, - graceful_period, priv); - devl_unlock(devlink); - return reporter; -} -EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create); - -/** - * devl_health_reporter_create - create devlink health reporter - * - * @devlink: devlink - * @ops: ops - * @graceful_period: to avoid recovery loops, in msecs - * @priv: priv - */ -struct devlink_health_reporter * -devl_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) -{ - struct devlink_health_reporter *reporter; - - devl_assert_locked(devlink); - - if (devlink_health_reporter_find_by_name(devlink, ops->name)) - return ERR_PTR(-EEXIST); - - reporter = __devlink_health_reporter_create(devlink, ops, - graceful_period, priv); - if (IS_ERR(reporter)) - return reporter; - - list_add_tail(&reporter->list, &devlink->reporter_list); - return reporter; -} -EXPORT_SYMBOL_GPL(devl_health_reporter_create); - -struct devlink_health_reporter * -devlink_health_reporter_create(struct devlink *devlink, - const struct devlink_health_reporter_ops *ops, - u64 graceful_period, void *priv) -{ - struct devlink_health_reporter *reporter; - - devl_lock(devlink); - reporter = devl_health_reporter_create(devlink, ops, - graceful_period, priv); - devl_unlock(devlink); - return reporter; -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_create); - -static void -devlink_health_reporter_free(struct devlink_health_reporter *reporter) -{ - mutex_destroy(&reporter->dump_lock); - if (reporter->dump_fmsg) - devlink_fmsg_free(reporter->dump_fmsg); - kfree(reporter); -} - -/** - * devl_health_reporter_destroy - destroy devlink health reporter - * - * @reporter: devlink health reporter to destroy - */ -void -devl_health_reporter_destroy(struct 
devlink_health_reporter *reporter) -{ - devl_assert_locked(reporter->devlink); - - list_del(&reporter->list); - devlink_health_reporter_free(reporter); -} -EXPORT_SYMBOL_GPL(devl_health_reporter_destroy); - -void -devlink_health_reporter_destroy(struct devlink_health_reporter *reporter) -{ - struct devlink *devlink = reporter->devlink; - - devl_lock(devlink); - devl_health_reporter_destroy(reporter); - devl_unlock(devlink); -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy); - -static int -devlink_nl_health_reporter_fill(struct sk_buff *msg, - struct devlink_health_reporter *reporter, - enum devlink_command cmd, u32 portid, - u32 seq, int flags) -{ - struct devlink *devlink = reporter->devlink; - struct nlattr *reporter_attr; - void *hdr; - - hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); - if (!hdr) - return -EMSGSIZE; - - if (devlink_nl_put_handle(msg, devlink)) - goto genlmsg_cancel; - - if (reporter->devlink_port) { - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index)) - goto genlmsg_cancel; - } - reporter_attr = nla_nest_start_noflag(msg, - DEVLINK_ATTR_HEALTH_REPORTER); - if (!reporter_attr) - goto genlmsg_cancel; - if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME, - reporter->ops->name)) - goto reporter_nest_cancel; - if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE, - reporter->health_state)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT, - reporter->error_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT, - reporter->recovery_count, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD, - reporter->graceful_period, - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->recover && - nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER, - 
reporter->auto_recover)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS, - jiffies_to_msecs(reporter->dump_ts), - DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->dump_fmsg && - nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, - reporter->dump_real_ts, DEVLINK_ATTR_PAD)) - goto reporter_nest_cancel; - if (reporter->ops->dump && - nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, - reporter->auto_dump)) - goto reporter_nest_cancel; - - nla_nest_end(msg, reporter_attr); - genlmsg_end(msg, hdr); - return 0; - -reporter_nest_cancel: - nla_nest_end(msg, reporter_attr); -genlmsg_cancel: - genlmsg_cancel(msg, hdr); - return -EMSGSIZE; -} - -static void devlink_recover_notify(struct devlink_health_reporter *reporter, - enum devlink_command cmd) -{ - struct devlink *devlink = reporter->devlink; - struct sk_buff *msg; - int err; - - WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED)); - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0); - if (err) { - nlmsg_free(msg); - return; - } - - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg, - 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); -} - -void -devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter) -{ - reporter->recovery_count++; - reporter->last_recovery_ts = jiffies; -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done); - -static int -devlink_health_reporter_recover(struct devlink_health_reporter *reporter, - void *priv_ctx, struct netlink_ext_ack *extack) -{ - int err; - - if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY) - return 0; - - if (!reporter->ops->recover) - return -EOPNOTSUPP; - - err = reporter->ops->recover(reporter, priv_ctx, extack); - if (err) - return err; - - 
devlink_health_reporter_recovery_done(reporter); - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY; - devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - - return 0; -} - -static void -devlink_health_dump_clear(struct devlink_health_reporter *reporter) -{ - if (!reporter->dump_fmsg) - return; - devlink_fmsg_free(reporter->dump_fmsg); - reporter->dump_fmsg = NULL; -} - -static int devlink_health_do_dump(struct devlink_health_reporter *reporter, - void *priv_ctx, - struct netlink_ext_ack *extack) -{ - int err; - - if (!reporter->ops->dump) - return 0; - - if (reporter->dump_fmsg) - return 0; - - reporter->dump_fmsg = devlink_fmsg_alloc(); - if (!reporter->dump_fmsg) { - err = -ENOMEM; - return err; - } - - err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg); - if (err) - goto dump_err; - - err = reporter->ops->dump(reporter, reporter->dump_fmsg, - priv_ctx, extack); - if (err) - goto dump_err; - - err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg); - if (err) - goto dump_err; - - reporter->dump_ts = jiffies; - reporter->dump_real_ts = ktime_get_real_ns(); - - return 0; - -dump_err: - devlink_health_dump_clear(reporter); - return err; -} - -int devlink_health_report(struct devlink_health_reporter *reporter, - const char *msg, void *priv_ctx) -{ - enum devlink_health_reporter_state prev_health_state; - struct devlink *devlink = reporter->devlink; - unsigned long recover_ts_threshold; - int ret; - - /* write a log message of the current error */ - WARN_ON(!msg); - trace_devlink_health_report(devlink, reporter->ops->name, msg); - reporter->error_count++; - prev_health_state = reporter->health_state; - reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR; - devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); - - /* abort if the previous error wasn't recovered */ - recover_ts_threshold = reporter->last_recovery_ts + - msecs_to_jiffies(reporter->graceful_period); - if (reporter->auto_recover && - 
(prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || - (reporter->last_recovery_ts && reporter->recovery_count && - time_is_after_jiffies(recover_ts_threshold)))) { - trace_devlink_health_recover_aborted(devlink, - reporter->ops->name, - reporter->health_state, - jiffies - - reporter->last_recovery_ts); - return -ECANCELED; - } - - if (reporter->auto_dump) { - mutex_lock(&reporter->dump_lock); - /* store current dump of current error, for later analysis */ - devlink_health_do_dump(reporter, priv_ctx, NULL); - mutex_unlock(&reporter->dump_lock); - } - - if (!reporter->auto_recover) - return 0; - - devl_lock(devlink); - ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL); - devl_unlock(devlink); - - return ret; -} -EXPORT_SYMBOL_GPL(devlink_health_report); - -static struct devlink_health_reporter * -devlink_health_reporter_get_from_attrs(struct devlink *devlink, - struct nlattr **attrs) -{ - struct devlink_port *devlink_port; - char *reporter_name; - - if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]) - return NULL; - - reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]); - devlink_port = devlink_port_get_from_attrs(devlink, attrs); - if (IS_ERR(devlink_port)) - return devlink_health_reporter_find_by_name(devlink, - reporter_name); - else - return devlink_port_health_reporter_find_by_name(devlink_port, - reporter_name); -} - -static struct devlink_health_reporter * -devlink_health_reporter_get_from_info(struct devlink *devlink, - struct genl_info *info) -{ - return devlink_health_reporter_get_from_attrs(devlink, info->attrs); -} - -static struct devlink_health_reporter * -devlink_health_reporter_get_from_cb(struct netlink_callback *cb) -{ - const struct genl_dumpit_info *info = genl_dumpit_info(cb); - struct devlink_health_reporter *reporter; - struct nlattr **attrs = info->attrs; - struct devlink *devlink; - - devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs); - if (IS_ERR(devlink)) - return NULL; - 
devl_unlock(devlink); - - reporter = devlink_health_reporter_get_from_attrs(devlink, attrs); - devlink_put(devlink); - return reporter; -} - -void -devlink_health_reporter_state_update(struct devlink_health_reporter *reporter, - enum devlink_health_reporter_state state) -{ - if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY && - state != DEVLINK_HEALTH_REPORTER_STATE_ERROR)) - return; - - if (reporter->health_state == state) - return; - - reporter->health_state = state; - trace_devlink_health_reporter_state_update(reporter->devlink, - reporter->ops->name, state); - devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); -} -EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update); - -static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - struct sk_buff *msg; - int err; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - err = devlink_nl_health_reporter_fill(msg, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - info->snd_portid, info->snd_seq, - 0); - if (err) { - nlmsg_free(msg); - return err; - } - - return genlmsg_reply(msg, info); -} - -static int -devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg, - struct devlink *devlink, - struct netlink_callback *cb) -{ - struct devlink_nl_dump_state *state = devlink_dump_state(cb); - struct devlink_health_reporter *reporter; - struct devlink_port *port; - unsigned long port_index; - int idx = 0; - int err; - - list_for_each_entry(reporter, &devlink->reporter_list, list) { - if (idx < state->idx) { - idx++; - continue; - } - err = devlink_nl_health_reporter_fill(msg, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - state->idx = idx; - 
return err; - } - idx++; - } - xa_for_each(&devlink->ports, port_index, port) { - list_for_each_entry(reporter, &port->reporter_list, list) { - if (idx < state->idx) { - idx++; - continue; - } - err = devlink_nl_health_reporter_fill(msg, reporter, - DEVLINK_CMD_HEALTH_REPORTER_GET, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI); - if (err) { - state->idx = idx; - return err; - } - idx++; - } - } - - return 0; -} - -const struct devlink_cmd devl_cmd_health_reporter_get = { - .dump_one = devlink_nl_cmd_health_reporter_get_dump_one, -}; - -static int -devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->recover && - (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] || - info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) - return -EOPNOTSUPP; - - if (!reporter->ops->dump && - info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) - return -EOPNOTSUPP; - - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]) - reporter->graceful_period = - nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]); - - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]) - reporter->auto_recover = - nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]); - - if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) - reporter->auto_dump = - nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]); - - return 0; -} - -static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - return 
devlink_health_reporter_recover(reporter, NULL, info->extack); -} - -static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - struct devlink_fmsg *fmsg; - int err; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->diagnose) - return -EOPNOTSUPP; - - fmsg = devlink_fmsg_alloc(); - if (!fmsg) - return -ENOMEM; - - err = devlink_fmsg_obj_nest_start(fmsg); - if (err) - goto out; - - err = reporter->ops->diagnose(reporter, fmsg, info->extack); - if (err) - goto out; - - err = devlink_fmsg_obj_nest_end(fmsg); - if (err) - goto out; - - err = devlink_fmsg_snd(fmsg, info, - DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0); - -out: - devlink_fmsg_free(fmsg); - return err; -} - -static int -devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct devlink_nl_dump_state *state = devlink_dump_state(cb); - struct devlink_health_reporter *reporter; - int err; - - reporter = devlink_health_reporter_get_from_cb(cb); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->dump) - return -EOPNOTSUPP; - - mutex_lock(&reporter->dump_lock); - if (!state->idx) { - err = devlink_health_do_dump(reporter, NULL, cb->extack); - if (err) - goto unlock; - state->dump_ts = reporter->dump_ts; - } - if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) { - NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry"); - err = -EAGAIN; - goto unlock; - } - - err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb, - DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET); -unlock: - mutex_unlock(&reporter->dump_lock); - return err; -} - -static int -devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - 
reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->dump) - return -EOPNOTSUPP; - - mutex_lock(&reporter->dump_lock); - devlink_health_dump_clear(reporter); - mutex_unlock(&reporter->dump_lock); - return 0; -} - -static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb, - struct genl_info *info) -{ - struct devlink *devlink = info->user_ptr[0]; - struct devlink_health_reporter *reporter; - - reporter = devlink_health_reporter_get_from_info(devlink, info); - if (!reporter) - return -EINVAL; - - if (!reporter->ops->test) - return -EOPNOTSUPP; - - return reporter->ops->test(reporter, info->extack); -} - struct devlink_stats { u64_stats_t rx_bytes; u64_stats_t rx_packets; @@ -7025,7 +5682,7 @@ static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb, trap_item = devlink_trap_item_get_from_info(devlink, info); if (!trap_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + NL_SET_ERR_MSG(extack, "Device did not register this trap"); return -ENOENT; } @@ -7088,7 +5745,7 @@ static int __devlink_trap_action_set(struct devlink *devlink, if (trap_item->action != trap_action && trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) { - NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping"); + NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. 
Skipping"); return 0; } @@ -7114,7 +5771,7 @@ static int devlink_trap_action_set(struct devlink *devlink, err = devlink_trap_action_get_from_info(info, &trap_action); if (err) { - NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + NL_SET_ERR_MSG(info->extack, "Invalid trap action"); return -EINVAL; } @@ -7134,7 +5791,7 @@ static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, trap_item = devlink_trap_item_get_from_info(devlink, info); if (!trap_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + NL_SET_ERR_MSG(extack, "Device did not register this trap"); return -ENOENT; } @@ -7236,7 +5893,7 @@ static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb, group_item = devlink_trap_group_item_get_from_info(devlink, info); if (!group_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + NL_SET_ERR_MSG(extack, "Device did not register this trap group"); return -ENOENT; } @@ -7345,7 +6002,7 @@ devlink_trap_group_action_set(struct devlink *devlink, err = devlink_trap_action_get_from_info(info, &trap_action); if (err) { - NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + NL_SET_ERR_MSG(info->extack, "Invalid trap action"); return -EINVAL; } @@ -7379,7 +6036,7 @@ static int devlink_trap_group_set(struct devlink *devlink, policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]); policer_item = devlink_trap_policer_item_lookup(devlink, policer_id); if (policer_id && !policer_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer"); + NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } policer = policer_item ? 
policer_item->policer : NULL; @@ -7408,7 +6065,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, group_item = devlink_trap_group_item_get_from_info(devlink, info); if (!group_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + NL_SET_ERR_MSG(extack, "Device did not register this trap group"); return -ENOENT; } @@ -7425,7 +6082,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, err_trap_group_set: if (modified) - NL_SET_ERR_MSG_MOD(extack, "Trap group set failed, but some changes were committed already"); + NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already"); return err; } @@ -7530,7 +6187,7 @@ static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb, policer_item = devlink_trap_policer_item_get_from_info(devlink, info); if (!policer_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer"); + NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } @@ -7605,22 +6262,22 @@ devlink_trap_policer_set(struct devlink *devlink, burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]); if (rate < policer_item->policer->min_rate) { - NL_SET_ERR_MSG_MOD(extack, "Policer rate lower than limit"); + NL_SET_ERR_MSG(extack, "Policer rate lower than limit"); return -EINVAL; } if (rate > policer_item->policer->max_rate) { - NL_SET_ERR_MSG_MOD(extack, "Policer rate higher than limit"); + NL_SET_ERR_MSG(extack, "Policer rate higher than limit"); return -EINVAL; } if (burst < policer_item->policer->min_burst) { - NL_SET_ERR_MSG_MOD(extack, "Policer burst size lower than limit"); + NL_SET_ERR_MSG(extack, "Policer burst size lower than limit"); return -EINVAL; } if (burst > policer_item->policer->max_burst) { - NL_SET_ERR_MSG_MOD(extack, "Policer burst size higher than limit"); + NL_SET_ERR_MSG(extack, "Policer burst size higher than limit"); return -EINVAL; } @@ -7650,7 +6307,7 @@ static int 
devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb, policer_item = devlink_trap_policer_item_get_from_info(devlink, info); if (!policer_item) { - NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer"); + NL_SET_ERR_MSG(extack, "Device did not register this trap policer"); return -ENOENT; } @@ -8044,6 +6701,7 @@ void devlink_notify_register(struct devlink *devlink) struct devlink_rate *rate_node; struct devlink_region *region; unsigned long port_index; + unsigned long param_id; devlink_notify(devlink, DEVLINK_CMD_NEW); list_for_each_entry(linecard, &devlink->linecard_list, list) @@ -8069,7 +6727,7 @@ void devlink_notify_register(struct devlink *devlink) list_for_each_entry(region, &devlink->region_list, list) devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW); - list_for_each_entry(param_item, &devlink->param_list, list) + xa_for_each(&devlink->params, param_id, param_item) devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } @@ -8084,8 +6742,9 @@ void devlink_notify_unregister(struct devlink *devlink) struct devlink_rate *rate_node; struct devlink_region *region; unsigned long port_index; + unsigned long param_id; - list_for_each_entry_reverse(param_item, &devlink->param_list, list) + xa_for_each(&devlink->params, param_id, param_item) devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL); @@ -9512,9 +8171,10 @@ static int devlink_param_register(struct devlink *devlink, const struct devlink_param *param) { struct devlink_param_item *param_item; + int err; WARN_ON(devlink_param_verify(param)); - WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name)); + WARN_ON(devlink_param_find_by_name(&devlink->params, param->name)); if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) WARN_ON(param->get || param->set); @@ -9527,9 +8187,16 @@ static int devlink_param_register(struct devlink *devlink, param_item->param = param; - list_add_tail(&param_item->list, &devlink->param_list); +
err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL); + if (err) + goto err_xa_insert; + devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); return 0; + +err_xa_insert: + kfree(param_item); + return err; } static void devlink_param_unregister(struct devlink *devlink, @@ -9537,12 +8204,11 @@ static void devlink_param_unregister(struct devlink *devlink, { struct devlink_param_item *param_item; - param_item = - devlink_param_find_by_name(&devlink->param_list, param->name); + param_item = devlink_param_find_by_id(&devlink->params, param->id); if (WARN_ON(!param_item)) return; devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL); - list_del(&param_item->list); + xa_erase(&devlink->params, param->id); kfree(param_item); } @@ -9630,22 +8296,32 @@ EXPORT_SYMBOL_GPL(devlink_params_unregister); * * @devlink: devlink * @param_id: parameter ID - * @init_val: value of parameter in driverinit configuration mode + * @val: pointer to store the value of parameter in driverinit + * configuration mode * * This function should be used by the driver to get driverinit * configuration for initialization after reload command. + * + * Note that lockless call of this function relies on the + * driver to maintain following basic sane behavior: + * 1) Driver ensures a call to this function cannot race with + * registering/unregistering the parameter with the same parameter ID. + * 2) Driver ensures a call to this function cannot race with + * devl_param_driverinit_value_set() call with the same parameter ID. + * 3) Driver ensures a call to this function cannot race with + * reload operation. + * If the driver is not able to comply, it has to take the devlink->lock + * while calling this.
*/ int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, - union devlink_param_value *init_val) + union devlink_param_value *val) { struct devlink_param_item *param_item; - lockdep_assert_held(&devlink->lock); - if (WARN_ON(!devlink_reload_supported(devlink->ops))) return -EOPNOTSUPP; - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); + param_item = devlink_param_find_by_id(&devlink->params, param_id); if (!param_item) return -EINVAL; @@ -9656,10 +8332,7 @@ int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT))) return -EOPNOTSUPP; - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(init_val->vstr, param_item->driverinit_value.vstr); - else - *init_val = param_item->driverinit_value; + *val = param_item->driverinit_value; return 0; } @@ -9682,7 +8355,9 @@ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, { struct devlink_param_item *param_item; - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); + devl_assert_locked(devlink); + + param_item = devlink_param_find_by_id(&devlink->params, param_id); if (WARN_ON(!param_item)) return; @@ -9690,16 +8365,29 @@ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, DEVLINK_PARAM_CMODE_DRIVERINIT))) return; - if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING) - strcpy(param_item->driverinit_value.vstr, init_val.vstr); - else - param_item->driverinit_value = init_val; + param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set); +void devlink_params_driverinit_load_new(struct devlink *devlink) +{ + struct devlink_param_item *param_item; + unsigned long param_id; + + xa_for_each(&devlink->params, param_id, param_item) { + if (!devlink_param_cmode_is_supported(param_item->param, + 
DEVLINK_PARAM_CMODE_DRIVERINIT) || + !param_item->driverinit_value_new_valid) + continue; + param_item->driverinit_value = param_item->driverinit_value_new; + param_item->driverinit_value_valid = true; + param_item->driverinit_value_new_valid = false; + } +} + /** * devl_param_value_changed - notify devlink on a parameter's value * change. Should be called by the driver @@ -9716,7 +8404,7 @@ void devl_param_value_changed(struct devlink *devlink, u32 param_id) { struct devlink_param_item *param_item; - param_item = devlink_param_find_by_id(&devlink->param_list, param_id); + param_item = devlink_param_find_by_id(&devlink->params, param_id); WARN_ON(!param_item); devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ae0732460e88..f7b189ed96b2 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -413,7 +413,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 2a2d3539630c..f358cd57d094 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -56,7 +56,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */ 
nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */ nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */ - nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */ + nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ + nla_total_size(sizeof(u8))); /* _RINGS_RX_PUSH */ } static int rings_fill_reply(struct sk_buff *skb, @@ -96,7 +97,8 @@ static int rings_fill_reply(struct sk_buff *skb, kr->tcp_data_split))) || (kr->cqe_size && (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) || - nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push)) + nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) || + nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push)) return -EMSGSIZE; return 0; @@ -114,6 +116,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1), [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), + [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), }; static int @@ -147,6 +150,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_RX_PUSH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_PUSH)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_RX_PUSH], + "setting rx push not supported"); + return -EOPNOTSUPP; + } + return ops->get_ringparam && ops->set_ringparam ? 
1 : -EOPNOTSUPP; } @@ -176,6 +187,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod); ethnl_update_u8(&kernel_ringparam.tx_push, tb[ETHTOOL_A_RINGS_TX_PUSH], &mod); + ethnl_update_u8(&kernel_ringparam.rx_push, + tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); if (!mod) return 0; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2c778b013cb0..8db6747f892f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -156,7 +156,6 @@ void inet_sock_destruct(struct sock *sk) kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1)); - sk_refcnt_debug_dec(sk); } EXPORT_SYMBOL(inet_sock_destruct); @@ -357,8 +356,6 @@ lookup_protocol: inet->mc_list = NULL; inet->rcv_tos = 0; - sk_refcnt_debug_inc(sk); - if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7d206a10ad14..eedcf4146d29 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1199,8 +1199,6 @@ void inet_csk_destroy_sock(struct sock *sk) xfrm_sk_free_policy(sk); - sk_refcnt_debug_release(sk); - this_cpu_dec(*sk->sk_prot->orphan_count); sock_put(sk); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index beed32fff484..40052414c7c7 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -77,9 +77,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw) { struct module *owner = tw->tw_prot->owner; twsk_destructor((struct sock *)tw); -#ifdef SOCK_REFCNT_DEBUG - pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw); -#endif kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw); module_put(owner); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 847934763868..38689bedfce7 100644 --- a/net/ipv6/af_inet6.c 
+++ b/net/ipv6/af_inet6.c @@ -239,16 +239,6 @@ lookup_protocol: inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; - /* - * Increment only the relevant sk_prot->socks debug field, this changes - * the previous behaviour of incrementing both the equivalent to - * answer->prot->socks (inet6_sock_nr) and inet_sock_nr. - * - * This allows better debug granularity as we'll know exactly how many - * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6 - * transport protocol socks. -acme - */ - sk_refcnt_debug_inc(sk); if (inet->inet_num) { /* It assumes that any protocol which allows diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index c9346515e24d..f32bc98155bf 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -813,16 +813,19 @@ out_bh_enable: local_bh_enable(); } -void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) +enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, + u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); const struct inet6_protocol *ipprot; + enum skb_drop_reason reason; int inner_offset; __be16 frag_off; u8 nexthdr; - struct net *net = dev_net(skb->dev); - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr)); + if (reason != SKB_NOT_DROPPED_YET) goto out; seg6_icmp_srh(skb, opt); @@ -832,14 +835,17 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (inner_offset < 0) + if (inner_offset < 0) { + SKB_DR_SET(reason, IPV6_BAD_EXTHDR); goto out; + } } else { inner_offset = sizeof(struct ipv6hdr); } /* Checkin header including 8 bytes of inner protocol header. 
*/ - if (!pskb_may_pull(skb, inner_offset+8)) + reason = pskb_may_pull_reason(skb, inner_offset + 8); + if (reason != SKB_NOT_DROPPED_YET) goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. @@ -854,10 +860,11 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) ipprot->err_handler(skb, opt, type, code, inner_offset, info); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); - return; + return SKB_CONSUMED; out: __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + return reason; } /* @@ -953,7 +960,8 @@ static int icmpv6_rcv(struct sk_buff *skb) case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: - icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); + reason = icmpv6_notify(skb, type, hdr->icmp6_code, + hdr->icmp6_mtu); break; case NDISC_ROUTER_SOLICITATION: @@ -961,7 +969,7 @@ static int icmpv6_rcv(struct sk_buff *skb) case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: - ndisc_rcv(skb); + reason = ndisc_rcv(skb); break; case ICMPV6_MGM_QUERY: @@ -995,7 +1003,8 @@ static int icmpv6_rcv(struct sk_buff *skb) * must pass to upper level */ - icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); + reason = icmpv6_notify(skb, type, hdr->icmp6_code, + hdr->icmp6_mtu); } /* until the v6 path can be better sorted assume failure and diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9ce51680290b..2917dd8d198c 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -464,13 +464,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); - /* - * Sock is moving from IPv6 to IPv4 (sk_prot), so - * remove it from the refcnt debug socks count in the - * original family... 
- */ - sk_refcnt_debug_dec(sk); - if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -507,11 +500,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, inet6_cleanup_sock(sk); - /* - * ... and add it to the refcnt debug socks count - * in the new family. -acme - */ - sk_refcnt_debug_inc(sk); module_put(THIS_MODULE); retv = 0; break; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3a553494ff16..9548b5a44714 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1804,15 +1804,16 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) return false; } -int ndisc_rcv(struct sk_buff *skb) +enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; + SKB_DR(reason); if (ndisc_suppress_frag_ndisc(skb)) - return 0; + return SKB_DROP_REASON_IPV6_NDISC_FRAG; if (skb_linearize(skb)) - return 0; + return SKB_DROP_REASON_NOMEM; msg = (struct nd_msg *)skb_transport_header(skb); @@ -1821,13 +1822,13 @@ int ndisc_rcv(struct sk_buff *skb) if (ipv6_hdr(skb)->hop_limit != 255) { ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); - return 0; + return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); - return 0; + return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } switch (msg->icmph.icmp6_type) { @@ -1853,7 +1854,7 @@ int ndisc_rcv(struct sk_buff *skb) break; } - return 0; + return reason; } static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 487f8e98deaa..dd433cc265c8 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -109,8 +109,15 @@ struct bpf_lwt_prog { #define next_csid_chk_lcnode_fn_bits(flen) \ next_csid_chk_lcblock_bits(flen) +#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname) +#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP) + 
+/* Supported RFC8986 Flavor operations are reported in this bitmask */ +#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP + /* Supported Flavor operations are reported in this bitmask */ -#define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID)) +#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \ + SEG6_LOCAL_FLV8986_SUPP_OPS) struct seg6_flavors_info { /* Flavor operations */ @@ -364,6 +371,14 @@ static void seg6_next_csid_advance_arg(struct in6_addr *addr, memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects); } +static int input_action_end_finish(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + seg6_lookup_nexthop(skb, NULL, 0); + + return dst_input(skb); +} + static int input_action_end_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -375,9 +390,7 @@ static int input_action_end_core(struct sk_buff *skb, advance_nextseg(srh, &ipv6_hdr(skb)->daddr); - seg6_lookup_nexthop(skb, NULL, 0); - - return dst_input(skb); + return input_action_end_finish(skb, slwt); drop: kfree_skb(skb); @@ -395,9 +408,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) /* update DA */ seg6_next_csid_advance_arg(daddr, finfo); - seg6_lookup_nexthop(skb, NULL, 0); - - return dst_input(skb); + return input_action_end_finish(skb, slwt); } static bool seg6_next_csid_enabled(__u32 fops) @@ -405,15 +416,331 @@ static bool seg6_next_csid_enabled(__u32 fops) return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID); } +/* We describe the packet state in relation to the absence/presence of the SRH + * and the Segment Left (SL) field. + * For our purposes, it is not necessary to record the exact value of the SL + * when the SID List consists of two or more segments. + */ +enum seg6_local_pktinfo { + /* the order really matters! 
*/ + SEG6_LOCAL_PKTINFO_NOHDR = 0, + SEG6_LOCAL_PKTINFO_SL_ZERO, + SEG6_LOCAL_PKTINFO_SL_ONE, + SEG6_LOCAL_PKTINFO_SL_MORE, + __SEG6_LOCAL_PKTINFO_MAX, +}; + +#define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1) + +static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh) +{ + __u8 sgl; + + if (!srh) + return SEG6_LOCAL_PKTINFO_NOHDR; + + sgl = srh->segments_left; + if (sgl < 2) + return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl; + + return SEG6_LOCAL_PKTINFO_SL_MORE; +} + +enum seg6_local_flv_action { + SEG6_LOCAL_FLV_ACT_UNSPEC = 0, + SEG6_LOCAL_FLV_ACT_END, + SEG6_LOCAL_FLV_ACT_PSP, + SEG6_LOCAL_FLV_ACT_USP, + SEG6_LOCAL_FLV_ACT_USD, + __SEG6_LOCAL_FLV_ACT_MAX +}; + +#define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1) + +/* The action table for RFC8986 flavors (see the flv8986_act_tbl below) + * contains the actions (i.e. processing operations) to be applied on packets + * when flavors are configured for an End* behavior. + * By combining the pkinfo data and from the flavors mask, the macro + * computes the index used to access the elements (actions) stored in the + * action table. The index is structured as follows: + * + * index + * _______________/\________________ + * / \ + * +----------------+----------------+ + * | pf | afm | + * +----------------+----------------+ + * ph-1 ... p1 p0 fk-1 ... f1 f0 + * MSB LSB + * + * where: + * - 'afm' (adjusted flavor mask) is the mask containing a combination of the + * RFC8986 flavors currently supported. 'afm' corresponds to the @fm + * argument of the macro whose value is righ-shifted by 1 bit. By doing so, + * we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is + * never used here; + * - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of + * the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as + * argument to the macro. 
+ */ +#define flv8986_act_tbl_idx(pf, fm) \ + ((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) | \ + ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP) + +/* We compute the size of the action table by considering the RFC8986 flavors + * actually supported by the kernel. In this way, the size is automatically + * adjusted when new flavors are supported. + */ +#define FLV8986_ACT_TBL_SIZE \ + roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX, \ + SEG6_LOCAL_FLV8986_SUPP_OPS)) + +/* tbl_cfg(act, pf, fm) macro is used to easily configure the action + * table; it accepts 3 arguments: + * i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing + * the action that should be applied on the packet; + * ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet + * info about the lack/presence of SRH, SRH with SL = 0, etc; + * iii) @fm, the mask of flavors. + */ +#define tbl_cfg(act, pf, fm) \ + [flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf, \ + (fm))] = SEG6_LOCAL_FLV_ACT_##act + +/* shorthand for improving readability */ +#define F_PSP SEG6_F_LOCAL_FLV_PSP + +/* The table contains, for each combination of the pktinfo data and + * flavors, the action that should be taken on a packet (e.g. + * "standard" Endpoint processing, Penultimate Segment Pop, etc). + * + * By default, table entries not explicitly configured are initialized with the + * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of + * discarding the processed packet. + */ +static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = { + /* PSP variant for packet where SRH with SL = 1 */ + tbl_cfg(PSP, SL_ONE, F_PSP), + /* End for packet where the SRH with SL > 1*/ + tbl_cfg(END, SL_MORE, F_PSP), +}; + +#undef F_PSP +#undef tbl_cfg + +/* For each flavor defined in RFC8986 (or a combination of them) an action is + * performed on the packet. The specific action depends on: + * - info extracted from the packet (i.e. 
pktinfo data) regarding the + * lack/presence of the SRH, and if the SRH is available, on the value of + * Segment Left field; + * - the mask of flavors configured for the specific SRv6 End* behavior. + * + * The function combines both the pkinfo and the flavors mask to evaluate the + * corresponding action to be taken on the packet. + */ +static enum seg6_local_flv_action +seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask) +{ + unsigned long index; + + /* check if the provided mask of flavors is supported */ + if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) + return SEG6_LOCAL_FLV_ACT_UNSPEC; + + index = flv8986_act_tbl_idx(pinfo, flvmask); + if (unlikely(index >= FLV8986_ACT_TBL_SIZE)) + return SEG6_LOCAL_FLV_ACT_UNSPEC; + + return flv8986_act_tbl[index]; +} + +/* skb->data must be aligned with skb->network_header */ +static bool seg6_pop_srh(struct sk_buff *skb, int srhoff) +{ + struct ipv6_sr_hdr *srh; + struct ipv6hdr *iph; + __u8 srh_nexthdr; + int thoff = -1; + int srhlen; + int nhlen; + + if (unlikely(srhoff < sizeof(*iph) || + !pskb_may_pull(skb, srhoff + sizeof(*srh)))) + return false; + + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + srhlen = ipv6_optlen(srh); + + /* we are about to mangle the pkt, let's check if we can write on it */ + if (unlikely(skb_ensure_writable(skb, srhoff + srhlen))) + return false; + + /* skb_ensure_writable() may change skb pointers; evaluate srh again */ + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + srh_nexthdr = srh->nexthdr; + + if (unlikely(!skb_transport_header_was_set(skb))) + goto pull; + + nhlen = skb_network_header_len(skb); + /* we have to deal with the transport header: it could be set before + * the SRH, after the SRH, or within it (which is considered wrong, + * however). 
+ */ + if (likely(nhlen <= srhoff)) + thoff = nhlen; + else if (nhlen >= srhoff + srhlen) + /* transport_header is set after the SRH */ + thoff = nhlen - srhlen; + else + /* transport_header falls inside the SRH; hence, we can't + * restore the transport_header pointer properly after + * SRH removing operation. + */ + return false; +pull: + /* we need to pop the SRH: + * 1) first of all, we pull out everything from IPv6 header up to SRH + * (included) evaluating also the rcsum; + * 2) we overwrite (and then remove) the SRH by properly moving the + * IPv6 along with any extension header that precedes the SRH; + * 3) At the end, we push back the pulled headers (except for SRH, + * obviously). + */ + skb_pull_rcsum(skb, srhoff + srhlen); + memmove(skb_network_header(skb) + srhlen, skb_network_header(skb), + srhoff); + skb_push(skb, srhoff); + + skb_reset_network_header(skb); + skb_mac_header_rebuild(skb); + if (likely(thoff >= 0)) + skb_set_transport_header(skb, thoff); + + iph = ipv6_hdr(skb); + if (iph->nexthdr == NEXTHDR_ROUTING) { + iph->nexthdr = srh_nexthdr; + } else { + /* we must look for the extension header (EXTH, for short) that + * immediately precedes the SRH we have just removed. + * Then, we update the value of the EXTH nexthdr with the one + * contained in the SRH nexthdr. 
+ */ + unsigned int off = sizeof(*iph); + struct ipv6_opt_hdr *hp, _hdr; + __u8 nexthdr = iph->nexthdr; + + for (;;) { + if (unlikely(!ipv6_ext_hdr(nexthdr) || + nexthdr == NEXTHDR_NONE)) + return false; + + hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr); + if (unlikely(!hp)) + return false; + + if (hp->nexthdr == NEXTHDR_ROUTING) { + hp->nexthdr = srh_nexthdr; + break; + } + + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + fallthrough; + case NEXTHDR_AUTH: + /* we expect SRH before FRAG and AUTH */ + return false; + default: + off += ipv6_optlen(hp); + break; + } + + nexthdr = hp->nexthdr; + } + } + + iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + + skb_postpush_rcsum(skb, iph, srhoff); + + return true; +} + +/* process the packet on the basis of the RFC8986 flavors set for the given + * SRv6 End behavior instance. + */ +static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + const struct seg6_flavors_info *finfo = &slwt->flv_info; + enum seg6_local_flv_action action; + enum seg6_local_pktinfo pinfo; + struct ipv6_sr_hdr *srh; + __u32 flvmask; + int srhoff; + + srh = seg6_get_srh(skb, 0); + srhoff = srh ? ((unsigned char *)srh - skb->data) : 0; + pinfo = seg6_get_srh_pktinfo(srh); +#ifdef CONFIG_IPV6_SEG6_HMAC + if (srh && !seg6_hmac_validate_skb(skb)) + goto drop; +#endif + flvmask = finfo->flv_ops; + if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) { + pr_warn_once("seg6local: invalid RFC8986 flavors\n"); + goto drop; + } + + /* retrieve the action triggered by the combination of pktinfo data and + * the flavors mask. 
+ */ + action = seg6_local_flv8986_act_lookup(pinfo, flvmask); + switch (action) { + case SEG6_LOCAL_FLV_ACT_END: + /* process the packet as the "standard" End behavior */ + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + break; + case SEG6_LOCAL_FLV_ACT_PSP: + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + if (unlikely(!seg6_pop_srh(skb, srhoff))) + goto drop; + break; + case SEG6_LOCAL_FLV_ACT_UNSPEC: + fallthrough; + default: + /* by default, we drop the packet since we could not find a + * suitable action. + */ + goto drop; + } + + return input_action_end_finish(skb, slwt); + +drop: + kfree_skb(skb); + return -EINVAL; +} + /* regular endpoint function */ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; + __u32 fops = finfo->flv_ops; - if (seg6_next_csid_enabled(finfo->flv_ops)) + if (!fops) + return input_action_end_core(skb, slwt); + + /* check for the presence of NEXT-C-SID since it applies first */ + if (seg6_next_csid_enabled(fops)) return end_next_csid_core(skb, slwt); - return input_action_end_core(skb, slwt); + /* the specific processing function to be performed on the packet + * depends on the combination of flavors defined in RFC8986 and some + * information extracted from the packet, e.g. presence/absence of SRH, + * Segment Left = 0, etc. + */ + return end_flv8986_core(skb, slwt); } /* regular endpoint, and forward to specified nexthop */ @@ -2300,6 +2627,13 @@ int __init seg6_local_init(void) BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS)); BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS)); + /* To be memory efficient, we use 'u8' to represent the different + * actions related to RFC8986 flavors. If the kernel build stops here, + * it means that it is not possible to correctly encode these actions + * with the data type chosen for the action table. 
+ */ + BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U); + return lwtunnel_encap_add_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c9817aa0f413..3ad9c46202fc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2875,7 +2875,6 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); - sk_refcnt_debug_release(sk); sock_put(sk); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8ffb19c643ab..d4e76e2ae153 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1335,8 +1335,6 @@ static void packet_sock_destruct(struct sock *sk) pr_err("Attempt to release alive packet socket: %p\n", sk); return; } - - sk_refcnt_debug_dec(sk); } static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) @@ -3174,7 +3172,6 @@ static int packet_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); packet_free_pending(po); - sk_refcnt_debug_release(sk); sock_put(sk); return 0; @@ -3364,7 +3361,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; - sk_refcnt_debug_inc(sk); /* * Attach a protocol block diff --git a/net/rds/message.c b/net/rds/message.c index c19c93561227..7af59d2443e5 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, ck = &info->zcookies; memset(ck, 0, sizeof(*ck)); WARN_ON(!rds_zcookie_add(info, cookie)); - list_add_tail(&q->zcookie_head, &info->rs_zcookie_next); + list_add_tail(&info->rs_zcookie_next, &q->zcookie_head); spin_unlock_irqrestore(&q->lock, flags); /* caller invokes rds_wake_sk_sleep() */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 4f7b52f5a11c..4b95cb1ac435 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -45,23 +45,6 @@ if NET_SCHED comment 
"Queueing/Scheduling" -config NET_SCH_CBQ - tristate "Class Based Queueing (CBQ)" - help - Say Y here if you want to use the Class-Based Queueing (CBQ) packet - scheduling algorithm. This algorithm classifies the waiting packets - into a tree-like hierarchy of classes; the leaves of this tree are - in turn scheduled by separate algorithms. - - See the top of <file:net/sched/sch_cbq.c> for more details. - - CBQ is a commonly used scheduler, so if you're unsure, you should - say Y here. Then say Y to all the queueing algorithms below that you - want to use as leaf disciplines. - - To compile this code as a module, choose M here: the - module will be called sch_cbq. - config NET_SCH_HTB tristate "Hierarchical Token Bucket (HTB)" help @@ -85,20 +68,6 @@ config NET_SCH_HFSC To compile this code as a module, choose M here: the module will be called sch_hfsc. -config NET_SCH_ATM - tristate "ATM Virtual Circuits (ATM)" - depends on ATM - help - Say Y here if you want to use the ATM pseudo-scheduler. This - provides a framework for invoking classifiers, which in turn - select classes of this queuing discipline. Each class maps - the flow(s) it is handling to a given virtual circuit. - - See the top of <file:net/sched/sch_atm.c> for more details. - - To compile this code as a module, choose M here: the - module will be called sch_atm. - config NET_SCH_PRIO tristate "Multi Band Priority Queueing (PRIO)" help @@ -223,17 +192,6 @@ config NET_SCH_GRED To compile this code as a module, choose M here: the module will be called sch_gred. -config NET_SCH_DSMARK - tristate "Differentiated Services marker (DSMARK)" - help - Say Y if you want to schedule packets according to the - Differentiated Services architecture proposed in RFC 2475. - Technical information on this method, with pointers to associated - RFCs, is available at <http://www.gta.ufrj.br/diffserv/>. - - To compile this code as a module, choose M here: the - module will be called sch_dsmark. 
- config NET_SCH_NETEM tristate "Network emulator (NETEM)" help @@ -510,17 +468,6 @@ config NET_CLS_BASIC To compile this code as a module, choose M here: the module will be called cls_basic. -config NET_CLS_TCINDEX - tristate "Traffic-Control Index (TCINDEX)" - select NET_CLS - help - Say Y here if you want to be able to classify packets based on - traffic control indices. You will want this feature if you want - to implement Differentiated Services together with DSMARK. - - To compile this code as a module, choose M here: the - module will be called cls_tcindex. - config NET_CLS_ROUTE4 tristate "Routing decision (ROUTE)" depends on INET @@ -566,34 +513,6 @@ config CLS_U32_MARK help Say Y here to be able to use netfilter marks as u32 key. -config NET_CLS_RSVP - tristate "IPv4 Resource Reservation Protocol (RSVP)" - select NET_CLS - help - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp. - -config NET_CLS_RSVP6 - tristate "IPv6 Resource Reservation Protocol (RSVP6)" - select NET_CLS - help - The Resource Reservation Protocol (RSVP) permits end systems to - request a minimum and maximum data flow rate for a connection; this - is important for real time data such as streaming sound or video. - - Say Y here if you want to be able to classify outgoing packets based - on their RSVP requests and you are using the IPv6 protocol. - - To compile this code as a module, choose M here: the - module will be called cls_rsvp6. 
- config NET_CLS_FLOW tristate "Flow classifier" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index 7911eec09837..b5fd49641d91 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -33,20 +33,17 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_ACT_CT) += act_ct.o obj-$(CONFIG_NET_ACT_GATE) += act_gate.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o -obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o obj-$(CONFIG_NET_SCH_RED) += sch_red.o obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o -obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o -obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o @@ -70,9 +67,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o -obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o -obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o -obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cd09ef49df22..eda58b78da13 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -169,11 +169,6 @@ static bool tc_act_skip_sw(u32 flags) return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; } -static bool tc_act_in_hw(struct tc_action *act) -{ - return !!act->in_hw_count; -} - /* SKIP_HW and SKIP_SW are mutually exclusive flags. 
*/ static bool tc_act_flags_valid(u32 flags) { @@ -192,6 +187,7 @@ static int offload_action_init(struct flow_offload_action *fl_action, fl_action->extack = extack; fl_action->command = cmd; fl_action->index = act->tcfa_index; + fl_action->cookie = (unsigned long)act; if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); @@ -307,9 +303,6 @@ int tcf_action_update_hw_stats(struct tc_action *action) struct flow_offload_action fl_act = {}; int err; - if (!tc_act_in_hw(action)) - return -EOPNOTSUPP; - err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); if (err) return err; @@ -539,6 +532,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, (unsigned long)p->tcfa_tm.lastuse)) continue; + tcf_action_update_hw_stats(p); + nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; @@ -1539,9 +1534,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, if (p == NULL) goto errout; - /* update hw stats for this action */ - tcf_action_update_hw_stats(p); - /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. 
*/ diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 7e63ff7e3ed7..8dabfb52ea3d 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb, struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; struct tcf_connmark_info *ca = to_connmark(a); + struct tcf_connmark_parms *parms; struct nf_conntrack_zone zone; struct nf_conn *c; int proto; - spin_lock(&ca->tcf_lock); tcf_lastuse_update(&ca->tcf_tm); - bstats_update(&ca->tcf_bstats, skb); + tcf_action_update_bstats(&ca->common, skb); + + parms = rcu_dereference_bh(ca->parms); switch (skb_protocol(skb, true)) { case htons(ETH_P_IP): @@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb, c = nf_ct_get(skb, &ctinfo); if (c) { skb->mark = READ_ONCE(c->mark); - /* using overlimits stats to count how many packets marked */ - ca->tcf_qstats.overlimits++; - goto out; + goto count; } - if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - proto, ca->net, &tuple)) + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net, + &tuple)) goto out; - zone.id = ca->zone; + zone.id = parms->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; - thash = nf_conntrack_find_get(ca->net, &zone, &tuple); + thash = nf_conntrack_find_get(parms->net, &zone, &tuple); if (!thash) goto out; c = nf_ct_tuplehash_to_ctrack(thash); - /* using overlimits stats to count how many packets marked */ - ca->tcf_qstats.overlimits++; skb->mark = READ_ONCE(c->mark); nf_ct_put(c); +count: + /* using overlimits stats to count how many packets marked */ + tcf_action_inc_overlimit_qstats(&ca->common); out: - spin_unlock(&ca->tcf_lock); - return ca->tcf_action; + return READ_ONCE(ca->tcf_action); } static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { @@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct netlink_ext_ack *extack) { struct tc_action_net *tn = 
net_generic(net, act_connmark_ops.net_id); + struct tcf_connmark_parms *nparms, *oparms; struct nlattr *tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; @@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, if (!tb[TCA_CONNMARK_PARMS]) return -EINVAL; + nparms = kzalloc(sizeof(*nparms), GFP_KERNEL); + if (!nparms) + return -ENOMEM; + parm = nla_data(tb[TCA_CONNMARK_PARMS]); index = parm->index; ret = tcf_idr_check_alloc(tn, &index, a, bind); if (!ret) { - ret = tcf_idr_create(tn, index, est, a, - &act_connmark_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, + &act_connmark_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); - return ret; + err = ret; + goto out_free; } ci = to_connmark(*a); - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, - extack); - if (err < 0) - goto release_idr; - tcf_action_set_ctrlact(*a, parm->action, goto_ch); - ci->net = net; - ci->zone = parm->zone; + + nparms->net = net; + nparms->zone = parm->zone; ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); - if (bind) - return 0; - if (!(flags & TCA_ACT_FLAGS_REPLACE)) { - tcf_idr_release(*a, bind); - return -EEXIST; + if (bind) { + err = 0; + goto out_free; } - err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, - extack); - if (err < 0) + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { + err = -EEXIST; goto release_idr; - /* replacing action and zone */ - spin_lock_bh(&ci->tcf_lock); - goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); - ci->zone = parm->zone; - spin_unlock_bh(&ci->tcf_lock); - if (goto_ch) - tcf_chain_put_by_act(goto_ch); + } + + nparms->net = rtnl_dereference(ci->parms)->net; + nparms->zone = parm->zone; + ret = 0; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + + spin_lock_bh(&ci->tcf_lock); + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + 
oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock)); + spin_unlock_bh(&ci->tcf_lock); + + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + + if (oparms) + kfree_rcu(oparms, rcu); + return ret; + release_idr: tcf_idr_release(*a, bind); +out_free: + kfree(nparms); return err; } @@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, .refcnt = refcount_read(&ci->tcf_refcnt) - ref, .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, }; + struct tcf_connmark_parms *parms; struct tcf_t t; spin_lock_bh(&ci->tcf_lock); + parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock)); + opt.action = ci->tcf_action; - opt.zone = ci->zone; + opt.zone = parms->zone; if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -201,6 +219,16 @@ nla_put_failure: return -1; } +static void tcf_connmark_cleanup(struct tc_action *a) +{ + struct tcf_connmark_info *ci = to_connmark(a); + struct tcf_connmark_parms *parms; + + parms = rcu_dereference_protected(ci->parms, 1); + if (parms) + kfree_rcu(parms, rcu); +} + static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .id = TCA_ID_CONNMARK, @@ -208,6 +236,7 @@ static struct tc_action_ops act_connmark_ops = { .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, + .cleanup = tcf_connmark_cleanup, .size = sizeof(struct tcf_connmark_info), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 9b8def0be41e..c9a811f4c7ee 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb, struct tcf_result *res) { struct tcf_gate *gact = to_gate(a); - - spin_lock(&gact->tcf_lock); + int action = READ_ONCE(gact->tcf_action); tcf_lastuse_update(&gact->tcf_tm); - bstats_update(&gact->tcf_bstats, skb); + tcf_action_update_bstats(&gact->common, skb); + spin_lock(&gact->tcf_lock); if (unlikely(gact->current_gate_status & 
GATE_ACT_PENDING)) { spin_unlock(&gact->tcf_lock); - return gact->tcf_action; + return action; } - if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) + if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) { + spin_unlock(&gact->tcf_lock); goto drop; + } if (gact->current_max_octets >= 0) { gact->current_entry_octets += qdisc_pkt_len(skb); if (gact->current_entry_octets > gact->current_max_octets) { - gact->tcf_qstats.overlimits++; - goto drop; + spin_unlock(&gact->tcf_lock); + goto overlimit; } } - spin_unlock(&gact->tcf_lock); - return gact->tcf_action; -drop: - gact->tcf_qstats.drops++; - spin_unlock(&gact->tcf_lock); + return action; +overlimit: + tcf_action_inc_overlimit_qstats(&gact->common); +drop: + tcf_action_inc_drop_qstats(&gact->common); return TC_ACT_SHOT; } @@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, return 0; if (!err) { - ret = tcf_idr_create(tn, index, est, a, - &act_gate_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, + &act_gate_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 74c74be33048..4184af5abbf3 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -38,6 +38,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, { struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; + struct tcf_nat_parms *nparm, *oparm; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; @@ -59,8 +60,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, index, est, a, - &act_nat_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops, + bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; @@ -79,19 +80,31 
@@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; + + nparm = kzalloc(sizeof(*nparm), GFP_KERNEL); + if (!nparm) { + err = -ENOMEM; + goto release_idr; + } + + nparm->old_addr = parm->old_addr; + nparm->new_addr = parm->new_addr; + nparm->mask = parm->mask; + nparm->flags = parm->flags; + p = to_tcf_nat(*a); spin_lock_bh(&p->tcf_lock); - p->old_addr = parm->old_addr; - p->new_addr = parm->new_addr; - p->mask = parm->mask; - p->flags = parm->flags; - goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) tcf_chain_put_by_act(goto_ch); + if (oparm) + kfree_rcu(oparm, rcu); + return ret; release_idr: tcf_idr_release(*a, bind); @@ -103,6 +116,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb, struct tcf_result *res) { struct tcf_nat *p = to_tcf_nat(a); + struct tcf_nat_parms *parms; struct iphdr *iph; __be32 old_addr; __be32 new_addr; @@ -113,18 +127,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb, int ihl; int noff; - spin_lock(&p->tcf_lock); - tcf_lastuse_update(&p->tcf_tm); - old_addr = p->old_addr; - new_addr = p->new_addr; - mask = p->mask; - egress = p->flags & TCA_NAT_FLAG_EGRESS; - action = p->tcf_action; + tcf_action_update_bstats(&p->common, skb); - bstats_update(&p->tcf_bstats, skb); + action = READ_ONCE(p->tcf_action); - spin_unlock(&p->tcf_lock); + parms = rcu_dereference_bh(p->parms); + old_addr = parms->old_addr; + new_addr = parms->new_addr; + mask = parms->mask; + egress = parms->flags & TCA_NAT_FLAG_EGRESS; if (unlikely(action == TC_ACT_SHOT)) goto drop; @@ -248,9 +260,7 @@ out: return action; drop: - spin_lock(&p->tcf_lock); - p->tcf_qstats.drops++; - spin_unlock(&p->tcf_lock); + tcf_action_inc_drop_qstats(&p->common); return TC_ACT_SHOT; } @@ -264,15 +274,20 @@ static 
int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, .refcnt = refcount_read(&p->tcf_refcnt) - ref, .bindcnt = atomic_read(&p->tcf_bindcnt) - bind, }; + struct tcf_nat_parms *parms; struct tcf_t t; spin_lock_bh(&p->tcf_lock); - opt.old_addr = p->old_addr; - opt.new_addr = p->new_addr; - opt.mask = p->mask; - opt.flags = p->flags; + opt.action = p->tcf_action; + parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock)); + + opt.old_addr = parms->old_addr; + opt.new_addr = parms->new_addr; + opt.mask = parms->mask; + opt.flags = parms->flags; + if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; @@ -289,6 +304,16 @@ nla_put_failure: return -1; } +static void tcf_nat_cleanup(struct tc_action *a) +{ + struct tcf_nat *p = to_tcf_nat(a); + struct tcf_nat_parms *parms; + + parms = rcu_dereference_protected(p->parms, 1); + if (parms) + kfree_rcu(parms, rcu); +} + static struct tc_action_ops act_nat_ops = { .kind = "nat", .id = TCA_ID_NAT, @@ -296,6 +321,7 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat_act, .dump = tcf_nat_dump, .init = tcf_nat_init, + .cleanup = tcf_nat_cleanup, .size = sizeof(struct tcf_nat), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c42fcc47dd6d..77d288d384ae 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -443,9 +443,7 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, goto done; bad: - spin_lock(&p->tcf_lock); - p->tcf_qstats.overlimits++; - spin_unlock(&p->tcf_lock); + tcf_action_inc_overlimit_qstats(&p->common); done: return p->tcf_action; } @@ -545,7 +543,28 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, } *index_inc = k; } else { - return -EOPNOTSUPP; + struct flow_offload_action *fl_action = entry_data; + u32 cmd = tcf_pedit_cmd(act, 0); + int k; + + switch (cmd) { + case TCA_PEDIT_KEY_EX_CMD_SET: + fl_action->id = FLOW_ACTION_MANGLE; + break; + case TCA_PEDIT_KEY_EX_CMD_ADD: + fl_action->id = 
FLOW_ACTION_ADD; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload"); + return -EOPNOTSUPP; + } + + for (k = 1; k < tcf_pedit_nkeys(act); k++) { + if (cmd != tcf_pedit_cmd(act, k)) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload"); + return -EOPNOTSUPP; + } + } } return 0; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5b4a95e8a1ee..bfabc9c95fa9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3577,6 +3577,7 @@ int tc_setup_action(struct flow_action *flow_action, for (k = 0; k < index ; k++) { entry[k].hw_stats = tc_act_hw_stats(act->hw_stats); entry[k].hw_index = act->tcfa_index; + entry[k].act_cookie = (unsigned long)act; } j += index; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 0b15698b3531..885c95191ccf 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -502,12 +502,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, rtnl_held); - tcf_exts_hw_stats_update(&f->exts, cls_flower.stats.bytes, - cls_flower.stats.pkts, - cls_flower.stats.drops, - cls_flower.stats.lastused, - cls_flower.stats.used_hw_stats, - cls_flower.stats.used_hw_stats_valid); + tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats, cls_flower.use_act_stats); } static void __fl_put(struct cls_fl_filter *f) diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 705f63da2c21..fa3bbd187eb9 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -331,11 +331,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); - tcf_exts_hw_stats_update(&head->exts, cls_mall.stats.bytes, - cls_mall.stats.pkts, cls_mall.stats.drops, - cls_mall.stats.lastused, - cls_mall.stats.used_hw_stats, - cls_mall.stats.used_hw_stats_valid); + tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats, 
cls_mall.use_act_stats); } static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c deleted file mode 100644 index 03d8619bd9c6..000000000000 --- a/net/sched/cls_rsvp.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> -#include <net/tc_wrapper.h> - -#define RSVP_DST_LEN 1 -#define RSVP_ID "rsvp" -#define RSVP_OPS cls_rsvp_ops -#define RSVP_CLS rsvp_classify - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h deleted file mode 100644 index 869efba9f834..000000000000 --- a/net/sched/cls_rsvp.h +++ /dev/null @@ -1,764 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -/* - Comparing to general packet classification problem, - RSVP needs only several relatively simple rules: - - * (dst, protocol) are always specified, - so that we are able to hash them. - * src may be exact, or may be wildcard, so that - we can keep a hash table plus one wildcard entry. - * source port (or flow label) is important only if src is given. - - IMPLEMENTATION. - - We use a two level hash table: The top level is keyed by - destination address and protocol ID, every bucket contains a list - of "rsvp sessions", identified by destination address, protocol and - DPI(="Destination Port ID"): triple (key, mask, offset). - - Every bucket has a smaller hash table keyed by source address - (cf. 
RSVP flowspec) and one wildcard entry for wildcard reservations. - Every bucket is again a list of "RSVP flows", selected by - source address and SPI(="Source Port ID" here rather than - "security parameter index"): triple (key, mask, offset). - - - NOTE 1. All the packets with IPv6 extension headers (but AH and ESP) - and all fragmented packets go to the best-effort traffic class. - - - NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires - only one "Generalized Port Identifier". So that for classic - ah, esp (and udp,tcp) both *pi should coincide or one of them - should be wildcard. - - At first sight, this redundancy is just a waste of CPU - resources. But DPI and SPI add the possibility to assign different - priorities to GPIs. Look also at note 4 about tunnels below. - - - NOTE 3. One complication is the case of tunneled packets. - We implement it as following: if the first lookup - matches a special session with "tunnelhdr" value not zero, - flowid doesn't contain the true flow ID, but the tunnel ID (1...255). - In this case, we pull tunnelhdr bytes and restart lookup - with tunnel ID added to the list of keys. Simple and stupid 8)8) - It's enough for PIMREG and IPIP. - - - NOTE 4. Two GPIs make it possible to parse even GRE packets. - F.e. DPI can select ETH_P_IP (and necessary flags to make - tunnelhdr correct) in GRE protocol field and SPI matches - GRE key. Is it not nice? 8)8) - - - Well, as result, despite its simplicity, we get a pretty - powerful classification engine. 
*/ - - -struct rsvp_head { - u32 tmap[256/32]; - u32 hgenerator; - u8 tgenerator; - struct rsvp_session __rcu *ht[256]; - struct rcu_head rcu; -}; - -struct rsvp_session { - struct rsvp_session __rcu *next; - __be32 dst[RSVP_DST_LEN]; - struct tc_rsvp_gpi dpi; - u8 protocol; - u8 tunnelid; - /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter __rcu *ht[16 + 1]; - struct rcu_head rcu; -}; - - -struct rsvp_filter { - struct rsvp_filter __rcu *next; - __be32 src[RSVP_DST_LEN]; - struct tc_rsvp_gpi spi; - u8 tunnelhdr; - - struct tcf_result res; - struct tcf_exts exts; - - u32 handle; - struct rsvp_session *sess; - struct rcu_work rwork; -}; - -static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) -{ - unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; - - h ^= h>>16; - h ^= h>>8; - return (h ^ protocol ^ tunnelid) & 0xFF; -} - -static inline unsigned int hash_src(__be32 *src) -{ - unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; - - h ^= h>>16; - h ^= h>>8; - h ^= h>>4; - return h & 0xF; -} - -#define RSVP_APPLY_RESULT() \ -{ \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) \ - continue; \ - else if (r > 0) \ - return r; \ -} - -TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct rsvp_head *head = rcu_dereference_bh(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1, h2; - __be32 *dst, *src; - u8 protocol; - u8 tunnelid = 0; - u8 *xprt; -#if RSVP_DST_LEN == 4 - struct ipv6hdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ipv6_hdr(skb); -#else - struct iphdr *nhptr; - - if (!pskb_network_may_pull(skb, sizeof(*nhptr))) - return -1; - nhptr = ip_hdr(skb); -#endif -restart: - -#if RSVP_DST_LEN == 4 - src = &nhptr->saddr.s6_addr32[0]; - dst = &nhptr->daddr.s6_addr32[0]; - protocol = nhptr->nexthdr; - xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); -#else - src = 
&nhptr->saddr; - dst = &nhptr->daddr; - protocol = nhptr->protocol; - xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); - if (ip_is_fragment(nhptr)) - return -1; -#endif - - h1 = hash_dst(dst, protocol, tunnelid); - h2 = hash_src(src); - - for (s = rcu_dereference_bh(head->ht[h1]); s; - s = rcu_dereference_bh(s->next)) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && - protocol == s->protocol && - !(s->dpi.mask & - (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - tunnelid == s->tunnelid) { - - for (f = rcu_dereference_bh(s->ht[h2]); f; - f = rcu_dereference_bh(f->next)) { - if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && - !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) -#if RSVP_DST_LEN == 4 - && - src[0] == f->src[0] && - src[1] == f->src[1] && - src[2] == f->src[2] -#endif - ) { - *res = f->res; - RSVP_APPLY_RESULT(); - -matched: - if (f->tunnelhdr == 0) - return 0; - - tunnelid = f->res.classid; - nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); - goto restart; - } - } - - /* And wildcard bucket... 
*/ - for (f = rcu_dereference_bh(s->ht[16]); f; - f = rcu_dereference_bh(f->next)) { - *res = f->res; - RSVP_APPLY_RESULT(); - goto matched; - } - return -1; - } - } - return -1; -} - -static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter __rcu **ins; - struct rsvp_filter *pins; - unsigned int h1 = h & 0xFF; - unsigned int h2 = (h >> 8) & 0xFF; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ; - ins = &pins->next, pins = rtnl_dereference(*ins)) { - if (pins->handle == h) { - RCU_INIT_POINTER(n->next, pins->next); - rcu_assign_pointer(*ins, n); - return; - } - } - } - - /* Something went wrong if we are trying to replace a non-existent - * node. Mind as well halt instead of silently failing. - */ - BUG_ON(1); -} - -static void *rsvp_get(struct tcf_proto *tp, u32 handle) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_session *s; - struct rsvp_filter *f; - unsigned int h1 = handle & 0xFF; - unsigned int h2 = (handle >> 8) & 0xFF; - - if (h2 > 16) - return NULL; - - for (s = rtnl_dereference(head->ht[h1]); s; - s = rtnl_dereference(s->next)) { - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->handle == handle) - return f; - } - } - return NULL; -} - -static int rsvp_init(struct tcf_proto *tp) -{ - struct rsvp_head *data; - - data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); - if (data) { - rcu_assign_pointer(tp->root, data); - return 0; - } - return -ENOBUFS; -} - -static void __rsvp_delete_filter(struct rsvp_filter *f) -{ - tcf_exts_destroy(&f->exts); - tcf_exts_put_net(&f->exts); - kfree(f); -} - -static void rsvp_delete_filter_work(struct work_struct *work) -{ - struct rsvp_filter *f = container_of(to_rcu_work(work), - struct rsvp_filter, - rwork); - rtnl_lock(); - 
__rsvp_delete_filter(f); - rtnl_unlock(); -} - -static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) -{ - tcf_unbind_filter(tp, &f->res); - /* all classifiers are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (tcf_exts_get_net(&f->exts)) - tcf_queue_work(&f->rwork, rsvp_delete_filter_work); - else - __rsvp_delete_filter(f); -} - -static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int h1, h2; - - if (data == NULL) - return; - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - - while ((s = rtnl_dereference(data->ht[h1])) != NULL) { - RCU_INIT_POINTER(data->ht[h1], s->next); - - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - while ((f = rtnl_dereference(s->ht[h2])) != NULL) { - rcu_assign_pointer(s->ht[h2], f->next); - rsvp_delete_filter(tp, f); - } - } - kfree_rcu(s, rcu); - } - } - kfree_rcu(data, rcu); -} - -static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last, - bool rtnl_held, struct netlink_ext_ack *extack) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = arg; - struct rsvp_filter __rcu **fp; - unsigned int h = f->handle; - struct rsvp_session __rcu **sp; - struct rsvp_session *nsp, *s = f->sess; - int i, h1; - - fp = &s->ht[(h >> 8) & 0xFF]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - if (nfp == f) { - RCU_INIT_POINTER(*fp, f->next); - rsvp_delete_filter(tp, f); - - /* Strip tree */ - - for (i = 0; i <= 16; i++) - if (s->ht[i]) - goto out; - - /* OK, session has no flows */ - sp = &head->ht[h & 0xFF]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if (nsp == s) { - RCU_INIT_POINTER(*sp, s->next); - kfree_rcu(s, rcu); - goto out; - } - } - - break; - } - } - 
-out: - *last = true; - for (h1 = 0; h1 < 256; h1++) { - if (rcu_access_pointer(head->ht[h1])) { - *last = false; - break; - } - } - - return 0; -} - -static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - int i = 0xFFFF; - - while (i-- > 0) { - u32 h; - - if ((data->hgenerator += 0x10000) == 0) - data->hgenerator = 0x10000; - h = data->hgenerator|salt; - if (!rsvp_get(tp, h)) - return h; - } - return 0; -} - -static int tunnel_bts(struct rsvp_head *data) -{ - int n = data->tgenerator >> 5; - u32 b = 1 << (data->tgenerator & 0x1F); - - if (data->tmap[n] & b) - return 0; - data->tmap[n] |= b; - return 1; -} - -static void tunnel_recycle(struct rsvp_head *data) -{ - struct rsvp_session __rcu **sht = data->ht; - u32 tmap[256/32]; - int h1, h2; - - memset(tmap, 0, sizeof(tmap)); - - for (h1 = 0; h1 < 256; h1++) { - struct rsvp_session *s; - for (s = rtnl_dereference(sht[h1]); s; - s = rtnl_dereference(s->next)) { - for (h2 = 0; h2 <= 16; h2++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h2]); f; - f = rtnl_dereference(f->next)) { - if (f->tunnelhdr == 0) - continue; - data->tgenerator = f->res.classid; - tunnel_bts(data); - } - } - } - } - - memcpy(data->tmap, tmap, sizeof(tmap)); -} - -static u32 gen_tunnel(struct rsvp_head *data) -{ - int i, k; - - for (k = 0; k < 2; k++) { - for (i = 255; i > 0; i--) { - if (++data->tgenerator == 0) - data->tgenerator = 1; - if (tunnel_bts(data)) - return data->tgenerator; - } - tunnel_recycle(data); - } - return 0; -} - -static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { - [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, -}; - -static int rsvp_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, struct 
nlattr **tca, - void **arg, u32 flags, - struct netlink_ext_ack *extack) -{ - struct rsvp_head *data = rtnl_dereference(tp->root); - struct rsvp_filter *f, *nfp; - struct rsvp_filter __rcu **fp; - struct rsvp_session *nsp, *s; - struct rsvp_session __rcu **sp; - struct tc_rsvp_pinfo *pinfo = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_RSVP_MAX + 1]; - struct tcf_exts e; - unsigned int h1, h2; - __be32 *dst; - int err; - - if (opt == NULL) - return handle ? -EINVAL : 0; - - err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy, - NULL); - if (err < 0) - return err; - - err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags, - extack); - if (err < 0) - goto errout2; - - f = *arg; - if (f) { - /* Node exists: adjust only classid */ - struct rsvp_filter *n; - - if (f->handle != handle && handle) - goto errout2; - - n = kmemdup(f, sizeof(*f), GFP_KERNEL); - if (!n) { - err = -ENOMEM; - goto errout2; - } - - err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT, - TCA_RSVP_POLICE); - if (err < 0) { - kfree(n); - goto errout2; - } - - if (tb[TCA_RSVP_CLASSID]) { - n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - tcf_bind_filter(tp, &n->res, base); - } - - tcf_exts_change(&n->exts, &e); - rsvp_replace(tp, n, handle); - return 0; - } - - /* Now more serious part... 
*/ - err = -EINVAL; - if (handle) - goto errout2; - if (tb[TCA_RSVP_DST] == NULL) - goto errout2; - - err = -ENOBUFS; - f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL); - if (f == NULL) - goto errout2; - - err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); - if (err < 0) - goto errout; - h2 = 16; - if (tb[TCA_RSVP_SRC]) { - memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); - h2 = hash_src(f->src); - } - if (tb[TCA_RSVP_PINFO]) { - pinfo = nla_data(tb[TCA_RSVP_PINFO]); - f->spi = pinfo->spi; - f->tunnelhdr = pinfo->tunnelhdr; - } - if (tb[TCA_RSVP_CLASSID]) - f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); - - dst = nla_data(tb[TCA_RSVP_DST]); - h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0); - - err = -ENOMEM; - if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0) - goto errout; - - if (f->tunnelhdr) { - err = -EINVAL; - if (f->res.classid > 255) - goto errout; - - err = -ENOMEM; - if (f->res.classid == 0 && - (f->res.classid = gen_tunnel(data)) == 0) - goto errout; - } - - for (sp = &data->ht[h1]; - (s = rtnl_dereference(*sp)) != NULL; - sp = &s->next) { - if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && - pinfo && pinfo->protocol == s->protocol && - memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && -#if RSVP_DST_LEN == 4 - dst[0] == s->dst[0] && - dst[1] == s->dst[1] && - dst[2] == s->dst[2] && -#endif - pinfo->tunnelid == s->tunnelid) { - -insert: - /* OK, we found appropriate session */ - - fp = &s->ht[h2]; - - f->sess = s; - if (f->tunnelhdr == 0) - tcf_bind_filter(tp, &f->res, base); - - tcf_exts_change(&f->exts, &e); - - fp = &s->ht[h2]; - for (nfp = rtnl_dereference(*fp); nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) { - __u32 mask = nfp->spi.mask & f->spi.mask; - - if (mask != f->spi.mask) - break; - } - RCU_INIT_POINTER(f->next, nfp); - rcu_assign_pointer(*fp, f); - - *arg = f; - return 0; - } - } - - /* No session found. Create new one. 
*/ - - err = -ENOBUFS; - s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL); - if (s == NULL) - goto errout; - memcpy(s->dst, dst, sizeof(s->dst)); - - if (pinfo) { - s->dpi = pinfo->dpi; - s->protocol = pinfo->protocol; - s->tunnelid = pinfo->tunnelid; - } - sp = &data->ht[h1]; - for (nsp = rtnl_dereference(*sp); nsp; - sp = &nsp->next, nsp = rtnl_dereference(*sp)) { - if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask) - break; - } - RCU_INIT_POINTER(s->next, nsp); - rcu_assign_pointer(*sp, s); - - goto insert; - -errout: - tcf_exts_destroy(&f->exts); - kfree(f); -errout2: - tcf_exts_destroy(&e); - return err; -} - -static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, - bool rtnl_held) -{ - struct rsvp_head *head = rtnl_dereference(tp->root); - unsigned int h, h1; - - if (arg->stop) - return; - - for (h = 0; h < 256; h++) { - struct rsvp_session *s; - - for (s = rtnl_dereference(head->ht[h]); s; - s = rtnl_dereference(s->next)) { - for (h1 = 0; h1 <= 16; h1++) { - struct rsvp_filter *f; - - for (f = rtnl_dereference(s->ht[h1]); f; - f = rtnl_dereference(f->next)) { - if (!tc_cls_stats_dump(tp, arg, f)) - return; - } - } - } - } -} - -static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) -{ - struct rsvp_filter *f = fh; - struct rsvp_session *s; - struct nlattr *nest; - struct tc_rsvp_pinfo pinfo; - - if (f == NULL) - return skb->len; - s = f->sess; - - t->tcm_handle = f->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst)) - goto nla_put_failure; - pinfo.dpi = s->dpi; - pinfo.spi = f->spi; - pinfo.protocol = s->protocol; - pinfo.tunnelid = s->tunnelid; - pinfo.tunnelhdr = f->tunnelhdr; - pinfo.pad = 0; - if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo)) - goto nla_put_failure; - if (f->res.classid && - nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid)) - goto 
nla_put_failure; - if (((f->handle >> 8) & 0xFF) != 16 && - nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &f->exts) < 0) - goto nla_put_failure; - - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &f->exts) < 0) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, - unsigned long base) -{ - struct rsvp_filter *f = fh; - - tc_cls_bind_class(classid, cl, q, &f->res, base); -} - -static struct tcf_proto_ops RSVP_OPS __read_mostly = { - .kind = RSVP_ID, - .classify = RSVP_CLS, - .init = rsvp_init, - .destroy = rsvp_destroy, - .get = rsvp_get, - .change = rsvp_change, - .delete = rsvp_delete, - .walk = rsvp_walk, - .dump = rsvp_dump, - .bind_class = rsvp_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_rsvp(void) -{ - return register_tcf_proto_ops(&RSVP_OPS); -} - -static void __exit exit_rsvp(void) -{ - unregister_tcf_proto_ops(&RSVP_OPS); -} - -module_init(init_rsvp) -module_exit(exit_rsvp) diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c deleted file mode 100644 index e627cc32d633..000000000000 --- a/net/sched/cls_rsvp6.c +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6. 
- * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/ipv6.h> -#include <linux/skbuff.h> -#include <net/act_api.h> -#include <net/pkt_cls.h> -#include <net/netlink.h> -#include <net/tc_wrapper.h> - -#define RSVP_DST_LEN 4 -#define RSVP_ID "rsvp6" -#define RSVP_OPS cls_rsvp6_ops -#define RSVP_CLS rsvp6_classify - -#include "cls_rsvp.h" -MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c deleted file mode 100644 index ee2a050c887b..000000000000 --- a/net/sched/cls_tcindex.c +++ /dev/null @@ -1,716 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * net/sched/cls_tcindex.c Packet classifier for skb->tc_index - * - * Written 1998,1999 by Werner Almesberger, EPFL ICA - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/errno.h> -#include <linux/slab.h> -#include <linux/refcount.h> -#include <net/act_api.h> -#include <net/netlink.h> -#include <net/pkt_cls.h> -#include <net/sch_generic.h> -#include <net/tc_wrapper.h> - -/* - * Passing parameters to the root seems to be done more awkwardly than really - * necessary. At least, u32 doesn't seem to use such dirty hacks. To be - * verified. FIXME. 
- */ - -#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */ -#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */ - - -struct tcindex_data; - -struct tcindex_filter_result { - struct tcf_exts exts; - struct tcf_result res; - struct tcindex_data *p; - struct rcu_work rwork; -}; - -struct tcindex_filter { - u16 key; - struct tcindex_filter_result result; - struct tcindex_filter __rcu *next; - struct rcu_work rwork; -}; - - -struct tcindex_data { - struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter __rcu **h; /* imperfect hash; */ - struct tcf_proto *tp; - u16 mask; /* AND key with mask */ - u32 shift; /* shift ANDed key to the right */ - u32 hash; /* hash table size; 0 if undefined */ - u32 alloc_hash; /* allocated size */ - u32 fall_through; /* 0: only classify if explicit match */ - refcount_t refcnt; /* a temporary refcnt for perfect hash */ - struct rcu_work rwork; -}; - -static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) -{ - return tcf_exts_has_actions(&r->exts) || r->res.classid; -} - -static void tcindex_data_get(struct tcindex_data *p) -{ - refcount_inc(&p->refcnt); -} - -static void tcindex_data_put(struct tcindex_data *p) -{ - if (refcount_dec_and_test(&p->refcnt)) { - kfree(p->perfect); - kfree(p->h); - kfree(p); - } -} - -static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, - u16 key) -{ - if (p->perfect) { - struct tcindex_filter_result *f = p->perfect + key; - - return tcindex_filter_is_set(f) ? 
f : NULL; - } else if (p->h) { - struct tcindex_filter __rcu **fp; - struct tcindex_filter *f; - - fp = &p->h[key % p->hash]; - for (f = rcu_dereference_bh_rtnl(*fp); - f; - fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) - if (f->key == key) - return &f->result; - } - - return NULL; -} - -TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb, - const struct tcf_proto *tp, - struct tcf_result *res) -{ - struct tcindex_data *p = rcu_dereference_bh(tp->root); - struct tcindex_filter_result *f; - int key = (skb->tc_index & p->mask) >> p->shift; - - pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n", - skb, tp, res, p); - - f = tcindex_lookup(p, key); - if (!f) { - struct Qdisc *q = tcf_block_q(tp->chain->block); - - if (!p->fall_through) - return -1; - res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key); - res->class = 0; - pr_debug("alg 0x%x\n", res->classid); - return 0; - } - *res = f->res; - pr_debug("map 0x%x\n", res->classid); - - return tcf_exts_exec(skb, &f->exts, res); -} - - -static void *tcindex_get(struct tcf_proto *tp, u32 handle) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r; - - pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); - if (p->perfect && handle >= p->alloc_hash) - return NULL; - r = tcindex_lookup(p, handle); - return r && tcindex_filter_is_set(r) ? 
r : NULL; -} - -static int tcindex_init(struct tcf_proto *tp) -{ - struct tcindex_data *p; - - pr_debug("tcindex_init(tp %p)\n", tp); - p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL); - if (!p) - return -ENOMEM; - - p->mask = 0xffff; - p->hash = DEFAULT_HASH_SIZE; - p->fall_through = 1; - refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */ - - rcu_assign_pointer(tp->root, p); - return 0; -} - -static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) -{ - tcf_exts_destroy(&r->exts); - tcf_exts_put_net(&r->exts); - tcindex_data_put(r->p); -} - -static void tcindex_destroy_rexts_work(struct work_struct *work) -{ - struct tcindex_filter_result *r; - - r = container_of(to_rcu_work(work), - struct tcindex_filter_result, - rwork); - rtnl_lock(); - __tcindex_destroy_rexts(r); - rtnl_unlock(); -} - -static void __tcindex_destroy_fexts(struct tcindex_filter *f) -{ - tcf_exts_destroy(&f->result.exts); - tcf_exts_put_net(&f->result.exts); - kfree(f); -} - -static void tcindex_destroy_fexts_work(struct work_struct *work) -{ - struct tcindex_filter *f = container_of(to_rcu_work(work), - struct tcindex_filter, - rwork); - - rtnl_lock(); - __tcindex_destroy_fexts(f); - rtnl_unlock(); -} - -static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last, - bool rtnl_held, struct netlink_ext_ack *extack) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = arg; - struct tcindex_filter __rcu **walk; - struct tcindex_filter *f = NULL; - - pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p); - if (p->perfect) { - if (!r->res.class) - return -ENOENT; - } else { - int i; - - for (i = 0; i < p->hash; i++) { - walk = p->h + i; - for (f = rtnl_dereference(*walk); f; - walk = &f->next, f = rtnl_dereference(*walk)) { - if (&f->result == r) - goto found; - } - } - return -ENOENT; - -found: - rcu_assign_pointer(*walk, rtnl_dereference(f->next)); - } - tcf_unbind_filter(tp, &r->res); - /* all classifiers 
are required to call tcf_exts_destroy() after rcu - * grace period, since converted-to-rcu actions are relying on that - * in cleanup() callback - */ - if (f) { - if (tcf_exts_get_net(&f->result.exts)) - tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work); - else - __tcindex_destroy_fexts(f); - } else { - tcindex_data_get(p); - - if (tcf_exts_get_net(&r->exts)) - tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work); - else - __tcindex_destroy_rexts(r); - } - - *last = false; - return 0; -} - -static void tcindex_destroy_work(struct work_struct *work) -{ - struct tcindex_data *p = container_of(to_rcu_work(work), - struct tcindex_data, - rwork); - - tcindex_data_put(p); -} - -static inline int -valid_perfect_hash(struct tcindex_data *p) -{ - return p->hash > (p->mask >> p->shift); -} - -static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { - [TCA_TCINDEX_HASH] = { .type = NLA_U32 }, - [TCA_TCINDEX_MASK] = { .type = NLA_U16 }, - [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 }, - [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 }, - [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, -}; - -static int tcindex_filter_result_init(struct tcindex_filter_result *r, - struct tcindex_data *p, - struct net *net) -{ - memset(r, 0, sizeof(*r)); - r->p = p; - return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT, - TCA_TCINDEX_POLICE); -} - -static void tcindex_free_perfect_hash(struct tcindex_data *cp); - -static void tcindex_partial_destroy_work(struct work_struct *work) -{ - struct tcindex_data *p = container_of(to_rcu_work(work), - struct tcindex_data, - rwork); - - rtnl_lock(); - if (p->perfect) - tcindex_free_perfect_hash(p); - kfree(p); - rtnl_unlock(); -} - -static void tcindex_free_perfect_hash(struct tcindex_data *cp) -{ - int i; - - for (i = 0; i < cp->hash; i++) - tcf_exts_destroy(&cp->perfect[i].exts); - kfree(cp->perfect); -} - -static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) -{ - int i, err = 0; - - cp->perfect = kcalloc(cp->hash, 
sizeof(struct tcindex_filter_result), - GFP_KERNEL | __GFP_NOWARN); - if (!cp->perfect) - return -ENOMEM; - - for (i = 0; i < cp->hash; i++) { - err = tcf_exts_init(&cp->perfect[i].exts, net, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - if (err < 0) - goto errout; - cp->perfect[i].p = cp; - } - - return 0; - -errout: - tcindex_free_perfect_hash(cp); - return err; -} - -static int -tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, - u32 handle, struct tcindex_data *p, - struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) -{ - struct tcindex_filter_result new_filter_result; - struct tcindex_data *cp = NULL, *oldp; - struct tcindex_filter *f = NULL; /* make gcc behave */ - struct tcf_result cr = {}; - int err, balloc = 0; - struct tcf_exts e; - - err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - if (err < 0) - return err; - err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack); - if (err < 0) - goto errout; - - err = -ENOMEM; - /* tcindex_data attributes must look atomic to classifier/lookup so - * allocate new tcindex data and RCU assign it onto root. Keeping - * perfect hash and hash pointers from old data. - */ - cp = kzalloc(sizeof(*cp), GFP_KERNEL); - if (!cp) - goto errout; - - cp->mask = p->mask; - cp->shift = p->shift; - cp->hash = p->hash; - cp->alloc_hash = p->alloc_hash; - cp->fall_through = p->fall_through; - cp->tp = tp; - refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */ - - if (tb[TCA_TCINDEX_HASH]) - cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); - - if (tb[TCA_TCINDEX_MASK]) - cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); - - if (tb[TCA_TCINDEX_SHIFT]) { - cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); - if (cp->shift > 16) { - err = -EINVAL; - goto errout; - } - } - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. 
- */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - - if (p->perfect) { - int i; - - if (tcindex_alloc_perfect_hash(net, cp) < 0) - goto errout; - cp->alloc_hash = cp->hash; - for (i = 0; i < min(cp->hash, p->hash); i++) - cp->perfect[i].res = p->perfect[i].res; - balloc = 1; - } - cp->h = p->h; - - err = tcindex_filter_result_init(&new_filter_result, cp, net); - if (err < 0) - goto errout_alloc; - if (r) - cr = r->res; - - err = -EBUSY; - - /* Hash already allocated, make sure that we still meet the - * requirements for the allocated hash. - */ - if (cp->perfect) { - if (!valid_perfect_hash(cp) || - cp->hash > cp->alloc_hash) - goto errout_alloc; - } else if (cp->h && cp->hash != cp->alloc_hash) { - goto errout_alloc; - } - - err = -EINVAL; - if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - - if (!cp->perfect && !cp->h) - cp->alloc_hash = cp->hash; - - /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) - * but then, we'd fail handles that may become valid after some future - * mask change. While this is extremely unlikely to ever matter, - * the check below is safer (and also more backwards-compatible). - */ - if (cp->perfect || valid_perfect_hash(cp)) - if (handle >= cp->alloc_hash) - goto errout_alloc; - - - err = -ENOMEM; - if (!cp->perfect && !cp->h) { - if (valid_perfect_hash(cp)) { - if (tcindex_alloc_perfect_hash(net, cp) < 0) - goto errout_alloc; - balloc = 1; - } else { - struct tcindex_filter __rcu **hash; - - hash = kcalloc(cp->hash, - sizeof(struct tcindex_filter *), - GFP_KERNEL); - - if (!hash) - goto errout_alloc; - - cp->h = hash; - balloc = 2; - } - } - - if (cp->perfect) - r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? 
: &new_filter_result; - - if (r == &new_filter_result) { - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - goto errout_alloc; - f->key = handle; - f->next = NULL; - err = tcindex_filter_result_init(&f->result, cp, net); - if (err < 0) { - kfree(f); - goto errout_alloc; - } - } - - if (tb[TCA_TCINDEX_CLASSID]) { - cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); - tcf_bind_filter(tp, &cr, base); - } - - oldp = p; - r->res = cr; - tcf_exts_change(&r->exts, &e); - - rcu_assign_pointer(tp->root, cp); - - if (r == &new_filter_result) { - struct tcindex_filter *nfp; - struct tcindex_filter __rcu **fp; - - f->result.res = r->res; - tcf_exts_change(&f->result.exts, &r->exts); - - fp = cp->h + (handle % cp->hash); - for (nfp = rtnl_dereference(*fp); - nfp; - fp = &nfp->next, nfp = rtnl_dereference(*fp)) - ; /* nothing */ - - rcu_assign_pointer(*fp, f); - } else { - tcf_exts_destroy(&new_filter_result.exts); - } - - if (oldp) - tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work); - return 0; - -errout_alloc: - if (balloc == 1) - tcindex_free_perfect_hash(cp); - else if (balloc == 2) - kfree(cp->h); - tcf_exts_destroy(&new_filter_result.exts); -errout: - kfree(cp); - tcf_exts_destroy(&e); - return err; -} - -static int -tcindex_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, u32 flags, - struct netlink_ext_ack *extack) -{ - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = *arg; - int err; - - pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," - "p %p,r %p,*arg %p\n", - tp, handle, tca, arg, opt, p, r, *arg); - - if (!opt) - return 0; - - err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt, - tcindex_policy, NULL); - if (err < 0) - return err; - - return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], flags, 
extack); -} - -static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, - bool rtnl_held) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter *f, *next; - int i; - - pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p); - if (p->perfect) { - for (i = 0; i < p->hash; i++) { - if (!p->perfect[i].res.class) - continue; - if (!tc_cls_stats_dump(tp, walker, p->perfect + i)) - return; - } - } - if (!p->h) - return; - for (i = 0; i < p->hash; i++) { - for (f = rtnl_dereference(p->h[i]); f; f = next) { - next = rtnl_dereference(f->next); - if (!tc_cls_stats_dump(tp, walker, &f->result)) - return; - } - } -} - -static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - int i; - - pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); - - if (p->perfect) { - for (i = 0; i < p->hash; i++) { - struct tcindex_filter_result *r = p->perfect + i; - - /* tcf_queue_work() does not guarantee the ordering we - * want, so we have to take this refcnt temporarily to - * ensure 'p' is freed after all tcindex_filter_result - * here. Imperfect hash does not need this, because it - * uses linked lists rather than an array. 
- */ - tcindex_data_get(p); - - tcf_unbind_filter(tp, &r->res); - if (tcf_exts_get_net(&r->exts)) - tcf_queue_work(&r->rwork, - tcindex_destroy_rexts_work); - else - __tcindex_destroy_rexts(r); - } - } - - for (i = 0; p->h && i < p->hash; i++) { - struct tcindex_filter *f, *next; - bool last; - - for (f = rtnl_dereference(p->h[i]); f; f = next) { - next = rtnl_dereference(f->next); - tcindex_delete(tp, &f->result, &last, rtnl_held, NULL); - } - } - - tcf_queue_work(&p->rwork, tcindex_destroy_work); -} - - -static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, - struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) -{ - struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = fh; - struct nlattr *nest; - - pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n", - tp, fh, skb, t, p, r); - pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (!fh) { - t->tcm_handle = ~0; /* whatever ... 
*/ - if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) || - nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) || - nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) || - nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through)) - goto nla_put_failure; - nla_nest_end(skb, nest); - } else { - if (p->perfect) { - t->tcm_handle = r - p->perfect; - } else { - struct tcindex_filter *f; - struct tcindex_filter __rcu **fp; - int i; - - t->tcm_handle = 0; - for (i = 0; !t->tcm_handle && i < p->hash; i++) { - fp = &p->h[i]; - for (f = rtnl_dereference(*fp); - !t->tcm_handle && f; - fp = &f->next, f = rtnl_dereference(*fp)) { - if (&f->result == r) - t->tcm_handle = f->key; - } - } - } - pr_debug("handle = %d\n", t->tcm_handle); - if (r->res.class && - nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid)) - goto nla_put_failure; - - if (tcf_exts_dump(skb, &r->exts) < 0) - goto nla_put_failure; - nla_nest_end(skb, nest); - - if (tcf_exts_dump_stats(skb, &r->exts) < 0) - goto nla_put_failure; - } - - return skb->len; - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, - void *q, unsigned long base) -{ - struct tcindex_filter_result *r = fh; - - tc_cls_bind_class(classid, cl, q, &r->res, base); -} - -static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { - .kind = "tcindex", - .classify = tcindex_classify, - .init = tcindex_init, - .destroy = tcindex_destroy, - .get = tcindex_get, - .change = tcindex_change, - .delete = tcindex_delete, - .walk = tcindex_walk, - .dump = tcindex_dump, - .bind_class = tcindex_bind_class, - .owner = THIS_MODULE, -}; - -static int __init init_tcindex(void) -{ - return register_tcf_proto_ops(&cls_tcindex_ops); -} - -static void __exit exit_tcindex(void) -{ - unregister_tcf_proto_ops(&cls_tcindex_ops); -} - -module_init(init_tcindex) -module_exit(exit_tcindex) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 
e9780631b5b5..aba789c30a2e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1286,7 +1286,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) { err = PTR_ERR(stab); - goto err_out4; + goto err_out3; } rcu_assign_pointer(sch->stab, stab); } @@ -1294,14 +1294,14 @@ static struct Qdisc *qdisc_create(struct net_device *dev, if (ops->init) { err = ops->init(sch, tca[TCA_OPTIONS], extack); if (err != 0) - goto err_out5; + goto err_out4; } if (tca[TCA_RATE]) { err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); - goto err_out5; + goto err_out4; } err = gen_new_estimator(&sch->bstats, @@ -1312,7 +1312,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); - goto err_out5; + goto err_out4; } } @@ -1321,12 +1321,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev, return sch; -err_out5: - qdisc_put_stab(rtnl_dereference(sch->stab)); err_out4: - /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ + /* Even if ops->init() failed, we call ops->destroy() + * like qdisc_create_dflt(). 
+ */ if (ops->destroy) ops->destroy(sch); + qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c deleted file mode 100644 index 4a981ca90b0b..000000000000 --- a/net/sched/sch_atm.c +++ /dev/null @@ -1,706 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/atmdev.h> -#include <linux/atmclip.h> -#include <linux/rtnetlink.h> -#include <linux/file.h> /* for fput */ -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> - -/* - * The ATM queuing discipline provides a framework for invoking classifiers - * (aka "filters"), which in turn select classes of this queuing discipline. - * Each class maps the flow(s) it is handling to a given VC. Multiple classes - * may share the same VC. - * - * When creating a class, VCs are specified by passing the number of the open - * socket descriptor by which the calling process references the VC. The kernel - * keeps the VC open at least until all classes using it are removed. - * - * In this file, most functions are named atm_tc_* to avoid confusion with all - * the atm_* in net/atm. This naming convention differs from what's used in the - * rest of net/sched. - * - * Known bugs: - * - sometimes messes up the IP stack - * - any manipulations besides the few operations described in the README, are - * untested and likely to crash the system - * - should lock the flow while there is data in the queue (?) - */ - -#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) - -struct atm_flow_data { - struct Qdisc_class_common common; - struct Qdisc *q; /* FIFO, TBF, etc. 
*/ - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ - void (*old_pop)(struct atm_vcc *vcc, - struct sk_buff *skb); /* chaining */ - struct atm_qdisc_data *parent; /* parent qdisc */ - struct socket *sock; /* for closing */ - int ref; /* reference count */ - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct list_head list; - struct atm_flow_data *excess; /* flow for excess traffic; - NULL to set CLP instead */ - int hdr_len; - unsigned char hdr[]; /* header data; MUST BE LAST */ -}; - -struct atm_qdisc_data { - struct atm_flow_data link; /* unclassified skbs go here */ - struct list_head flows; /* NB: "link" is also on this - list */ - struct tasklet_struct task; /* dequeue tasklet */ -}; - -/* ------------------------- Class/flow operations ------------------------- */ - -static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - list_for_each_entry(flow, &p->flows, list) { - if (flow->common.classid == classid) - return flow; - } - return NULL; -} - -static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", - sch, p, flow, new, old); - if (list_empty(&flow->list)) - return -EINVAL; - if (!new) - new = &noop_qdisc; - *old = flow->q; - flow->q = new; - if (*old) - qdisc_reset(*old); - return 0; -} - -static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow); - return flow ? 
flow->q : NULL; -} - -static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -static unsigned long atm_tc_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); - flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("%s: flow %p\n", __func__, flow); - return (unsigned long)flow; -} - -/* - * atm_tc_put handles all destructions, including the ones that are explicitly - * requested (atm_tc_destroy, etc.). The assumption here is that we never drop - * anything that still seems to be in use. - */ -static void atm_tc_put(struct Qdisc *sch, unsigned long cl) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (--flow->ref) - return; - pr_debug("atm_tc_put: destroying\n"); - list_del_init(&flow->list); - pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_put(flow->q); - tcf_block_put(flow->block); - if (flow->sock) { - pr_debug("atm_tc_put: f_count %ld\n", - file_count(flow->sock->file)); - flow->vcc->pop = flow->old_pop; - sockfd_put(flow->sock); - } - if (flow->excess) - atm_tc_put(sch, (unsigned long)flow->excess); - if (flow != &p->link) - kfree(flow); - /* - * If flow == &p->link, the qdisc no longer works at this point and - * needs to be removed. (By the caller of atm_tc_put.) 
- */ -} - -static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb) -{ - struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent; - - pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p); - VCC2FLOW(vcc)->old_pop(vcc, skb); - tasklet_schedule(&p->task); -} - -static const u8 llc_oui_ip[] = { - 0xaa, /* DSAP: non-ISO */ - 0xaa, /* SSAP: non-ISO */ - 0x03, /* Ctrl: Unnumbered Information Command PDU */ - 0x00, /* OUI: EtherType */ - 0x00, 0x00, - 0x08, 0x00 -}; /* Ethertype IP (0800) */ - -static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = { - [TCA_ATM_FD] = { .type = NLA_U32 }, - [TCA_ATM_EXCESS] = { .type = NLA_U32 }, -}; - -static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)*arg; - struct atm_flow_data *excess = NULL; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_ATM_MAX + 1]; - struct socket *sock; - int fd, error, hdr_len; - void *hdr; - - pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x," - "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt); - /* - * The concept of parents doesn't apply for this qdisc. - */ - if (parent && parent != TC_H_ROOT && parent != sch->handle) - return -EINVAL; - /* - * ATM classes cannot be changed. In order to change properties of the - * ATM connection, that socket needs to be modified directly (via the - * native ATM API. In order to send a flow to a different VC, the old - * class needs to be removed and a new one added. (This may be changed - * later.) 
- */ - if (flow) - return -EBUSY; - if (opt == NULL) - return -EINVAL; - - error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy, - NULL); - if (error < 0) - return error; - - if (!tb[TCA_ATM_FD]) - return -EINVAL; - fd = nla_get_u32(tb[TCA_ATM_FD]); - pr_debug("atm_tc_change: fd %d\n", fd); - if (tb[TCA_ATM_HDR]) { - hdr_len = nla_len(tb[TCA_ATM_HDR]); - hdr = nla_data(tb[TCA_ATM_HDR]); - } else { - hdr_len = RFC1483LLC_LEN; - hdr = NULL; /* default LLC/SNAP for IP */ - } - if (!tb[TCA_ATM_EXCESS]) - excess = NULL; - else { - excess = (struct atm_flow_data *) - atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); - if (!excess) - return -ENOENT; - } - pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n", - opt->nla_type, nla_len(opt), hdr_len); - sock = sockfd_lookup(fd, &error); - if (!sock) - return error; /* f_count++ */ - pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file)); - if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) { - error = -EPROTOTYPE; - goto err_out; - } - /* @@@ should check if the socket is really operational or we'll crash - on vcc->send */ - if (classid) { - if (TC_H_MAJ(classid ^ sch->handle)) { - pr_debug("atm_tc_change: classid mismatch\n"); - error = -EINVAL; - goto err_out; - } - } else { - int i; - unsigned long cl; - - for (i = 1; i < 0x8000; i++) { - classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_find(sch, classid); - if (!cl) - break; - } - } - pr_debug("atm_tc_change: new id %x\n", classid); - flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL); - pr_debug("atm_tc_change: flow %p\n", flow); - if (!flow) { - error = -ENOBUFS; - goto err_out; - } - - error = tcf_block_get(&flow->block, &flow->filter_list, sch, - extack); - if (error) { - kfree(flow); - goto err_out; - } - - flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - extack); - if (!flow->q) - flow->q = &noop_qdisc; - pr_debug("atm_tc_change: qdisc %p\n", flow->q); - 
flow->sock = sock; - flow->vcc = ATM_SD(sock); /* speedup */ - flow->vcc->user_back = flow; - pr_debug("atm_tc_change: vcc %p\n", flow->vcc); - flow->old_pop = flow->vcc->pop; - flow->parent = p; - flow->vcc->pop = sch_atm_pop; - flow->common.classid = classid; - flow->ref = 1; - flow->excess = excess; - list_add(&flow->list, &p->link.list); - flow->hdr_len = hdr_len; - if (hdr) - memcpy(flow->hdr, hdr, hdr_len); - else - memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip)); - *arg = (unsigned long)flow; - return 0; -err_out: - sockfd_put(sock); - return error; -} - -static int atm_tc_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - if (list_empty(&flow->list)) - return -EINVAL; - if (rcu_access_pointer(flow->filter_list) || flow == &p->link) - return -EBUSY; - /* - * Reference count must be 2: one for "keepalive" (set at class - * creation), and one for the reference held when calling delete. - */ - if (flow->ref < 2) { - pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); - return -EINVAL; - } - if (flow->ref > 2) - return -EBUSY; /* catch references via excess, etc. 
*/ - atm_tc_put(sch, arg); - return 0; -} - -static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); - if (walker->stop) - return; - list_for_each_entry(flow, &p->flows, list) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) - break; - } -} - -static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - - pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); - return flow ? flow->block : p->link.block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - struct tcf_result res; - int result; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - - pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - result = TC_ACT_OK; /* be nice to gcc */ - flow = NULL; - if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { - struct tcf_proto *fl; - - list_for_each_entry(flow, &p->flows, list) { - fl = rcu_dereference_bh(flow->filter_list); - if (fl) { - result = tcf_classify(skb, NULL, fl, &res, true); - if (result < 0) - continue; - if (result == TC_ACT_SHOT) - goto done; - - flow = (struct atm_flow_data *)res.class; - if (!flow) - flow = lookup_flow(sch, res.classid); - goto drop; - } - } - flow = NULL; -done: - ; - } - if (!flow) { - flow = &p->link; - } else { - if (flow->vcc) - ATM_SKB(skb)->atm_options = flow->vcc->atm_options; - /*@@@ looks good ... 
but it's not supposed to work :-) */ -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - case TC_ACT_SHOT: - __qdisc_drop(skb, to_free); - goto drop; - case TC_ACT_RECLASSIFY: - if (flow->excess) - flow = flow->excess; - else - ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP; - break; - } -#endif - } - - ret = qdisc_enqueue(skb, flow->q, to_free); - if (ret != NET_XMIT_SUCCESS) { -drop: __maybe_unused - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - if (flow) - flow->qstats.drops++; - } - return ret; - } - /* - * Okay, this may seem weird. We pretend we've dropped the packet if - * it goes via ATM. The reason for this is that the outer qdisc - * expects to be able to q->dequeue the packet later on if we return - * success at this place. Also, sch->q.qdisc needs to reflect whether - * there is a packet egligible for dequeuing or not. Note that the - * statistics of the outer qdisc are necessarily wrong because of all - * this. There's currently no correct solution for this. - */ - if (flow == &p->link) { - sch->q.qlen++; - return NET_XMIT_SUCCESS; - } - tasklet_schedule(&p->task); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -/* - * Dequeue packets and send them over ATM. Note that we quite deliberately - * avoid checking net_device's flow control here, simply because sch_atm - * uses its own channels, which have nothing to do with any CLIP/LANE/or - * non-ATM interfaces. - */ - -static void sch_atm_dequeue(struct tasklet_struct *t) -{ - struct atm_qdisc_data *p = from_tasklet(p, t, task); - struct Qdisc *sch = qdisc_from_priv(p); - struct atm_flow_data *flow; - struct sk_buff *skb; - - pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - if (flow == &p->link) - continue; - /* - * If traffic is properly shaped, this won't generate nasty - * little bursts. 
Otherwise, it may ... (but that's okay) - */ - while ((skb = flow->q->ops->peek(flow->q))) { - if (!atm_may_send(flow->vcc, skb->truesize)) - break; - - skb = qdisc_dequeue_peeked(flow->q); - if (unlikely(!skb)) - break; - - qdisc_bstats_update(sch, skb); - bstats_update(&flow->bstats, skb); - pr_debug("atm_tc_dequeue: sending on class %p\n", flow); - /* remove any LL header somebody else has attached */ - skb_pull(skb, skb_network_offset(skb)); - if (skb_headroom(skb) < flow->hdr_len) { - struct sk_buff *new; - - new = skb_realloc_headroom(skb, flow->hdr_len); - dev_kfree_skb(skb); - if (!new) - continue; - skb = new; - } - pr_debug("sch_atm_dequeue: ip %p, data %p\n", - skb_network_header(skb), skb->data); - ATM_SKB(skb)->vcc = flow->vcc; - memcpy(skb_push(skb, flow->hdr_len), flow->hdr, - flow->hdr_len); - refcount_add(skb->truesize, - &sk_atm(flow->vcc)->sk_wmem_alloc); - /* atm.atm_options are already set by atm_tc_enqueue */ - flow->vcc->send(flow->vcc, skb); - } - } -} - -static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - - pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); - tasklet_schedule(&p->task); - skb = qdisc_dequeue_peeked(p->link.q); - if (skb) - sch->q.qlen--; - return skb; -} - -static struct sk_buff *atm_tc_peek(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - - pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p); - - return p->link.q->ops->peek(p->link.q); -} - -static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); - INIT_LIST_HEAD(&p->flows); - INIT_LIST_HEAD(&p->link.list); - gnet_stats_basic_sync_init(&p->link.bstats); - list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, sch->handle, extack); - if (!p->link.q) - 
p->link.q = &noop_qdisc; - pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.vcc = NULL; - p->link.sock = NULL; - p->link.common.classid = sch->handle; - p->link.ref = 1; - - err = tcf_block_get(&p->link.block, &p->link.filter_list, sch, - extack); - if (err) - return err; - - tasklet_setup(&p->task, sch_atm_dequeue); - return 0; -} - -static void atm_tc_reset(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow; - - pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) - qdisc_reset(flow->q); -} - -static void atm_tc_destroy(struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow, *tmp; - - pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) { - tcf_block_put(flow->block); - flow->block = NULL; - } - - list_for_each_entry_safe(flow, tmp, &p->flows, list) { - if (flow->ref > 1) - pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); - atm_tc_put(sch, (unsigned long)flow); - } - tasklet_kill(&p->task); -} - -static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - struct atm_flow_data *flow = (struct atm_flow_data *)cl; - struct nlattr *nest; - - pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", - sch, p, flow, skb, tcm); - if (list_empty(&flow->list)) - return -EINVAL; - tcm->tcm_handle = flow->common.classid; - tcm->tcm_info = flow->q->handle; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - - if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr)) - goto nla_put_failure; - if (flow->vcc) { - struct sockaddr_atmpvc pvc; - int state; - - memset(&pvc, 0, sizeof(pvc)); - pvc.sap_family = AF_ATMPVC; - pvc.sap_addr.itf = flow->vcc->dev ? 
flow->vcc->dev->number : -1; - pvc.sap_addr.vpi = flow->vcc->vpi; - pvc.sap_addr.vci = flow->vcc->vci; - if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc)) - goto nla_put_failure; - state = ATM_VF2VS(flow->vcc->flags); - if (nla_put_u32(skb, TCA_ATM_STATE, state)) - goto nla_put_failure; - } - if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) - goto nla_put_failure; - } else { - if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) - goto nla_put_failure; - } - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} -static int -atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct atm_flow_data *flow = (struct atm_flow_data *)arg; - - if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 || - gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0) - return -1; - - return 0; -} - -static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - return 0; -} - -static const struct Qdisc_class_ops atm_class_ops = { - .graft = atm_tc_graft, - .leaf = atm_tc_leaf, - .find = atm_tc_find, - .change = atm_tc_change, - .delete = atm_tc_delete, - .walk = atm_tc_walk, - .tcf_block = atm_tc_tcf_block, - .bind_tcf = atm_tc_bind_filter, - .unbind_tcf = atm_tc_put, - .dump = atm_tc_dump_class, - .dump_stats = atm_tc_dump_class_stats, -}; - -static struct Qdisc_ops atm_qdisc_ops __read_mostly = { - .cl_ops = &atm_class_ops, - .id = "atm", - .priv_size = sizeof(struct atm_qdisc_data), - .enqueue = atm_tc_enqueue, - .dequeue = atm_tc_dequeue, - .peek = atm_tc_peek, - .init = atm_tc_init, - .reset = atm_tc_reset, - .destroy = atm_tc_destroy, - .dump = atm_tc_dump, - .owner = THIS_MODULE, -}; - -static int __init atm_init(void) -{ - return register_qdisc(&atm_qdisc_ops); -} - -static void __exit atm_exit(void) -{ - unregister_qdisc(&atm_qdisc_ops); -} - -module_init(atm_init) -module_exit(atm_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_cbq.c 
b/net/sched/sch_cbq.c deleted file mode 100644 index 36db5f6782f2..000000000000 --- a/net/sched/sch_cbq.c +++ /dev/null @@ -1,1727 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * net/sched/sch_cbq.c Class-Based Queueing discipline. - * - * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> - */ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <net/netlink.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> - - -/* Class-Based Queueing (CBQ) algorithm. - ======================================= - - Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource - Management Models for Packet Networks", - IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 - - [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 - - [3] Sally Floyd, "Notes on Class-Based Queueing: Setting - Parameters", 1996 - - [4] Sally Floyd and Michael Speer, "Experimental Results - for Class-Based Queueing", 1998, not published. - - ----------------------------------------------------------------------- - - Algorithm skeleton was taken from NS simulator cbq.cc. - If someone wants to check this code against the LBL version, - he should take into account that ONLY the skeleton was borrowed, - the implementation is different. Particularly: - - --- The WRR algorithm is different. Our version looks more - reasonable (I hope) and works when quanta are allowed to be - less than MTU, which is always the case when real time classes - have small rates. Note, that the statement of [3] is - incomplete, delay may actually be estimated even if class - per-round allotment is less than MTU. Namely, if per-round - allotment is W*r_i, and r_1+...+r_k = r < 1 - - delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B - - In the worst case we have IntServ estimate with D = W*r+k*MTU - and C = MTU*r. The proof (if correct at all) is trivial. 
- - - --- It seems that cbq-2.0 is not very accurate. At least, I cannot - interpret some places, which look like wrong translations - from NS. Anyone is advised to find these differences - and explain to me, why I am wrong 8). - - --- Linux has no EOI event, so that we cannot estimate true class - idle time. Workaround is to consider the next dequeue event - as sign that previous packet is finished. This is wrong because of - internal device queueing, but on a permanently loaded link it is true. - Moreover, combined with clock integrator, this scheme looks - very close to an ideal solution. */ - -struct cbq_sched_data; - - -struct cbq_class { - struct Qdisc_class_common common; - struct cbq_class *next_alive; /* next class with backlog in this priority band */ - -/* Parameters */ - unsigned char priority; /* class priority */ - unsigned char priority2; /* priority to be used after overlimit */ - unsigned char ewma_log; /* time constant for idle time calculation */ - - u32 defmap; - - /* Link-sharing scheduler parameters */ - long maxidle; /* Class parameters: see below. 
*/ - long offtime; - long minidle; - u32 avpkt; - struct qdisc_rate_table *R_tab; - - /* General scheduler (WRR) parameters */ - long allot; - long quantum; /* Allotment per WRR round */ - long weight; /* Relative allotment: see below */ - - struct Qdisc *qdisc; /* Ptr to CBQ discipline */ - struct cbq_class *split; /* Ptr to split node */ - struct cbq_class *share; /* Ptr to LS parent in the class tree */ - struct cbq_class *tparent; /* Ptr to tree parent in the class tree */ - struct cbq_class *borrow; /* NULL if class is bandwidth limited; - parent otherwise */ - struct cbq_class *sibling; /* Sibling chain */ - struct cbq_class *children; /* Pointer to children chain */ - - struct Qdisc *q; /* Elementary queueing discipline */ - - -/* Variables */ - unsigned char cpriority; /* Effective priority */ - unsigned char delayed; - unsigned char level; /* level of the class in hierarchy: - 0 for leaf classes, and maximal - level of children + 1 for nodes. - */ - - psched_time_t last; /* Last end of service */ - psched_time_t undertime; - long avgidle; - long deficit; /* Saved deficit for WRR */ - psched_time_t penalized; - struct gnet_stats_basic_sync bstats; - struct gnet_stats_queue qstats; - struct net_rate_estimator __rcu *rate_est; - struct tc_cbq_xstats xstats; - - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - - int filters; - - struct cbq_class *defaults[TC_PRIO_MAX + 1]; -}; - -struct cbq_sched_data { - struct Qdisc_class_hash clhash; /* Hash table of all classes */ - int nclasses[TC_CBQ_MAXPRIO + 1]; - unsigned int quanta[TC_CBQ_MAXPRIO + 1]; - - struct cbq_class link; - - unsigned int activemask; - struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes - with backlog */ - -#ifdef CONFIG_NET_CLS_ACT - struct cbq_class *rx_class; -#endif - struct cbq_class *tx_class; - struct cbq_class *tx_borrowed; - int tx_len; - psched_time_t now; /* Cached timestamp */ - unsigned int pmask; - - struct qdisc_watchdog watchdog; /* Watchdog 
timer, - started when CBQ has - backlog, but cannot - transmit just now */ - psched_tdiff_t wd_expires; - int toplevel; - u32 hgenerator; -}; - - -#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) - -static inline struct cbq_class * -cbq_class_lookup(struct cbq_sched_data *q, u32 classid) -{ - struct Qdisc_class_common *clc; - - clc = qdisc_class_find(&q->clhash, classid); - if (clc == NULL) - return NULL; - return container_of(clc, struct cbq_class, common); -} - -#ifdef CONFIG_NET_CLS_ACT - -static struct cbq_class * -cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) -{ - struct cbq_class *cl; - - for (cl = this->tparent; cl; cl = cl->tparent) { - struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; - - if (new != NULL && new != this) - return new; - } - return NULL; -} - -#endif - -/* Classify packet. The procedure is pretty complicated, but - * it allows us to combine link sharing and priority scheduling - * transparently. - * - * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, - * so that it resolves to split nodes. Then packets are classified - * by logical priority, or a more specific classifier may be attached - * to the split node. - */ - -static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *head = &q->link; - struct cbq_class **defmap; - struct cbq_class *cl = NULL; - u32 prio = skb->priority; - struct tcf_proto *fl; - struct tcf_result res; - - /* - * Step 1. If skb->priority points to one of our classes, use it. - */ - if (TC_H_MAJ(prio ^ sch->handle) == 0 && - (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; - - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - for (;;) { - int result = 0; - defmap = head->defaults; - - fl = rcu_dereference_bh(head->filter_list); - /* - * Step 2+n. Apply classifier. 
- */ - result = tcf_classify(skb, NULL, fl, &res, true); - if (!fl || result < 0) - goto fallback; - if (result == TC_ACT_SHOT) - return NULL; - - cl = (void *)res.class; - if (!cl) { - if (TC_H_MAJ(res.classid)) - cl = cbq_class_lookup(q, res.classid); - else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) - cl = defmap[TC_PRIO_BESTEFFORT]; - - if (cl == NULL) - goto fallback; - } - if (cl->level >= head->level) - goto fallback; -#ifdef CONFIG_NET_CLS_ACT - switch (result) { - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - fallthrough; - case TC_ACT_RECLASSIFY: - return cbq_reclassify(skb, cl); - } -#endif - if (cl->level == 0) - return cl; - - /* - * Step 3+n. If classifier selected a link sharing class, - * apply agency specific classifier. - * Repeat this procedure until we hit a leaf node. - */ - head = cl; - } - -fallback: - cl = head; - - /* - * Step 4. No success... - */ - if (TC_H_MAJ(prio) == 0 && - !(cl = head->defaults[prio & TC_PRIO_MAX]) && - !(cl = head->defaults[TC_PRIO_BESTEFFORT])) - return head; - - return cl; -} - -/* - * A packet has just been enqueued on the empty class. - * cbq_activate_class adds it to the tail of active class list - * of its priority band. - */ - -static inline void cbq_activate_class(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - int prio = cl->cpriority; - struct cbq_class *cl_tail; - - cl_tail = q->active[prio]; - q->active[prio] = cl; - - if (cl_tail != NULL) { - cl->next_alive = cl_tail->next_alive; - cl_tail->next_alive = cl; - } else { - cl->next_alive = cl; - q->activemask |= (1<<prio); - } -} - -/* - * Unlink class from active chain. - * Note that this same procedure is done directly in cbq_dequeue* - * during round-robin procedure. 
- */ - -static void cbq_deactivate_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - int prio = this->cpriority; - struct cbq_class *cl; - struct cbq_class *cl_prev = q->active[prio]; - - do { - cl = cl_prev->next_alive; - if (cl == this) { - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - if (cl == q->active[prio]) { - q->active[prio] = cl_prev; - if (cl == q->active[prio]) { - q->active[prio] = NULL; - q->activemask &= ~(1<<prio); - return; - } - } - return; - } - } while ((cl_prev = cl) != q->active[prio]); -} - -static void -cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) -{ - int toplevel = q->toplevel; - - if (toplevel > cl->level) { - psched_time_t now = psched_get_time(); - - do { - if (cl->undertime < now) { - q->toplevel = cl->level; - return; - } - } while ((cl = cl->borrow) != NULL && toplevel > cl->level); - } -} - -static int -cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - int ret; - struct cbq_class *cl = cbq_classify(skb, sch, &ret); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; -#endif - if (cl == NULL) { - if (ret & __NET_XMIT_BYPASS) - qdisc_qstats_drop(sch); - __qdisc_drop(skb, to_free); - return ret; - } - - ret = qdisc_enqueue(skb, cl->q, to_free); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - - if (net_xmit_drop_count(ret)) { - qdisc_qstats_drop(sch); - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } - return ret; -} - -/* Overlimit action: penalize leaf class by adding offtime */ -static void cbq_overlimit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - psched_tdiff_t delay = cl->undertime - q->now; - - if (!cl->delayed) { - delay += cl->offtime; - - /* - * Class goes to sleep, so that it will have no - * chance to work avgidle. 
Let's forgive it 8) - * - * BTW cbq-2.0 has a crap in this - * place, apparently they forgot to shift it by cl->ewma_log. - */ - if (cl->avgidle < 0) - delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); - if (cl->avgidle < cl->minidle) - cl->avgidle = cl->minidle; - if (delay <= 0) - delay = 1; - cl->undertime = q->now + delay; - - cl->xstats.overactions++; - cl->delayed = 1; - } - if (q->wd_expires == 0 || q->wd_expires > delay) - q->wd_expires = delay; - - /* Dirty work! We must schedule wakeups based on - * real available rate, rather than leaf rate, - * which may be tiny (even zero). - */ - if (q->toplevel == TC_CBQ_MAXLEVEL) { - struct cbq_class *b; - psched_tdiff_t base_delay = q->wd_expires; - - for (b = cl->borrow; b; b = b->borrow) { - delay = b->undertime - q->now; - if (delay < base_delay) { - if (delay <= 0) - delay = 1; - base_delay = delay; - } - } - - q->wd_expires = base_delay; - } -} - -/* - * It is mission critical procedure. - * - * We "regenerate" toplevel cutoff, if transmitting class - * has backlog and it is not regulated. It is not part of - * original CBQ description, but looks more reasonable. - * Probably, it is wrong. This question needs further investigation. - */ - -static inline void -cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, - struct cbq_class *borrowed) -{ - if (cl && q->toplevel >= borrowed->level) { - if (cl->q->q.qlen > 1) { - do { - if (borrowed->undertime == PSCHED_PASTPERFECT) { - q->toplevel = borrowed->level; - return; - } - } while ((borrowed = borrowed->borrow) != NULL); - } -#if 0 - /* It is not necessary now. Uncommenting it - will save CPU cycles, but decrease fairness. - */ - q->toplevel = TC_CBQ_MAXLEVEL; -#endif - } -} - -static void -cbq_update(struct cbq_sched_data *q) -{ - struct cbq_class *this = q->tx_class; - struct cbq_class *cl = this; - int len = q->tx_len; - psched_time_t now; - - q->tx_class = NULL; - /* Time integrator. 
We calculate EOS time - * by adding expected packet transmission time. - */ - now = q->now + L2T(&q->link, len); - - for ( ; cl; cl = cl->share) { - long avgidle = cl->avgidle; - long idle; - - _bstats_update(&cl->bstats, len, 1); - - /* - * (now - last) is total time between packet right edges. - * (last_pktlen/rate) is "virtual" busy time, so that - * - * idle = (now - last) - last_pktlen/rate - */ - - idle = now - cl->last; - if ((unsigned long)idle > 128*1024*1024) { - avgidle = cl->maxidle; - } else { - idle -= L2T(cl, len); - - /* true_avgidle := (1-W)*true_avgidle + W*idle, - * where W=2^{-ewma_log}. But cl->avgidle is scaled: - * cl->avgidle == true_avgidle/W, - * hence: - */ - avgidle += idle - (avgidle>>cl->ewma_log); - } - - if (avgidle <= 0) { - /* Overlimit or at-limit */ - - if (avgidle < cl->minidle) - avgidle = cl->minidle; - - cl->avgidle = avgidle; - - /* Calculate expected time, when this class - * will be allowed to send. - * It will occur, when: - * (1-W)*true_avgidle + W*delay = 0, i.e. - * idle = (1/W - 1)*(-true_avgidle) - * or - * idle = (1 - W)*(-cl->avgidle); - */ - idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); - - /* - * That is not all. - * To maintain the rate allocated to the class, - * we add to undertime virtual clock, - * necessary to complete transmitted packet. 
- * (len/phys_bandwidth has been already passed - * to the moment of cbq_update) - */ - - idle -= L2T(&q->link, len); - idle += L2T(cl, len); - - cl->undertime = now + idle; - } else { - /* Underlimit */ - - cl->undertime = PSCHED_PASTPERFECT; - if (avgidle > cl->maxidle) - cl->avgidle = cl->maxidle; - else - cl->avgidle = avgidle; - } - if ((s64)(now - cl->last) > 0) - cl->last = now; - } - - cbq_update_toplevel(q, this, q->tx_borrowed); -} - -static inline struct cbq_class * -cbq_under_limit(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *this_cl = cl; - - if (cl->tparent == NULL) - return cl; - - if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) { - cl->delayed = 0; - return cl; - } - - do { - /* It is very suspicious place. Now overlimit - * action is generated for not bounded classes - * only if link is completely congested. - * Though it is in agree with ancestor-only paradigm, - * it looks very stupid. Particularly, - * it means that this chunk of code will either - * never be called or result in strong amplification - * of burstiness. Dangerous, silly, and, however, - * no another solution exists. - */ - cl = cl->borrow; - if (!cl) { - this_cl->qstats.overlimits++; - cbq_overlimit(this_cl); - return NULL; - } - if (cl->level > q->toplevel) - return NULL; - } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime); - - cl->delayed = 0; - return cl; -} - -static inline struct sk_buff * -cbq_dequeue_prio(struct Qdisc *sch, int prio) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl_tail, *cl_prev, *cl; - struct sk_buff *skb; - int deficit; - - cl_tail = cl_prev = q->active[prio]; - cl = cl_prev->next_alive; - - do { - deficit = 0; - - /* Start round */ - do { - struct cbq_class *borrow = cl; - - if (cl->q->q.qlen && - (borrow = cbq_under_limit(cl)) == NULL) - goto skip_class; - - if (cl->deficit <= 0) { - /* Class exhausted its allotment per - * this round. 
Switch to the next one. - */ - deficit = 1; - cl->deficit += cl->quantum; - goto next_class; - } - - skb = cl->q->dequeue(cl->q); - - /* Class did not give us any skb :-( - * It could occur even if cl->q->q.qlen != 0 - * f.e. if cl->q == "tbf" - */ - if (skb == NULL) - goto skip_class; - - cl->deficit -= qdisc_pkt_len(skb); - q->tx_class = cl; - q->tx_borrowed = borrow; - if (borrow != cl) { -#ifndef CBQ_XSTATS_BORROWS_BYTES - borrow->xstats.borrows++; - cl->xstats.borrows++; -#else - borrow->xstats.borrows += qdisc_pkt_len(skb); - cl->xstats.borrows += qdisc_pkt_len(skb); -#endif - } - q->tx_len = qdisc_pkt_len(skb); - - if (cl->deficit <= 0) { - q->active[prio] = cl; - cl = cl->next_alive; - cl->deficit += cl->quantum; - } - return skb; - -skip_class: - if (cl->q->q.qlen == 0 || prio != cl->cpriority) { - /* Class is empty or penalized. - * Unlink it from active chain. - */ - cl_prev->next_alive = cl->next_alive; - cl->next_alive = NULL; - - /* Did cl_tail point to it? */ - if (cl == cl_tail) { - /* Repair it! */ - cl_tail = cl_prev; - - /* Was it the last class in this band? */ - if (cl == cl_tail) { - /* Kill the band! 
*/ - q->active[prio] = NULL; - q->activemask &= ~(1<<prio); - if (cl->q->q.qlen) - cbq_activate_class(cl); - return NULL; - } - - q->active[prio] = cl_tail; - } - if (cl->q->q.qlen) - cbq_activate_class(cl); - - cl = cl_prev; - } - -next_class: - cl_prev = cl; - cl = cl->next_alive; - } while (cl_prev != cl_tail); - } while (deficit); - - q->active[prio] = cl_prev; - - return NULL; -} - -static inline struct sk_buff * -cbq_dequeue_1(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - unsigned int activemask; - - activemask = q->activemask & 0xFF; - while (activemask) { - int prio = ffz(~activemask); - activemask &= ~(1<<prio); - skb = cbq_dequeue_prio(sch, prio); - if (skb) - return skb; - } - return NULL; -} - -static struct sk_buff * -cbq_dequeue(struct Qdisc *sch) -{ - struct sk_buff *skb; - struct cbq_sched_data *q = qdisc_priv(sch); - psched_time_t now; - - now = psched_get_time(); - - if (q->tx_class) - cbq_update(q); - - q->now = now; - - for (;;) { - q->wd_expires = 0; - - skb = cbq_dequeue_1(sch); - if (skb) { - qdisc_bstats_update(sch, skb); - sch->q.qlen--; - return skb; - } - - /* All the classes are overlimit. - * - * It is possible, if: - * - * 1. Scheduler is empty. - * 2. Toplevel cutoff inhibited borrowing. - * 3. Root class is overlimit. - * - * Reset 2d and 3d conditions and retry. - * - * Note, that NS and cbq-2.0 are buggy, peeking - * an arbitrary class is appropriate for ancestor-only - * sharing, but not for toplevel algorithm. - * - * Our version is better, but slower, because it requires - * two passes, but it is unavoidable with top-level sharing. - */ - - if (q->toplevel == TC_CBQ_MAXLEVEL && - q->link.undertime == PSCHED_PASTPERFECT) - break; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->link.undertime = PSCHED_PASTPERFECT; - } - - /* No packets in scheduler or nobody wants to give them to us :-( - * Sigh... start watchdog timer in the last case. 
- */ - - if (sch->q.qlen) { - qdisc_qstats_overlimit(sch); - if (q->wd_expires) - qdisc_watchdog_schedule(&q->watchdog, - now + q->wd_expires); - } - return NULL; -} - -/* CBQ class maintenance routines */ - -static void cbq_adjust_levels(struct cbq_class *this) -{ - if (this == NULL) - return; - - do { - int level = 0; - struct cbq_class *cl; - - cl = this->children; - if (cl) { - do { - if (cl->level > level) - level = cl->level; - } while ((cl = cl->sibling) != this->children); - } - this->level = level + 1; - } while ((this = this->tparent) != NULL); -} - -static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) -{ - struct cbq_class *cl; - unsigned int h; - - if (q->quanta[prio] == 0) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - /* BUGGGG... Beware! This expression suffer of - * arithmetic overflows! - */ - if (cl->priority == prio) { - cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ - q->quanta[prio]; - } - if (cl->quantum <= 0 || - cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) { - pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n", - cl->common.classid, cl->quantum); - cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; - } - } - } -} - -static void cbq_sync_defmap(struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - struct cbq_class *split = cl->split; - unsigned int h; - int i; - - if (split == NULL) - return; - - for (i = 0; i <= TC_PRIO_MAX; i++) { - if (split->defaults[i] == cl && !(cl->defmap & (1<<i))) - split->defaults[i] = NULL; - } - - for (i = 0; i <= TC_PRIO_MAX; i++) { - int level = split->level; - - if (split->defaults[i]) - continue; - - for (h = 0; h < q->clhash.hashsize; h++) { - struct cbq_class *c; - - hlist_for_each_entry(c, &q->clhash.hash[h], - common.hnode) { - if (c->split == split && c->level < level && - c->defmap & (1<<i)) { - split->defaults[i] = c; - level = c->level; - } - } - } - } -} - -static void 
cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask) -{ - struct cbq_class *split = NULL; - - if (splitid == 0) { - split = cl->split; - if (!split) - return; - splitid = split->common.classid; - } - - if (split == NULL || split->common.classid != splitid) { - for (split = cl->tparent; split; split = split->tparent) - if (split->common.classid == splitid) - break; - } - - if (split == NULL) - return; - - if (cl->split != split) { - cl->defmap = 0; - cbq_sync_defmap(cl); - cl->split = split; - cl->defmap = def & mask; - } else - cl->defmap = (cl->defmap & ~mask) | (def & mask); - - cbq_sync_defmap(cl); -} - -static void cbq_unlink_class(struct cbq_class *this) -{ - struct cbq_class *cl, **clp; - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - - qdisc_class_hash_remove(&q->clhash, &this->common); - - if (this->tparent) { - clp = &this->sibling; - cl = *clp; - do { - if (cl == this) { - *clp = cl->sibling; - break; - } - clp = &cl->sibling; - } while ((cl = *clp) != this->sibling); - - if (this->tparent->children == this) { - this->tparent->children = this->sibling; - if (this->sibling == this) - this->tparent->children = NULL; - } - } else { - WARN_ON(this->sibling != this); - } -} - -static void cbq_link_class(struct cbq_class *this) -{ - struct cbq_sched_data *q = qdisc_priv(this->qdisc); - struct cbq_class *parent = this->tparent; - - this->sibling = this; - qdisc_class_hash_insert(&q->clhash, &this->common); - - if (parent == NULL) - return; - - if (parent->children == NULL) { - parent->children = this; - } else { - this->sibling = parent->children->sibling; - parent->children->sibling = this; - } -} - -static void -cbq_reset(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int prio; - unsigned int h; - - q->activemask = 0; - q->pmask = 0; - q->tx_class = NULL; - q->tx_borrowed = NULL; - qdisc_watchdog_cancel(&q->watchdog); - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - for 
(prio = 0; prio <= TC_CBQ_MAXPRIO; prio++) - q->active[prio] = NULL; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - qdisc_reset(cl->q); - - cl->next_alive = NULL; - cl->undertime = PSCHED_PASTPERFECT; - cl->avgidle = cl->maxidle; - cl->deficit = cl->quantum; - cl->cpriority = cl->priority; - } - } -} - - -static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) -{ - if (lss->change & TCF_CBQ_LSS_FLAGS) { - cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; - cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; - } - if (lss->change & TCF_CBQ_LSS_EWMA) - cl->ewma_log = lss->ewma_log; - if (lss->change & TCF_CBQ_LSS_AVPKT) - cl->avpkt = lss->avpkt; - if (lss->change & TCF_CBQ_LSS_MINIDLE) - cl->minidle = -(long)lss->minidle; - if (lss->change & TCF_CBQ_LSS_MAXIDLE) { - cl->maxidle = lss->maxidle; - cl->avgidle = lss->maxidle; - } - if (lss->change & TCF_CBQ_LSS_OFFTIME) - cl->offtime = lss->offtime; -} - -static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]--; - q->quanta[cl->priority] -= cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl) -{ - q->nclasses[cl->priority]++; - q->quanta[cl->priority] += cl->weight; - cbq_normalize_quanta(q, cl->priority); -} - -static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) -{ - struct cbq_sched_data *q = qdisc_priv(cl->qdisc); - - if (wrr->allot) - cl->allot = wrr->allot; - if (wrr->weight) - cl->weight = wrr->weight; - if (wrr->priority) { - cl->priority = wrr->priority - 1; - cl->cpriority = cl->priority; - if (cl->priority >= cl->priority2) - cl->priority2 = TC_CBQ_MAXPRIO - 1; - } - - cbq_addprio(q, cl); - return 0; -} - -static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt) -{ - cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange); - 
return 0; -} - -static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = { - [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) }, - [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) }, - [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) }, - [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) }, - [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) }, - [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, - [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) }, -}; - -static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], - struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - int err; - - if (!opt) { - NL_SET_ERR_MSG(extack, "CBQ options are required for this operation"); - return -EINVAL; - } - - err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt, - cbq_policy, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_WRROPT]) { - const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]); - - if (wrr->priority > TC_CBQ_MAXPRIO) { - NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO"); - err = -EINVAL; - } - } - return err; -} - -static int cbq_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct tc_ratespec *r; - int err; - - qdisc_watchdog_init(&q->watchdog, sch); - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) { - NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete"); - return -EINVAL; - } - - r = nla_data(tb[TCA_CBQ_RATE]); - - q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack); - if (!q->link.R_tab) - return -EINVAL; - - err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack); - if (err) - goto put_rtab; - - err = qdisc_class_hash_init(&q->clhash); - if (err < 0) - goto put_block; - - q->link.sibling = &q->link; - q->link.common.classid = sch->handle; - 
q->link.qdisc = sch; - q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (!q->link.q) - q->link.q = &noop_qdisc; - else - qdisc_hash_add(q->link.q, true); - - q->link.priority = TC_CBQ_MAXPRIO - 1; - q->link.priority2 = TC_CBQ_MAXPRIO - 1; - q->link.cpriority = TC_CBQ_MAXPRIO - 1; - q->link.allot = psched_mtu(qdisc_dev(sch)); - q->link.quantum = q->link.allot; - q->link.weight = q->link.R_tab->rate.rate; - - q->link.ewma_log = TC_CBQ_DEF_EWMA; - q->link.avpkt = q->link.allot/2; - q->link.minidle = -0x7FFFFFFF; - - q->toplevel = TC_CBQ_MAXLEVEL; - q->now = psched_get_time(); - - cbq_link_class(&q->link); - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT])); - - cbq_addprio(q, &q->link); - return 0; - -put_block: - tcf_block_put(q->link.block); - -put_rtab: - qdisc_put_rtab(q->link.R_tab); - return err; -} - -static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - - if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_lssopt opt; - - opt.flags = 0; - if (cl->borrow == NULL) - opt.flags |= TCF_CBQ_LSS_BOUNDED; - if (cl->share == NULL) - opt.flags |= TCF_CBQ_LSS_ISOLATED; - opt.ewma_log = cl->ewma_log; - opt.level = cl->level; - opt.avpkt = cl->avpkt; - opt.maxidle = cl->maxidle; - opt.minidle = (u32)(-cl->minidle); - opt.offtime = cl->offtime; - opt.change = ~0; - if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_wrropt opt; - - memset(&opt, 0, 
sizeof(opt)); - opt.flags = 0; - opt.allot = cl->allot; - opt.priority = cl->priority + 1; - opt.cpriority = cl->cpriority + 1; - opt.weight = cl->weight; - if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt)) - goto nla_put_failure; - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) -{ - unsigned char *b = skb_tail_pointer(skb); - struct tc_cbq_fopt opt; - - if (cl->split || cl->defmap) { - opt.split = cl->split ? cl->split->common.classid : 0; - opt.defmap = cl->defmap; - opt.defchange = ~0; - if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt)) - goto nla_put_failure; - } - return skb->len; - -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl) -{ - if (cbq_dump_lss(skb, cl) < 0 || - cbq_dump_rate(skb, cl) < 0 || - cbq_dump_wrr(skb, cl) < 0 || - cbq_dump_fopt(skb, cl) < 0) - return -1; - return 0; -} - -static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct nlattr *nest; - - nest = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, &q->link) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - q->link.xstats.avgidle = q->link.avgidle; - return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats)); -} - -static int -cbq_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - struct nlattr *nest; - - if (cl->tparent) - tcm->tcm_parent = cl->tparent->common.classid; - else - tcm->tcm_parent = TC_H_ROOT; - tcm->tcm_handle = cl->common.classid; - tcm->tcm_info = cl->q->handle; - - nest = 
nla_nest_start_noflag(skb, TCA_OPTIONS); - if (nest == NULL) - goto nla_put_failure; - if (cbq_dump_attr(skb, cl) < 0) - goto nla_put_failure; - return nla_nest_end(skb, nest); - -nla_put_failure: - nla_nest_cancel(skb, nest); - return -1; -} - -static int -cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, - struct gnet_dump *d) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - __u32 qlen; - - cl->xstats.avgidle = cl->avgidle; - cl->xstats.undertime = 0; - qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog); - - if (cl->undertime != PSCHED_PASTPERFECT) - cl->xstats.undertime = cl->undertime - q->now; - - if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || - gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || - gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) - return -1; - - return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); -} - -static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old, struct netlink_ext_ack *extack) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - cl->common.classid, extack); - if (new == NULL) - return -ENOBUFS; - } - - *old = qdisc_replace(sch, new, &cl->q); - return 0; -} - -static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - return cl->q; -} - -static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cbq_deactivate_class(cl); -} - -static unsigned long cbq_find(struct Qdisc *sch, u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - return (unsigned long)cbq_class_lookup(q, classid); -} - -static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - - WARN_ON(cl->filters); - - tcf_block_put(cl->block); - 
qdisc_put(cl->q); - qdisc_put_rtab(cl->R_tab); - gen_kill_estimator(&cl->rate_est); - if (cl != &q->link) - kfree(cl); -} - -static void cbq_destroy(struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct hlist_node *next; - struct cbq_class *cl; - unsigned int h; - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = NULL; -#endif - /* - * Filters must be destroyed first because we don't destroy the - * classes from root to leafs which means that filters can still - * be bound to classes which have been destroyed already. --TGR '04 - */ - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - tcf_block_put(cl->block); - cl->block = NULL; - } - } - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], - common.hnode) - cbq_destroy_class(sch, cl); - } - qdisc_class_hash_destroy(&q->clhash); -} - -static int -cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, - unsigned long *arg, struct netlink_ext_ack *extack) -{ - int err; - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)*arg; - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_CBQ_MAX + 1]; - struct cbq_class *parent; - struct qdisc_rate_table *rtab = NULL; - - err = cbq_opt_parse(tb, opt, extack); - if (err < 0) - return err; - - if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) { - NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params"); - return -EOPNOTSUPP; - } - - if (cl) { - /* Check parent */ - if (parentid) { - if (cl->tparent && - cl->tparent->common.classid != parentid) { - NL_SET_ERR_MSG(extack, "Invalid parent id"); - return -EINVAL; - } - if (!cl->tparent && parentid != TC_H_ROOT) { - NL_SET_ERR_MSG(extack, "Parent must be root"); - return -EINVAL; - } - } - - if (tb[TCA_CBQ_RATE]) { - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), - 
tb[TCA_CBQ_RTAB], extack); - if (rtab == NULL) - return -EINVAL; - } - - if (tca[TCA_RATE]) { - err = gen_replace_estimator(&cl->bstats, NULL, - &cl->rate_est, - NULL, - true, - tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator"); - qdisc_put_rtab(rtab); - return err; - } - } - - /* Change class parameters */ - sch_tree_lock(sch); - - if (cl->next_alive != NULL) - cbq_deactivate_class(cl); - - if (rtab) { - qdisc_put_rtab(cl->R_tab); - cl->R_tab = rtab; - } - - if (tb[TCA_CBQ_LSSOPT]) - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - - if (tb[TCA_CBQ_WRROPT]) { - cbq_rmprio(q, cl); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - } - - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - - if (cl->q->q.qlen) - cbq_activate_class(cl); - - sch_tree_unlock(sch); - - return 0; - } - - if (parentid == TC_H_ROOT) - return -EINVAL; - - if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) { - NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing"); - return -EINVAL; - } - - rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB], - extack); - if (rtab == NULL) - return -EINVAL; - - if (classid) { - err = -EINVAL; - if (TC_H_MAJ(classid ^ sch->handle) || - cbq_class_lookup(q, classid)) { - NL_SET_ERR_MSG(extack, "Specified class not found"); - goto failure; - } - } else { - int i; - classid = TC_H_MAKE(sch->handle, 0x8000); - - for (i = 0; i < 0x8000; i++) { - if (++q->hgenerator >= 0x8000) - q->hgenerator = 1; - if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) - break; - } - err = -ENOSR; - if (i >= 0x8000) { - NL_SET_ERR_MSG(extack, "Unable to generate classid"); - goto failure; - } - classid = classid|q->hgenerator; - } - - parent = &q->link; - if (parentid) { - parent = cbq_class_lookup(q, parentid); - err = -EINVAL; - if (!parent) { - NL_SET_ERR_MSG(extack, "Failed to find parentid"); - goto failure; - } - } - - err = 
-ENOBUFS; - cl = kzalloc(sizeof(*cl), GFP_KERNEL); - if (cl == NULL) - goto failure; - - gnet_stats_basic_sync_init(&cl->bstats); - err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); - if (err) { - kfree(cl); - goto failure; - } - - if (tca[TCA_RATE]) { - err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, - NULL, true, tca[TCA_RATE]); - if (err) { - NL_SET_ERR_MSG(extack, "Couldn't create new estimator"); - tcf_block_put(cl->block); - kfree(cl); - goto failure; - } - } - - cl->R_tab = rtab; - rtab = NULL; - cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, - NULL); - if (!cl->q) - cl->q = &noop_qdisc; - else - qdisc_hash_add(cl->q, true); - - cl->common.classid = classid; - cl->tparent = parent; - cl->qdisc = sch; - cl->allot = parent->allot; - cl->quantum = cl->allot; - cl->weight = cl->R_tab->rate.rate; - - sch_tree_lock(sch); - cbq_link_class(cl); - cl->borrow = cl->tparent; - if (cl->tparent != &q->link) - cl->share = cl->tparent; - cbq_adjust_levels(parent); - cl->minidle = -0x7FFFFFFF; - cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); - cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); - if (cl->ewma_log == 0) - cl->ewma_log = q->link.ewma_log; - if (cl->maxidle == 0) - cl->maxidle = q->link.maxidle; - if (cl->avpkt == 0) - cl->avpkt = q->link.avpkt; - if (tb[TCA_CBQ_FOPT]) - cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT])); - sch_tree_unlock(sch); - - qdisc_class_hash_grow(sch, &q->clhash); - - *arg = (unsigned long)cl; - return 0; - -failure: - qdisc_put_rtab(rtab); - return err; -} - -static int cbq_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl->filters || cl->children || cl == &q->link) - return -EBUSY; - - sch_tree_lock(sch); - - qdisc_purge_queue(cl->q); - - if (cl->next_alive) - cbq_deactivate_class(cl); - - if (q->tx_borrowed == cl) - q->tx_borrowed = q->tx_class; - if 
(q->tx_class == cl) { - q->tx_class = NULL; - q->tx_borrowed = NULL; - } -#ifdef CONFIG_NET_CLS_ACT - if (q->rx_class == cl) - q->rx_class = NULL; -#endif - - cbq_unlink_class(cl); - cbq_adjust_levels(cl->tparent); - cl->defmap = 0; - cbq_sync_defmap(cl); - - cbq_rmprio(q, cl); - sch_tree_unlock(sch); - - cbq_destroy_class(sch, cl); - return 0; -} - -static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = (struct cbq_class *)arg; - - if (cl == NULL) - cl = &q->link; - - return cl->block; -} - -static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *p = (struct cbq_class *)parent; - struct cbq_class *cl = cbq_class_lookup(q, classid); - - if (cl) { - if (p && p->level <= cl->level) - return 0; - cl->filters++; - return (unsigned long)cl; - } - return 0; -} - -static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - cl->filters--; -} - -static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - unsigned int h; - - if (arg->stop) - return; - - for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) - return; - } - } -} - -static const struct Qdisc_class_ops cbq_class_ops = { - .graft = cbq_graft, - .leaf = cbq_leaf, - .qlen_notify = cbq_qlen_notify, - .find = cbq_find, - .change = cbq_change_class, - .delete = cbq_delete, - .walk = cbq_walk, - .tcf_block = cbq_tcf_block, - .bind_tcf = cbq_bind_filter, - .unbind_tcf = cbq_unbind_filter, - .dump = cbq_dump_class, - .dump_stats = cbq_dump_class_stats, -}; - -static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops 
= &cbq_class_ops, - .id = "cbq", - .priv_size = sizeof(struct cbq_sched_data), - .enqueue = cbq_enqueue, - .dequeue = cbq_dequeue, - .peek = qdisc_peek_dequeued, - .init = cbq_init, - .reset = cbq_reset, - .destroy = cbq_destroy, - .change = NULL, - .dump = cbq_dump, - .dump_stats = cbq_dump_stats, - .owner = THIS_MODULE, -}; - -static int __init cbq_module_init(void) -{ - return register_qdisc(&cbq_qdisc_ops); -} -static void __exit cbq_module_exit(void) -{ - unregister_qdisc(&cbq_qdisc_ops); -} -module_init(cbq_module_init) -module_exit(cbq_module_exit) -MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c deleted file mode 100644 index 401ffaf87d62..000000000000 --- a/net/sched/sch_dsmark.c +++ /dev/null @@ -1,518 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* net/sched/sch_dsmark.c - Differentiated Services field marker */ - -/* Written 1998-2000 by Werner Almesberger, EPFL ICA */ - - -#include <linux/module.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/skbuff.h> -#include <linux/rtnetlink.h> -#include <linux/bitops.h> -#include <net/pkt_sched.h> -#include <net/pkt_cls.h> -#include <net/dsfield.h> -#include <net/inet_ecn.h> -#include <asm/byteorder.h> - -/* - * classid class marking - * ------- ----- ------- - * n/a 0 n/a - * x:0 1 use entry [0] - * ... ... ... - * x:y y>0 y+1 use entry [y] - * ... ... ... - * x:indices-1 indices use entry [indices-1] - * ... ... ... - * x:y y+1 use entry [y & (indices-1)] - * ... ... ... 
- * 0xffff 0x10000 use entry [indices-1] - */ - - -#define NO_DEFAULT_INDEX (1 << 16) - -struct mask_value { - u8 mask; - u8 value; -}; - -struct dsmark_qdisc_data { - struct Qdisc *q; - struct tcf_proto __rcu *filter_list; - struct tcf_block *block; - struct mask_value *mv; - u16 indices; - u8 set_tc_index; - u32 default_index; /* index range is 0...0xffff */ -#define DSMARK_EMBEDDED_SZ 16 - struct mask_value embedded[DSMARK_EMBEDDED_SZ]; -}; - -static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) -{ - return index <= p->indices && index > 0; -} - -/* ------------------------- Class/flow operations ------------------------- */ - -static int dsmark_graft(struct Qdisc *sch, unsigned long arg, - struct Qdisc *new, struct Qdisc **old, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n", - __func__, sch, p, new, old); - - if (new == NULL) { - new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - sch->handle, NULL); - if (new == NULL) - new = &noop_qdisc; - } - - *old = qdisc_replace(sch, new, &p->q); - return 0; -} - -static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - return p->q; -} - -static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) -{ - return TC_H_MIN(classid) + 1; -} - -static unsigned long dsmark_bind_filter(struct Qdisc *sch, - unsigned long parent, u32 classid) -{ - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - - return dsmark_find(sch, classid); -} - -static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) -{ -} - -static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = { - [TCA_DSMARK_INDICES] = { .type = NLA_U16 }, - [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 }, - [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG }, - [TCA_DSMARK_MASK] = { .type = NLA_U8 }, - [TCA_DSMARK_VALUE] = { 
.type = NLA_U8 }, -}; - -static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - - pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", - __func__, sch, p, classid, parent, *arg); - - if (!dsmark_valid_index(p, *arg)) { - err = -ENOENT; - goto errout; - } - - if (!opt) - goto errout; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - if (tb[TCA_DSMARK_VALUE]) - p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); - - if (tb[TCA_DSMARK_MASK]) - p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - - err = 0; - -errout: - return err; -} - -static int dsmark_delete(struct Qdisc *sch, unsigned long arg, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - if (!dsmark_valid_index(p, arg)) - return -EINVAL; - - p->mv[arg - 1].mask = 0xff; - p->mv[arg - 1].value = 0; - - return 0; -} - -static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int i; - - pr_debug("%s(sch %p,[qdisc %p],walker %p)\n", - __func__, sch, p, walker); - - if (walker->stop) - return; - - for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) { - walker->count++; - continue; - } - if (!tc_qdisc_stats_dump(sch, i + 1, walker)) - break; - } -} - -static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - return p->block; -} - -/* --------------------------- Qdisc operations ---------------------------- */ - -static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff **to_free) -{ - unsigned int len = 
qdisc_pkt_len(skb); - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); - - if (p->set_tc_index) { - int wlen = skb_network_offset(skb); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - wlen += sizeof(struct iphdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv4_get_dsfield(ip_hdr(skb)) - & ~INET_ECN_MASK; - break; - - case htons(ETH_P_IPV6): - wlen += sizeof(struct ipv6hdr); - if (!pskb_may_pull(skb, wlen) || - skb_try_make_writable(skb, wlen)) - goto drop; - - skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb)) - & ~INET_ECN_MASK; - break; - default: - skb->tc_index = 0; - break; - } - } - - if (TC_H_MAJ(skb->priority) == sch->handle) - skb->tc_index = TC_H_MIN(skb->priority); - else { - struct tcf_result res; - struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); - int result = tcf_classify(skb, NULL, fl, &res, false); - - pr_debug("result %d class 0x%04x\n", result, res.classid); - - switch (result) { -#ifdef CONFIG_NET_CLS_ACT - case TC_ACT_QUEUED: - case TC_ACT_STOLEN: - case TC_ACT_TRAP: - __qdisc_drop(skb, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; - - case TC_ACT_SHOT: - goto drop; -#endif - case TC_ACT_OK: - skb->tc_index = TC_H_MIN(res.classid); - break; - - default: - if (p->default_index != NO_DEFAULT_INDEX) - skb->tc_index = p->default_index; - break; - } - } - - err = qdisc_enqueue(skb, p->q, to_free); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - qdisc_qstats_drop(sch); - return err; - } - - sch->qstats.backlog += len; - sch->q.qlen++; - - return NET_XMIT_SUCCESS; - -drop: - qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; -} - -static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct sk_buff *skb; - u32 index; - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - 
- skb = qdisc_dequeue_peeked(p->q); - if (skb == NULL) - return NULL; - - qdisc_bstats_update(sch, skb); - qdisc_qstats_backlog_dec(sch, skb); - sch->q.qlen--; - - index = skb->tc_index & (p->indices - 1); - pr_debug("index %d->%d\n", skb->tc_index, index); - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, - p->mv[index].value); - break; - default: - /* - * Only complain if a change was actually attempted. - * This way, we can send non-IP traffic through dsmark - * and don't need yet another qdisc as a bypass. - */ - if (p->mv[index].mask != 0xff || p->mv[index].value) - pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb_protocol(skb, true))); - break; - } - - return skb; -} - -static struct sk_buff *dsmark_peek(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - return p->q->ops->peek(p->q); -} - -static int dsmark_init(struct Qdisc *sch, struct nlattr *opt, - struct netlink_ext_ack *extack) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *tb[TCA_DSMARK_MAX + 1]; - int err = -EINVAL; - u32 default_index = NO_DEFAULT_INDEX; - u16 indices; - int i; - - pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); - - if (!opt) - goto errout; - - err = tcf_block_get(&p->block, &p->filter_list, sch, extack); - if (err) - return err; - - err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt, - dsmark_policy, NULL); - if (err < 0) - goto errout; - - err = -EINVAL; - if (!tb[TCA_DSMARK_INDICES]) - goto errout; - indices = nla_get_u16(tb[TCA_DSMARK_INDICES]); - - if (hweight32(indices) != 1) - goto errout; - - if (tb[TCA_DSMARK_DEFAULT_INDEX]) - default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - - if (indices <= DSMARK_EMBEDDED_SZ) - p->mv = p->embedded; - else - 
p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); - if (!p->mv) { - err = -ENOMEM; - goto errout; - } - for (i = 0; i < indices; i++) { - p->mv[i].mask = 0xff; - p->mv[i].value = 0; - } - p->indices = indices; - p->default_index = default_index; - p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - - p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, - NULL); - if (p->q == NULL) - p->q = &noop_qdisc; - else - qdisc_hash_add(p->q, true); - - pr_debug("%s: qdisc %p\n", __func__, p->q); - - err = 0; -errout: - return err; -} - -static void dsmark_reset(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - if (p->q) - qdisc_reset(p->q); -} - -static void dsmark_destroy(struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - - pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); - - tcf_block_put(p->block); - qdisc_put(p->q); - if (p->mv != p->embedded) - kfree(p->mv); -} - -static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl); - - if (!dsmark_valid_index(p, cl)) - return -EINVAL; - - tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); - tcm->tcm_info = p->q->handle; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == NULL) - goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - struct nlattr *opts = NULL; - - opts = nla_nest_start_noflag(skb, TCA_OPTIONS); - if (opts == 
NULL) - goto nla_put_failure; - if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices)) - goto nla_put_failure; - - if (p->default_index != NO_DEFAULT_INDEX && - nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index)) - goto nla_put_failure; - - if (p->set_tc_index && - nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX)) - goto nla_put_failure; - - return nla_nest_end(skb, opts); - -nla_put_failure: - nla_nest_cancel(skb, opts); - return -EMSGSIZE; -} - -static const struct Qdisc_class_ops dsmark_class_ops = { - .graft = dsmark_graft, - .leaf = dsmark_leaf, - .find = dsmark_find, - .change = dsmark_change, - .delete = dsmark_delete, - .walk = dsmark_walk, - .tcf_block = dsmark_tcf_block, - .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_unbind_filter, - .dump = dsmark_dump_class, -}; - -static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { - .next = NULL, - .cl_ops = &dsmark_class_ops, - .id = "dsmark", - .priv_size = sizeof(struct dsmark_qdisc_data), - .enqueue = dsmark_enqueue, - .dequeue = dsmark_dequeue, - .peek = dsmark_peek, - .init = dsmark_init, - .reset = dsmark_reset, - .destroy = dsmark_destroy, - .change = NULL, - .dump = dsmark_dump, - .owner = THIS_MODULE, -}; - -static int __init dsmark_module_init(void) -{ - return register_qdisc(&dsmark_qdisc_ops); -} - -static void __exit dsmark_module_exit(void) -{ - unregister_qdisc(&dsmark_qdisc_ops); -} - -module_init(dsmark_module_init) -module_exit(dsmark_module_exit) - -MODULE_LICENSE("GPL"); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 097bd60ce964..62b436a2c8fe 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -807,8 +807,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr; - sk_refcnt_debug_inc(newsk); - if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); newsk = NULL; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 909a89a1cff4..c365df24ad33 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ 
-601,8 +601,6 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk, newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; - sk_refcnt_debug_inc(newsk); - if (newsk->sk_prot->init(newsk)) { sk_common_release(newsk); newsk = NULL; diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index b163266e581a..d7a7420e81ec 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -360,8 +360,6 @@ static void smc_destruct(struct sock *sk) return; if (!sock_flag(sk, SOCK_DEAD)) return; - - sk_refcnt_debug_dec(sk); } static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, @@ -390,7 +388,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, spin_lock_init(&smc->accept_q_lock); spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); - sk_refcnt_debug_inc(sk); mutex_init(&smc->clcsock_release_lock); smc_init_saved_callbacks(smc); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 9f0561b67c12..a245c1b4a21b 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -845,7 +845,6 @@ static int xsk_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; - sk_refcnt_debug_release(sk); sock_put(sk); return 0; @@ -1396,8 +1395,6 @@ static void xsk_destruct(struct sock *sk) if (!xp_put_pool(xs->pool)) xdp_put_umem(xs->umem, !xs->pool); - - sk_refcnt_debug_dec(sk); } static int xsk_create(struct net *net, struct socket *sock, int protocol, @@ -1427,7 +1424,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, sk->sk_family = PF_XDP; sk->sk_destruct = xsk_destruct; - sk_refcnt_debug_inc(sk); sock_set_flag(sk, SOCK_RCU_FREE); |