aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar David S. Miller <davem@davemloft.net> 2020-09-27 13:27:01 -0700
committerGravatar David S. Miller <davem@davemloft.net> 2020-09-27 13:27:01 -0700
commit7cd427e2f9d48228fb97d7c044b939eb7a81871f (patch)
tree3689c2fbc11e8bd2576a07c0805c52905e6109e8
parentMerge branch 'hns3-next' (diff)
parentmlxsw: spectrum_ethtool: Expose transceiver_overheat counter (diff)
downloadlinux-7cd427e2f9d48228fb97d7c044b939eb7a81871f.tar.gz
linux-7cd427e2f9d48228fb97d7c044b939eb7a81871f.tar.bz2
linux-7cd427e2f9d48228fb97d7c044b939eb7a81871f.zip
Merge branch 'mlxsw-Expose-transceiver-overheat-counter'
Ido Schimmel says: ==================== mlxsw: Expose transceiver overheat counter Amit says: An overheated transceiver can be the root cause of various network problems such as link flapping. Counting the number of times a transceiver's temperature was higher than its configured threshold can therefore help in debugging such issues. This patch set exposes a transceiver overheat counter via ethtool. This is achieved by configuring the Spectrum ASIC to generate events whenever a transceiver is overheated. The temperature thresholds are queried from the transceiver (if available) and set to the default otherwise. Example: ... transceiver_overheat: 2 Patch set overview: Patches #1-#3 add required device registers Patches #4-#5 add required infrastructure in mlxsw to configure and count overheat events Patches #6-#9 gradually add support for the transceiver overheat counter Patch #10 exposes the transceiver overheat counter via ethtool ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c368
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h132
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c44
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h4
10 files changed, 660 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index f8dddcf461f5..a21afa56e3f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -26,6 +26,7 @@
#include <trace/events/devlink.h>
#include "core.h"
+#include "core_env.h"
#include "item.h"
#include "cmd.h"
#include "port.h"
@@ -87,6 +88,8 @@ struct mlxsw_core {
struct {
struct devlink_health_reporter *fw_fatal;
} health;
+ struct mlxsw_env *env;
+ bool is_initialized; /* Denotes if core was already initialized. */
unsigned long driver_priv[];
/* driver_priv has to be always the last item */
};
@@ -133,6 +136,11 @@ bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core)
}
EXPORT_SYMBOL(mlxsw_core_res_query_enabled);
+bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->driver->temp_warn_enabled;
+}
+
bool
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
const struct mlxsw_fw_rev *req_rev)
@@ -1943,6 +1951,11 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_thermal_init;
+ err = mlxsw_env_init(mlxsw_core, &mlxsw_core->env);
+ if (err)
+ goto err_env_init;
+
+ mlxsw_core->is_initialized = true;
devlink_params_publish(devlink);
if (!reload)
@@ -1950,6 +1963,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
return 0;
+err_env_init:
+ mlxsw_thermal_fini(mlxsw_core->thermal);
err_thermal_init:
mlxsw_hwmon_fini(mlxsw_core->hwmon);
err_hwmon_init:
@@ -2026,6 +2041,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
}
devlink_params_unpublish(devlink);
+ mlxsw_core->is_initialized = false;
+ mlxsw_env_fini(mlxsw_core->env);
mlxsw_thermal_fini(mlxsw_core->thermal);
mlxsw_hwmon_fini(mlxsw_core->hwmon);
if (mlxsw_core->driver->fini)
@@ -2829,6 +2846,16 @@ mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
}
EXPORT_SYMBOL(mlxsw_core_port_devlink_port_get);
+struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->env;
+}
+
+bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core)
+{
+ return mlxsw_core->is_initialized;
+}
+
int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module)
{
enum mlxsw_reg_pmtm_module_type module_type;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 2ca085a44774..92f7398287be 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -32,6 +32,8 @@ void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core);
bool mlxsw_core_res_query_enabled(const struct mlxsw_core *mlxsw_core);
+bool mlxsw_core_temp_warn_enabled(const struct mlxsw_core *mlxsw_core);
+
bool
mlxsw_core_fw_rev_minor_subminor_validate(const struct mlxsw_fw_rev *rev,
const struct mlxsw_fw_rev *req_rev);
@@ -221,6 +223,8 @@ enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
struct devlink_port *
mlxsw_core_port_devlink_port_get(struct mlxsw_core *mlxsw_core,
u8 local_port);
+struct mlxsw_env *mlxsw_core_env(const struct mlxsw_core *mlxsw_core);
+bool mlxsw_core_is_initialized(const struct mlxsw_core *mlxsw_core);
int mlxsw_core_module_max_width(struct mlxsw_core *mlxsw_core, u8 module);
int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
@@ -371,6 +375,7 @@ struct mlxsw_driver {
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
bool fw_fatal_enabled;
+ bool temp_warn_enabled;
};
int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 056eeb85be60..dd26865bd587 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -10,6 +10,18 @@
#include "item.h"
#include "reg.h"
+struct mlxsw_env_module_info {
+ u64 module_overheat_counter;
+ bool is_overheat;
+};
+
+struct mlxsw_env {
+ struct mlxsw_core *core;
+ u8 module_count;
+ spinlock_t module_info_lock; /* Protects 'module_info'. */
+ struct mlxsw_env_module_info module_info[];
+};
+
static int mlxsw_env_validate_cable_ident(struct mlxsw_core *core, int id,
bool *qsfp, bool *cmis)
{
@@ -293,3 +305,359 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev,
return 0;
}
EXPORT_SYMBOL(mlxsw_env_get_module_eeprom);
+
+static int mlxsw_env_module_has_temp_sensor(struct mlxsw_core *mlxsw_core,
+ u8 module,
+ bool *p_has_temp_sensor)
+{
+ char mtbr_pl[MLXSW_REG_MTBR_LEN];
+ u16 temp;
+ int err;
+
+ mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
+ 1);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtbr), mtbr_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
+
+ switch (temp) {
+ case MLXSW_REG_MTBR_BAD_SENS_INFO:
+ case MLXSW_REG_MTBR_NO_CONN:
+ case MLXSW_REG_MTBR_NO_TEMP_SENS:
+ case MLXSW_REG_MTBR_INDEX_NA:
+ *p_has_temp_sensor = false;
+ break;
+ default:
+ *p_has_temp_sensor = temp ? true : false;
+ }
+ return 0;
+}
+
+static int mlxsw_env_temp_event_set(struct mlxsw_core *mlxsw_core,
+ u16 sensor_index, bool enable)
+{
+ char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0};
+ enum mlxsw_reg_mtmp_tee tee;
+ int err, threshold_hi;
+
+ mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, sensor_index);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
+ return err;
+
+ if (enable) {
+ err = mlxsw_env_module_temp_thresholds_get(mlxsw_core,
+ sensor_index -
+ MLXSW_REG_MTMP_MODULE_INDEX_MIN,
+ SFP_TEMP_HIGH_WARN,
+ &threshold_hi);
+ /* In case it is not possible to query the module's threshold,
+ * use the default value.
+ */
+ if (err)
+ threshold_hi = MLXSW_REG_MTMP_THRESH_HI;
+ else
+ /* mlxsw_env_module_temp_thresholds_get() multiplies
+ * Celsius degrees by 1000 whereas MTMP expects
+ * temperature in 0.125 Celsius degrees units.
+ * Convert threshold_hi to correct units.
+ */
+ threshold_hi = threshold_hi / 1000 * 8;
+
+ mlxsw_reg_mtmp_temperature_threshold_hi_set(mtmp_pl, threshold_hi);
+ mlxsw_reg_mtmp_temperature_threshold_lo_set(mtmp_pl, threshold_hi -
+ MLXSW_REG_MTMP_HYSTERESIS_TEMP);
+ }
+ tee = enable ? MLXSW_REG_MTMP_TEE_GENERATE_EVENT : MLXSW_REG_MTMP_TEE_NO_EVENT;
+ mlxsw_reg_mtmp_tee_set(mtmp_pl, tee);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtmp), mtmp_pl);
+}
+
+static int mlxsw_env_module_temp_event_enable(struct mlxsw_core *mlxsw_core,
+ u8 module_count)
+{
+ int i, err, sensor_index;
+ bool has_temp_sensor;
+
+ for (i = 0; i < module_count; i++) {
+ err = mlxsw_env_module_has_temp_sensor(mlxsw_core, i,
+ &has_temp_sensor);
+ if (err)
+ return err;
+
+ if (!has_temp_sensor)
+ continue;
+
+ sensor_index = i + MLXSW_REG_MTMP_MODULE_INDEX_MIN;
+ err = mlxsw_env_temp_event_set(mlxsw_core, sensor_index, true);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlxsw_env_mtwe_event_func(const struct mlxsw_reg_info *reg,
+ char *mtwe_pl, void *priv)
+{
+ struct mlxsw_env *mlxsw_env = priv;
+ int i, sensor_warning;
+ bool is_overheat;
+
+ for (i = 0; i < mlxsw_env->module_count; i++) {
+ /* 64-127 of sensor_index are mapped to the port modules
+ * sequentially (module 0 is mapped to sensor_index 64,
+ * module 1 to sensor_index 65 and so on)
+ */
+ sensor_warning =
+ mlxsw_reg_mtwe_sensor_warning_get(mtwe_pl,
+ i + MLXSW_REG_MTMP_MODULE_INDEX_MIN);
+ spin_lock(&mlxsw_env->module_info_lock);
+ is_overheat =
+ mlxsw_env->module_info[i].is_overheat;
+
+ if ((is_overheat && sensor_warning) ||
+ (!is_overheat && !sensor_warning)) {
+ /* Current state is "warning" and MTWE still reports
+ * warning OR current state in "no warning" and MTWE
+ * does not report warning.
+ */
+ spin_unlock(&mlxsw_env->module_info_lock);
+ continue;
+ } else if (is_overheat && !sensor_warning) {
+ /* MTWE reports "no warning", turn is_overheat off.
+ */
+ mlxsw_env->module_info[i].is_overheat = false;
+ spin_unlock(&mlxsw_env->module_info_lock);
+ } else {
+ /* Current state is "no warning" and MTWE reports
+ * "warning", increase the counter and turn is_overheat
+ * on.
+ */
+ mlxsw_env->module_info[i].is_overheat = true;
+ mlxsw_env->module_info[i].module_overheat_counter++;
+ spin_unlock(&mlxsw_env->module_info_lock);
+ }
+ }
+}
+
+static const struct mlxsw_listener mlxsw_env_temp_warn_listener =
+ MLXSW_EVENTL(mlxsw_env_mtwe_event_func, MTWE, MTWE);
+
+static int mlxsw_env_temp_warn_event_register(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+
+ if (!mlxsw_core_temp_warn_enabled(mlxsw_core))
+ return 0;
+
+ return mlxsw_core_trap_register(mlxsw_core,
+ &mlxsw_env_temp_warn_listener,
+ mlxsw_env);
+}
+
+static void mlxsw_env_temp_warn_event_unregister(struct mlxsw_env *mlxsw_env)
+{
+ if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core))
+ return;
+
+ mlxsw_core_trap_unregister(mlxsw_env->core,
+ &mlxsw_env_temp_warn_listener, mlxsw_env);
+}
+
+struct mlxsw_env_module_plug_unplug_event {
+ struct mlxsw_env *mlxsw_env;
+ u8 module;
+ struct work_struct work;
+};
+
+static void mlxsw_env_pmpe_event_work(struct work_struct *work)
+{
+ struct mlxsw_env_module_plug_unplug_event *event;
+ struct mlxsw_env *mlxsw_env;
+ bool has_temp_sensor;
+ u16 sensor_index;
+ int err;
+
+ event = container_of(work, struct mlxsw_env_module_plug_unplug_event,
+ work);
+ mlxsw_env = event->mlxsw_env;
+
+ spin_lock_bh(&mlxsw_env->module_info_lock);
+ mlxsw_env->module_info[event->module].is_overheat = false;
+ spin_unlock_bh(&mlxsw_env->module_info_lock);
+
+ err = mlxsw_env_module_has_temp_sensor(mlxsw_env->core, event->module,
+ &has_temp_sensor);
+ /* Do not disable events on modules without sensors or faulty sensors
+ * because FW returns errors.
+ */
+ if (err)
+ goto out;
+
+ if (!has_temp_sensor)
+ goto out;
+
+ sensor_index = event->module + MLXSW_REG_MTMP_MODULE_INDEX_MIN;
+ mlxsw_env_temp_event_set(mlxsw_env->core, sensor_index, true);
+
+out:
+ kfree(event);
+}
+
+static void
+mlxsw_env_pmpe_listener_func(const struct mlxsw_reg_info *reg, char *pmpe_pl,
+ void *priv)
+{
+ struct mlxsw_env_module_plug_unplug_event *event;
+ enum mlxsw_reg_pmpe_module_status module_status;
+ u8 module = mlxsw_reg_pmpe_module_get(pmpe_pl);
+ struct mlxsw_env *mlxsw_env = priv;
+
+ if (WARN_ON_ONCE(module >= mlxsw_env->module_count))
+ return;
+
+ module_status = mlxsw_reg_pmpe_module_status_get(pmpe_pl);
+ if (module_status != MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED)
+ return;
+
+ event = kmalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ return;
+
+ event->mlxsw_env = mlxsw_env;
+ event->module = module;
+ INIT_WORK(&event->work, mlxsw_env_pmpe_event_work);
+ mlxsw_core_schedule_work(&event->work);
+}
+
+static const struct mlxsw_listener mlxsw_env_module_plug_listener =
+ MLXSW_EVENTL(mlxsw_env_pmpe_listener_func, PMPE, PMPE);
+
+static int
+mlxsw_env_module_plug_event_register(struct mlxsw_core *mlxsw_core)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+
+ if (!mlxsw_core_temp_warn_enabled(mlxsw_core))
+ return 0;
+
+ return mlxsw_core_trap_register(mlxsw_core,
+ &mlxsw_env_module_plug_listener,
+ mlxsw_env);
+}
+
+static void
+mlxsw_env_module_plug_event_unregister(struct mlxsw_env *mlxsw_env)
+{
+ if (!mlxsw_core_temp_warn_enabled(mlxsw_env->core))
+ return;
+
+ mlxsw_core_trap_unregister(mlxsw_env->core,
+ &mlxsw_env_module_plug_listener,
+ mlxsw_env);
+}
+
+static int
+mlxsw_env_module_oper_state_event_enable(struct mlxsw_core *mlxsw_core,
+ u8 module_count)
+{
+ int i, err;
+
+ for (i = 0; i < module_count; i++) {
+ char pmaos_pl[MLXSW_REG_PMAOS_LEN];
+
+ mlxsw_reg_pmaos_pack(pmaos_pl, i,
+ MLXSW_REG_PMAOS_E_GENERATE_EVENT);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(pmaos), pmaos_pl);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int
+mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module,
+ u64 *p_counter)
+{
+ struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core);
+
+ /* Prevent switch driver from accessing uninitialized data. */
+ if (!mlxsw_core_is_initialized(mlxsw_core)) {
+ *p_counter = 0;
+ return 0;
+ }
+
+ if (WARN_ON_ONCE(module >= mlxsw_env->module_count))
+ return -EINVAL;
+
+ spin_lock_bh(&mlxsw_env->module_info_lock);
+ *p_counter = mlxsw_env->module_info[module].module_overheat_counter;
+ spin_unlock_bh(&mlxsw_env->module_info_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_env_module_overheat_counter_get);
+
+int mlxsw_env_init(struct mlxsw_core *mlxsw_core, struct mlxsw_env **p_env)
+{
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ struct mlxsw_env *env;
+ u8 module_count;
+ int err;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl);
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL, &module_count);
+
+ env = kzalloc(struct_size(env, module_info, module_count), GFP_KERNEL);
+ if (!env)
+ return -ENOMEM;
+
+ spin_lock_init(&env->module_info_lock);
+ env->core = mlxsw_core;
+ env->module_count = module_count;
+ *p_env = env;
+
+ err = mlxsw_env_temp_warn_event_register(mlxsw_core);
+ if (err)
+ goto err_temp_warn_event_register;
+
+ err = mlxsw_env_module_plug_event_register(mlxsw_core);
+ if (err)
+ goto err_module_plug_event_register;
+
+ err = mlxsw_env_module_oper_state_event_enable(mlxsw_core,
+ env->module_count);
+ if (err)
+ goto err_oper_state_event_enable;
+
+ err = mlxsw_env_module_temp_event_enable(mlxsw_core, env->module_count);
+ if (err)
+ goto err_temp_event_enable;
+
+ return 0;
+
+err_temp_event_enable:
+err_oper_state_event_enable:
+ mlxsw_env_module_plug_event_unregister(env);
+err_module_plug_event_register:
+ mlxsw_env_temp_warn_event_unregister(env);
+err_temp_warn_event_register:
+ kfree(env);
+ return err;
+}
+
+void mlxsw_env_fini(struct mlxsw_env *env)
+{
+ mlxsw_env_module_plug_event_unregister(env);
+ /* Make sure there is no more event work scheduled. */
+ mlxsw_core_flush_owq();
+ mlxsw_env_temp_warn_event_unregister(env);
+ kfree(env);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
index 064d0e770c01..8e36a2634ef5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h
@@ -14,4 +14,10 @@ int mlxsw_env_get_module_eeprom(struct net_device *netdev,
struct mlxsw_core *mlxsw_core, int module,
struct ethtool_eeprom *ee, u8 *data);
+int
+mlxsw_env_module_overheat_counter_get(struct mlxsw_core *mlxsw_core, u8 module,
+ u64 *p_counter);
+int mlxsw_env_init(struct mlxsw_core *core, struct mlxsw_env **p_env);
+void mlxsw_env_fini(struct mlxsw_env *env);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 252e91072c5a..2196c946698a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -106,7 +106,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
- char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0};
unsigned long val;
int index;
int err;
@@ -119,7 +119,13 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
mlxsw_hwmon->module_sensor_max);
- mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
+
+ mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, index);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mtmp_mte_set(mtmp_pl, true);
+ mlxsw_reg_mtmp_mtr_set(mtmp_pl, true);
err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
@@ -570,7 +576,6 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
{
char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
- char mtmp_pl[MLXSW_REG_MTMP_LEN];
int i;
int err;
@@ -581,7 +586,15 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
}
mlxsw_hwmon->sensor_count = mlxsw_reg_mtcap_sensor_count_get(mtcap_pl);
for (i = 0; i < mlxsw_hwmon->sensor_count; i++) {
- mlxsw_reg_mtmp_pack(mtmp_pl, i, true, true);
+ char mtmp_pl[MLXSW_REG_MTMP_LEN] = {0};
+
+ mlxsw_reg_mtmp_sensor_index_set(mtmp_pl, i);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp),
+ mtmp_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mtmp_mte_set(mtmp_pl, true);
+ mlxsw_reg_mtmp_mtr_set(mtmp_pl, true);
err = mlxsw_reg_write(mlxsw_hwmon->core,
MLXSW_REG(mtmp), mtmp_pl);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6e3d55006089..39eff6a57ba2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5405,6 +5405,64 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port)
mlxsw_reg_pspa_sub_port_set(payload, 0);
}
+/* PMAOS - Ports Module Administrative and Operational Status
+ * ----------------------------------------------------------
+ * This register configures and retrieves the per module status.
+ */
+#define MLXSW_REG_PMAOS_ID 0x5012
+#define MLXSW_REG_PMAOS_LEN 0x10
+
+MLXSW_REG_DEFINE(pmaos, MLXSW_REG_PMAOS_ID, MLXSW_REG_PMAOS_LEN);
+
+/* reg_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmaos, slot_index, 0x00, 24, 4);
+
+/* reg_pmaos_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmaos, module, 0x00, 16, 8);
+
+/* reg_pmaos_ase
+ * Admin state update enable.
+ * If this bit is set, admin state will be updated based on admin_state field.
+ * Only relevant on Set() operations.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pmaos, ase, 0x04, 31, 1);
+
+/* reg_pmaos_ee
+ * Event update enable.
+ * If this bit is set, event generation will be updated based on the e field.
+ * Only relevant on Set operations.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pmaos, ee, 0x04, 30, 1);
+
+enum mlxsw_reg_pmaos_e {
+ MLXSW_REG_PMAOS_E_DO_NOT_GENERATE_EVENT,
+ MLXSW_REG_PMAOS_E_GENERATE_EVENT,
+ MLXSW_REG_PMAOS_E_GENERATE_SINGLE_EVENT,
+};
+
+/* reg_pmaos_e
+ * Event Generation on operational state change.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pmaos, e, 0x04, 0, 2);
+
+static inline void mlxsw_reg_pmaos_pack(char *payload, u8 module,
+ enum mlxsw_reg_pmaos_e e)
+{
+ MLXSW_REG_ZERO(pmaos, payload);
+ mlxsw_reg_pmaos_module_set(payload, module);
+ mlxsw_reg_pmaos_e_set(payload, e);
+ mlxsw_reg_pmaos_ee_set(payload, true);
+}
+
/* PPLR - Port Physical Loopback Register
* --------------------------------------
* This register allows configuration of the port's loopback mode.
@@ -5441,6 +5499,50 @@ static inline void mlxsw_reg_pplr_pack(char *payload, u8 local_port,
MLXSW_REG_PPLR_LB_TYPE_BIT_PHY_LOCAL : 0);
}
+/* PMPE - Port Module Plug/Unplug Event Register
+ * ---------------------------------------------
+ * This register reports any operational status change of a module.
+ * A change in the module’s state will generate an event only if the change
+ * happens after arming the event mechanism. Any changes to the module state
+ * while the event mechanism is not armed will not be reported. Software can
+ * query the PMPE register for module status.
+ */
+#define MLXSW_REG_PMPE_ID 0x5024
+#define MLXSW_REG_PMPE_LEN 0x10
+
+MLXSW_REG_DEFINE(pmpe, MLXSW_REG_PMPE_ID, MLXSW_REG_PMPE_LEN);
+
+/* reg_pmpe_slot_index
+ * Slot index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmpe, slot_index, 0x00, 24, 4);
+
+/* reg_pmpe_module
+ * Module number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pmpe, module, 0x00, 16, 8);
+
+enum mlxsw_reg_pmpe_module_status {
+ MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ENABLED = 1,
+ MLXSW_REG_PMPE_MODULE_STATUS_UNPLUGGED,
+ MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_ERROR,
+ MLXSW_REG_PMPE_MODULE_STATUS_PLUGGED_DISABLED,
+};
+
+/* reg_pmpe_module_status
+ * Module status.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pmpe, module_status, 0x00, 0, 4);
+
+/* reg_pmpe_error_type
+ * Module error details.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pmpe, error_type, 0x04, 8, 4);
+
/* PDDR - Port Diagnostics Database Register
* -----------------------------------------
* The PDDR enables to read the Phy debug database
@@ -5580,6 +5682,8 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
+ MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
+ MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
@@ -8413,6 +8517,13 @@ MLXSW_ITEM32(reg, mtmp, max_temperature, 0x08, 0, 16);
* 2 - Generate single event
* Access: RW
*/
+
+enum mlxsw_reg_mtmp_tee {
+ MLXSW_REG_MTMP_TEE_NO_EVENT,
+ MLXSW_REG_MTMP_TEE_GENERATE_EVENT,
+ MLXSW_REG_MTMP_TEE_GENERATE_SINGLE_EVENT,
+};
+
MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2);
#define MLXSW_REG_MTMP_THRESH_HI 0x348 /* 105 Celsius */
@@ -8423,6 +8534,7 @@ MLXSW_ITEM32(reg, mtmp, tee, 0x0C, 30, 2);
*/
MLXSW_ITEM32(reg, mtmp, temperature_threshold_hi, 0x0C, 0, 16);
+#define MLXSW_REG_MTMP_HYSTERESIS_TEMP 0x28 /* 5 Celsius */
/* reg_mtmp_temperature_threshold_lo
* Low threshold for Temperature Warning Event. In 0.125 Celsius.
* Access: RW
@@ -8466,6 +8578,23 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp,
mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name);
}
+/* MTWE - Management Temperature Warning Event
+ * -------------------------------------------
+ * This register is used for over temperature warning.
+ */
+#define MLXSW_REG_MTWE_ID 0x900B
+#define MLXSW_REG_MTWE_LEN 0x10
+
+MLXSW_REG_DEFINE(mtwe, MLXSW_REG_MTWE_ID, MLXSW_REG_MTWE_LEN);
+
+/* reg_mtwe_sensor_warning
+ * Bit vector indicating which of the sensor reading is above threshold.
+ * Address 00h bit31 is sensor_warning[127].
+ * Address 0Ch bit0 is sensor_warning[0].
+ * Access: RO
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, mtwe, sensor_warning, 0x0, 0x10, 1);
+
/* MTBR - Management Temperature Bulk Register
* -------------------------------------------
* This register is used for bulk temperature reading.
@@ -11041,7 +11170,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(pptb),
MLXSW_REG(pbmc),
MLXSW_REG(pspa),
+ MLXSW_REG(pmaos),
MLXSW_REG(pplr),
+ MLXSW_REG(pmpe),
MLXSW_REG(pddr),
MLXSW_REG(pmtm),
MLXSW_REG(htgt),
@@ -11071,6 +11202,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(fore),
MLXSW_REG(mtcap),
MLXSW_REG(mtmp),
+ MLXSW_REG(mtwe),
MLXSW_REG(mtbr),
MLXSW_REG(mcia),
MLXSW_REG(mpat),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 9d5a6f8d7438..ab7d12aad880 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1365,6 +1365,22 @@ static int mlxsw_sp_port_tc_mc_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtctm), qtctm_pl);
}
+static int mlxsw_sp_port_overheat_init_val_set(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 module = mlxsw_sp_port->mapping.module;
+ u64 overheat_counter;
+ int err;
+
+ err = mlxsw_env_module_overheat_counter_get(mlxsw_sp->core, module,
+ &overheat_counter);
+ if (err)
+ return err;
+
+ mlxsw_sp_port->module_overheat_initial_val = overheat_counter;
+ return 0;
+}
+
static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
u8 split_base_local_port,
struct mlxsw_sp_port_mapping *port_mapping)
@@ -1575,6 +1591,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
mlxsw_sp->ptp_ops->shaper_work);
mlxsw_sp->ports[local_port] = mlxsw_sp_port;
+
+ err = mlxsw_sp_port_overheat_init_val_set(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set overheat initial value\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_overheat_init_val_set;
+ }
+
err = register_netdev(dev);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -1588,6 +1612,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
return 0;
err_register_netdev:
+err_port_overheat_init_val_set:
mlxsw_sp->ports[local_port] = NULL;
mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
err_port_vlan_create:
@@ -2440,6 +2465,22 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
MLXSW_REG_HTGT_INVALID_POLICER,
MLXSW_REG_HTGT_DEFAULT_PRIORITY,
MLXSW_REG_HTGT_DEFAULT_TC);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MTWE,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_PMPE,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
@@ -3197,6 +3238,7 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.profile = &mlxsw_sp1_config_profile,
.res_query_enabled = true,
.fw_fatal_enabled = true,
+ .temp_warn_enabled = true,
};
static struct mlxsw_driver mlxsw_sp2_driver = {
@@ -3237,6 +3279,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
.fw_fatal_enabled = true,
+ .temp_warn_enabled = true,
};
static struct mlxsw_driver mlxsw_sp3_driver = {
@@ -3277,6 +3320,7 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
.fw_fatal_enabled = true,
+ .temp_warn_enabled = true,
};
bool mlxsw_sp_port_dev_check(const struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f0a4347acd25..3e26eb6cb140 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -319,6 +319,7 @@ struct mlxsw_sp_port {
int max_mtu;
u32 max_speed;
struct mlxsw_sp_hdroom *hdroom;
+ u64 module_overheat_initial_val;
};
struct mlxsw_sp_port_type_speed_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
index 6045d3df00ef..2096b6478958 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Mellanox Technologies. All rights reserved */
#include "reg.h"
+#include "core.h"
#include "spectrum.h"
#include "core_env.h"
@@ -552,6 +553,37 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = {
#define MLXSW_SP_PORT_HW_TC_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_tc_stats)
+struct mlxsw_sp_port_stats {
+ char str[ETH_GSTRING_LEN];
+ u64 (*getter)(struct mlxsw_sp_port *mlxsw_sp_port);
+};
+
+static u64
+mlxsw_sp_port_get_transceiver_overheat_stats(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp_port_mapping port_mapping = mlxsw_sp_port->mapping;
+ struct mlxsw_core *mlxsw_core = mlxsw_sp_port->mlxsw_sp->core;
+ u64 stats;
+ int err;
+
+ err = mlxsw_env_module_overheat_counter_get(mlxsw_core,
+ port_mapping.module,
+ &stats);
+ if (err)
+ return mlxsw_sp_port->module_overheat_initial_val;
+
+ return stats - mlxsw_sp_port->module_overheat_initial_val;
+}
+
+static struct mlxsw_sp_port_stats mlxsw_sp_port_transceiver_stats[] = {
+ {
+ .str = "transceiver_overheat",
+ .getter = mlxsw_sp_port_get_transceiver_overheat_stats,
+ },
+};
+
+#define MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_transceiver_stats)
+
#define MLXSW_SP_PORT_ETHTOOL_STATS_LEN (MLXSW_SP_PORT_HW_STATS_LEN + \
MLXSW_SP_PORT_HW_RFC_2863_STATS_LEN + \
MLXSW_SP_PORT_HW_RFC_2819_STATS_LEN + \
@@ -561,7 +593,8 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = {
(MLXSW_SP_PORT_HW_PRIO_STATS_LEN * \
IEEE_8021QAZ_MAX_TCS) + \
(MLXSW_SP_PORT_HW_TC_STATS_LEN * \
- TC_MAX_QUEUE))
+ TC_MAX_QUEUE) + \
+ MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN)
static void mlxsw_sp_port_get_prio_strings(u8 **p, int prio)
{
@@ -637,6 +670,12 @@ static void mlxsw_sp_port_get_strings(struct net_device *dev,
mlxsw_sp_port_get_tc_strings(&p, i);
mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_strings(&p);
+
+ for (i = 0; i < MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN; i++) {
+ memcpy(p, mlxsw_sp_port_transceiver_stats[i].str,
+ ETH_GSTRING_LEN);
+ p += ETH_GSTRING_LEN;
+ }
break;
}
}
@@ -732,6 +771,17 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev,
}
}
+static void __mlxsw_sp_port_get_env_stats(struct net_device *dev, u64 *data, int data_index,
+ struct mlxsw_sp_port_stats *port_stats,
+ int len)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < len; i++)
+ data[data_index + i] = port_stats[i].getter(mlxsw_sp_port);
+}
+
static void mlxsw_sp_port_get_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
@@ -786,6 +836,11 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev,
mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats(mlxsw_sp_port,
data, data_index);
data_index += mlxsw_sp_port->mlxsw_sp->ptp_ops->get_stats_count();
+
+ /* Transceiver counters */
+ __mlxsw_sp_port_get_env_stats(dev, data, data_index, mlxsw_sp_port_transceiver_stats,
+ MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN);
+ data_index += MLXSW_SP_PORT_HW_TRANSCEIVER_STATS_LEN;
}
static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index fe0b8af287a7..57f9e24602d0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -124,6 +124,10 @@ enum mlxsw_event_trap_id {
MLXSW_TRAP_ID_MFDE = 0x3,
/* Port Up/Down event generated by hardware */
MLXSW_TRAP_ID_PUDE = 0x8,
+ /* Port Module Plug/Unplug Event generated by hardware */
+ MLXSW_TRAP_ID_PMPE = 0x9,
+ /* Temperature Warning event generated by hardware */
+ MLXSW_TRAP_ID_MTWE = 0xC,
/* PTP Ingress FIFO has a new entry */
MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D,
/* PTP Egress FIFO has a new entry */