aboutsummaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
authorGravatar Tomer Tayar <ttayar@habana.ai> 2024-01-25 22:59:02 +0200
committerGravatar Oded Gabbay <ogabbay@kernel.org> 2024-02-26 09:47:00 +0200
commite855869bec3fea9f11521a21f419d5a10f4b0c12 (patch)
treee644f145ff1b3e7fe7a1699e8023b3bed8b01421 /drivers/accel
parentaccel/habanalabs/gaudi2: check extended errors according to PCIe addr_dec int... (diff)
downloadlinux-e855869bec3fea9f11521a21f419d5a10f4b0c12.tar.gz
linux-e855869bec3fea9f11521a21f419d5a10f4b0c12.tar.bz2
linux-e855869bec3fea9f11521a21f419d5a10f4b0c12.zip
accel/habanalabs: fix glbl error cause handling
The glbl error cause handling has a wrong assumption that all error bits are consecutive. Fix the handling to check all relevant error bits per ASIC. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Reviewed-by: Carl Vanderlip <quic_carlv@quicinc.com> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h4
-rw-r--r--drivers/accel/habanalabs/common/security.c33
-rw-r--r--drivers/accel/habanalabs/common/security.h3
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2.c10
-rw-r--r--drivers/accel/habanalabs/gaudi2/gaudi2P.h3
5 files changed, 35 insertions, 18 deletions
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index 2a324924c6fc..d85e1d12a309 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -647,7 +647,7 @@ struct hl_hints_range {
* @num_engine_cores: number of engine cpu cores.
* @max_num_of_engines: maximum number of all engines in the ASIC.
* @num_of_special_blocks: special_blocks array size.
- * @glbl_err_cause_num: global err cause number.
+ * @glbl_err_max_cause_num: global err max cause number.
* @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
* not supported.
* @reserved_fw_mem_size: size in MB of dram memory reserved for FW.
@@ -779,7 +779,7 @@ struct asic_fixed_properties {
u32 num_engine_cores;
u32 max_num_of_engines;
u32 num_of_special_blocks;
- u32 glbl_err_cause_num;
+ u32 glbl_err_max_cause_num;
u32 hbw_flush_reg;
u32 reserved_fw_mem_size;
u16 collective_first_sob;
diff --git a/drivers/accel/habanalabs/common/security.c b/drivers/accel/habanalabs/common/security.c
index fe913965dbad..5402a3cd0491 100644
--- a/drivers/accel/habanalabs/common/security.c
+++ b/drivers/accel/habanalabs/common/security.c
@@ -7,15 +7,31 @@
#include "habanalabs.h"
-static const char * const hl_glbl_error_cause[HL_MAX_NUM_OF_GLBL_ERR_CAUSE] = {
+static const char * const hl_glbl_error_cause[] = {
"Error due to un-priv read",
"Error due to un-secure read",
"Error due to read from unmapped reg",
"Error due to un-priv write",
"Error due to un-secure write",
"Error due to write to unmapped reg",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
"External I/F write sec violation",
"External I/F write to un-mapped reg",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
+ "N/A",
"Read to write only",
"Write to read only"
};
@@ -671,10 +687,11 @@ static bool hl_check_block_range_exclusion(struct hl_device *hdev,
static int hl_read_glbl_errors(struct hl_device *hdev,
u32 blk_idx, u32 major, u32 minor, u32 sub_minor, void *data)
{
- struct hl_special_block_info *special_blocks = hdev->asic_prop.special_blocks;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct hl_special_block_info *special_blocks = prop->special_blocks;
struct hl_special_block_info *current_block = &special_blocks[blk_idx];
u32 glbl_err_addr, glbl_err_cause, addr_val, cause_val, block_base,
- base = current_block->base_addr - lower_32_bits(hdev->asic_prop.cfg_base_address);
+ base = current_block->base_addr - lower_32_bits(prop->cfg_base_address);
int i;
block_base = base + major * current_block->major_offset +
@@ -689,13 +706,13 @@ static int hl_read_glbl_errors(struct hl_device *hdev,
glbl_err_addr = block_base + HL_GLBL_ERR_ADDR_OFFSET;
addr_val = RREG32(glbl_err_addr);
- for (i = 0 ; i < hdev->asic_prop.glbl_err_cause_num ; i++) {
+ for (i = 0 ; i <= prop->glbl_err_max_cause_num ; i++) {
if (cause_val & BIT(i))
dev_err_ratelimited(hdev->dev,
- "%s, addr %#llx\n",
- hl_glbl_error_cause[i],
- hdev->asic_prop.cfg_base_address + block_base +
- FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
+ "%s, addr %#llx\n",
+ hl_glbl_error_cause[i],
+ prop->cfg_base_address + block_base +
+ FIELD_GET(HL_GLBL_ERR_ADDRESS_MASK, addr_val));
}
WREG32(glbl_err_cause, cause_val);
diff --git a/drivers/accel/habanalabs/common/security.h b/drivers/accel/habanalabs/common/security.h
index d7a3b3e82ea4..476f70687c09 100644
--- a/drivers/accel/habanalabs/common/security.h
+++ b/drivers/accel/habanalabs/common/security.h
@@ -13,8 +13,7 @@
struct hl_device;
/* special blocks */
-#define HL_MAX_NUM_OF_GLBL_ERR_CAUSE 10
-#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
+#define HL_GLBL_ERR_ADDRESS_MASK GENMASK(11, 0)
/* GLBL_ERR_ADDR register offset from the start of the block */
#define HL_GLBL_ERR_ADDR_OFFSET 0xF44
/* GLBL_ERR_CAUSE register offset from the start of the block */
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 9f033a785c49..e6395a8e7379 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -158,11 +158,13 @@
#define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
(RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
-#define PSOC_RAZWI_ENG_STR_SIZE 128
-#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
+#define PSOC_RAZWI_ENG_STR_SIZE 128
+#define PSOC_RAZWI_MAX_ENG_PER_RTR 5
/* HW scrambles only bits 0-25 */
-#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
+#define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
+
+#define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
struct gaudi2_razwi_info {
u32 axuser_xy;
@@ -3587,7 +3589,7 @@ static int gaudi2_special_blocks_config(struct hl_device *hdev)
int i, rc;
/* Configure Special blocks */
- prop->glbl_err_cause_num = GAUDI2_NUM_OF_GLBL_ERR_CAUSE;
+ prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
sizeof(*prop->special_blocks), GFP_KERNEL);
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2P.h b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
index bc508c9cee5c..eee41387b269 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2P.h
@@ -237,9 +237,8 @@
#define GAUDI2_SOB_INCREMENT_BY_ONE (FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1) | \
FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1))
-#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
+#define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0)
-#define GAUDI2_NUM_OF_GLBL_ERR_CAUSE 8
enum gaudi2_reserved_sob_id {
GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST,