aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
diff options
context:
space:
mode:
authorGravatar Dave Airlie <airlied@redhat.com> 2024-03-08 11:21:13 +1000
committerGravatar Dave Airlie <airlied@redhat.com> 2024-03-08 11:21:13 +1000
commitaf165fb00a1eb390976f6016fc69df0da0d27fad (patch)
tree6351742220dd4b801c0c0b49689b3fc4937e95f1 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
parentMerge tag 'drm-misc-next-fixes-2024-02-29' of https://anongit.freedesktop.org... (diff)
parentdrm/amdgpu: remove misleading amdgpu_pmops_runtime_idle() comment (diff)
downloadlinux-af165fb00a1eb390976f6016fc69df0da0d27fad.tar.gz
linux-af165fb00a1eb390976f6016fc69df0da0d27fad.tar.bz2
linux-af165fb00a1eb390976f6016fc69df0da0d27fad.zip
Merge tag 'amd-drm-next-6.9-2024-03-01' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.9-2024-03-01: amdgpu: - GC 11.5.1 updates - Misc display cleanups - NBIO 7.9 updates - Backlight fixes - DMUB fixes - MPO fixes - atomfirmware table updates - SR-IOV fixes - VCN 4.x updates - use RMW accessors for pci config registers - PSR fixes - Suspend/resume fixes - RAS fixes - ABM fixes - Misc code cleanups - SI DPM fix - Revert freesync video amdkfd: - Misc cleanups - Error handling fixes radeon: - use RMW accessors for pci config registers From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240301204857.13960-1-alexander.deucher@amd.com Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 46f3d1013e8c..8ebab6f22e5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2439,6 +2439,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+ /* For any RAS error that needs a full reset to
+ * recover, set the fatal error status
+ */
+ if (hive) {
+ list_for_each_entry(remote_adev,
+ &hive->device_list,
+ gmc.xgmi.head)
+ amdgpu_ras_set_fed(remote_adev,
+ true);
+ } else {
+ amdgpu_ras_set_fed(adev, true);
+ }
psp_fatal_error_recovery_quirk(&adev->psp);
}
}
@@ -3440,6 +3452,26 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
return 0;
}
+bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (!ras)
+ return false;
+
+ return atomic_read(&ras->fed);
+}
+
+void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
+{
+ struct amdgpu_ras *ras;
+
+ ras = amdgpu_ras_get_context(adev);
+ if (ras)
+ atomic_set(&ras->fed, !!status);
+}
+
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@@ -3620,6 +3652,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev,
block == AMDGPU_RAS_BLOCK__SDMA ||
block == AMDGPU_RAS_BLOCK__VCN ||
block == AMDGPU_RAS_BLOCK__JPEG) &&
+ (amdgpu_ras_mask & (1 << block)) &&
amdgpu_ras_is_poison_mode_supported(adev) &&
amdgpu_ras_get_ras_block(adev, block, 0))
ret = 1;