aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c17
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c36
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_user.c17
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c265
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c77
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h72
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c28
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c20
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_print.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c74
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c60
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_tlb.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c420
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_tlb.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h33
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c20
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c70
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h39
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c46
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c63
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c249
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c40
47 files changed, 1275 insertions, 607 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 7ad36198aab2..86a04afff64b 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -4,9 +4,9 @@
*/
#include "gen8_engine_cs.h"
-#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
+#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"
@@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ IS_DG2(rq->i915)) {
u32 *cs;
/* dummy PIPE_CONTROL + depth flush */
@@ -278,7 +278,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
* deals with Protected Memory which is not needed for
* AUX CCS invalidation and lead to unwanted side effects.
*/
- if (mode & EMIT_FLUSH)
+ if ((mode & EMIT_FLUSH) &&
+ GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
@@ -808,18 +809,20 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *i915 = rq->i915;
+ struct intel_gt *gt = rq->engine->gt;
u32 flags = (PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
+ if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
+ flags |= PIPE_CONTROL_FLUSH_L3;
+
/* Wa_14016712196 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index c8568e5d1147..9895e18df043 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -242,9 +242,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
do {
@@ -252,8 +252,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
gen8_pd_contains(start, end, lvl)) {
- DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
- __func__, vm, lvl + 1, idx, start, end);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
clear_pd_entry(pd, idx, scratch);
__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
start += (u64)I915_PDES << gen8_pd_shift(lvl);
@@ -270,10 +270,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
u64 *vaddr;
count = gen8_pt_count(start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
- __func__, vm, lvl, start, end,
- gen8_pd_index(start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
num_ptes = count;
@@ -325,9 +325,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
len = gen8_pd_range(*start, end, lvl--, &idx);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
- __func__, vm, lvl + 1, *start, end,
- idx, len, atomic_read(px_used(pd)));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
spin_lock(&pd->lock);
@@ -338,8 +338,8 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
if (!pt) {
spin_unlock(&pd->lock);
- DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
- __func__, vm, lvl + 1, idx);
+ GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
pt = stash->pt[!!lvl];
__i915_gem_object_pin_pages(pt->base);
@@ -369,10 +369,10 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
} else {
unsigned int count = gen8_pt_count(*start, end);
- DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
- __func__, vm, lvl, *start, end,
- gen8_pd_index(*start, 0), count,
- atomic_read(&pt->used));
+ GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
atomic_add(count, &pt->used);
/* All other pdes may be simultaneously removed */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index b58c30ac8ef0..40269e4c1e31 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -170,6 +170,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_GEM_HWS_SEQNO 0x40
#define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32))
#define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32))
+#define I915_GEM_HWS_GGTT_BIND 0x46
+#define I915_GEM_HWS_GGTT_BIND_ADDR (I915_GEM_HWS_GGTT_BIND * sizeof(u32))
#define I915_GEM_HWS_PXP 0x60
#define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32))
#define I915_GEM_HWS_GSC 0x62
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index e85d70a62123..4a11219e560e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -316,10 +316,9 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
* out in the wash.
*/
cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
- drm_dbg(&gt->i915->drm,
- "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
- GRAPHICS_VER(gt->i915), cxt_size * 64,
- cxt_size - 1);
+ gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
+ GRAPHICS_VER(gt->i915), cxt_size * 64,
+ cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
case 3:
case 2:
@@ -788,7 +787,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
if (!(BIT(i) & vdbox_mask)) {
gt->info.engine_mask &= ~BIT(_VCS(i));
- drm_dbg(&i915->drm, "vcs%u fused off\n", i);
+ gt_dbg(gt, "vcs%u fused off\n", i);
continue;
}
@@ -796,8 +795,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
gt->info.vdbox_sfc_access |= BIT(i);
logical_vdbox++;
}
- drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n",
- vdbox_mask, VDBOX_MASK(gt));
+ gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
for (i = 0; i < I915_MAX_VECS; i++) {
@@ -808,11 +806,10 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt)
if (!(BIT(i) & vebox_mask)) {
gt->info.engine_mask &= ~BIT(_VECS(i));
- drm_dbg(&i915->drm, "vecs%u fused off\n", i);
+ gt_dbg(gt, "vecs%u fused off\n", i);
}
}
- drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n",
- vebox_mask, VEBOX_MASK(gt));
+ gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
}
@@ -838,7 +835,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
*/
for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
info->engine_mask &= ~BIT(_CCS(i));
- drm_dbg(&i915->drm, "ccs%u fused off\n", i);
+ gt_dbg(gt, "ccs%u fused off\n", i);
}
}
@@ -866,8 +863,8 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt)
_BCS(instance));
if (mask & info->engine_mask) {
- drm_dbg(&i915->drm, "bcs%u fused off\n", instance);
- drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1);
+ gt_dbg(gt, "bcs%u fused off\n", instance);
+ gt_dbg(gt, "bcs%u fused off\n", instance + 1);
info->engine_mask &= ~mask;
}
@@ -907,8 +904,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
* submission, which will wake up the GSC power well.
*/
if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(&gt->uc)) {
- drm_notice(&gt->i915->drm,
- "No GSC FW selected, disabling GSC CS and media C6\n");
+ gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
info->engine_mask &= ~BIT(GSC0);
}
@@ -1097,8 +1093,7 @@ static int init_status_page(struct intel_engine_cs *engine)
*/
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
- drm_err(&engine->i915->drm,
- "Failed to allocate status page\n");
+ gt_err(engine->gt, "Failed to allocate status page\n");
return PTR_ERR(obj);
}
@@ -1419,6 +1414,20 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce)
}
static struct intel_context *
+create_ggtt_bind_context(struct intel_engine_cs *engine)
+{
+ static struct lock_class_key kernel;
+
+ /*
+ * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple
+ * bind requets at a time so get a bigger ring.
+ */
+ return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K,
+ I915_GEM_HWS_GGTT_BIND_ADDR,
+ &kernel, "ggtt_bind_context");
+}
+
+static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
static struct lock_class_key kernel;
@@ -1441,7 +1450,7 @@ create_kernel_context(struct intel_engine_cs *engine)
*/
static int engine_init_common(struct intel_engine_cs *engine)
{
- struct intel_context *ce;
+ struct intel_context *ce, *bce = NULL;
int ret;
engine->set_default_submission(engine);
@@ -1457,17 +1466,34 @@ static int engine_init_common(struct intel_engine_cs *engine)
ce = create_kernel_context(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ /*
+ * Create a separate pinned context for GGTT update with blitter engine
+ * if a platform require such service. MI_UPDATE_GTT works on other
+ * engines as well but BCS should be less busy engine so pick that for
+ * GGTT updates.
+ */
+ if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) {
+ bce = create_ggtt_bind_context(engine);
+ if (IS_ERR(bce)) {
+ ret = PTR_ERR(bce);
+ goto err_ce_context;
+ }
+ }
ret = measure_breadcrumb_dw(ce);
if (ret < 0)
- goto err_context;
+ goto err_bce_context;
engine->emit_fini_breadcrumb_dw = ret;
engine->kernel_context = ce;
+ engine->bind_context = bce;
return 0;
-err_context:
+err_bce_context:
+ if (bce)
+ intel_engine_destroy_pinned_context(bce);
+err_ce_context:
intel_engine_destroy_pinned_context(ce);
return ret;
}
@@ -1537,6 +1563,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->kernel_context)
intel_engine_destroy_pinned_context(engine->kernel_context);
+ if (engine->bind_context)
+ intel_engine_destroy_pinned_context(engine->bind_context);
+
+
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
cleanup_status_page(engine);
@@ -1616,9 +1646,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
* Wa_22011802037: Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index b538b5c04948..e91fc881dbf1 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_METEORLAKE(i915) && engine->id == GSC0) {
+ if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) {
intel_uncore_write(engine->gt->uncore,
RC_PSMI_CTRL_GSCCS,
_MASKED_BIT_DISABLE(IDLE_MSG_DISABLE));
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index 6b9d9f837669..fdd4ddd3a978 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -177,6 +177,7 @@
#define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
+#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8)
#define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244)
#define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a7e677598004..8769760257fd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -402,7 +402,15 @@ struct intel_engine_cs {
unsigned long context_tag;
- struct rb_node uabi_node;
+ /*
+ * The type evolves during initialization, see related comment for
+ * struct drm_i915_private's uabi_engines member.
+ */
+ union {
+ struct llist_node uabi_llist;
+ struct list_head uabi_list;
+ struct rb_node uabi_node;
+ };
struct intel_sseu sseu;
@@ -416,6 +424,9 @@ struct intel_engine_cs {
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
+ struct intel_context *bind_context; /* pinned, only for BCS0 */
+ /* mark the bind context's availability status */
+ bool bind_context_ready;
/**
* pinned_contexts_list: List of pinned contexts. This list is only
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
index dcedff41a825..118164ddbb2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_user.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -38,8 +38,7 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
void intel_engine_add_user(struct intel_engine_cs *engine)
{
- llist_add((struct llist_node *)&engine->uabi_node,
- (struct llist_head *)&engine->i915->uabi_engines);
+ llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist);
}
static const u8 uabi_classes[] = {
@@ -54,9 +53,9 @@ static int engine_cmp(void *priv, const struct list_head *A,
const struct list_head *B)
{
const struct intel_engine_cs *a =
- container_of((struct rb_node *)A, typeof(*a), uabi_node);
+ container_of(A, typeof(*a), uabi_list);
const struct intel_engine_cs *b =
- container_of((struct rb_node *)B, typeof(*b), uabi_node);
+ container_of(B, typeof(*b), uabi_list);
if (uabi_classes[a->class] < uabi_classes[b->class])
return -1;
@@ -73,7 +72,7 @@ static int engine_cmp(void *priv, const struct list_head *A,
static struct llist_node *get_engines(struct drm_i915_private *i915)
{
- return llist_del_all((struct llist_head *)&i915->uabi_engines);
+ return llist_del_all(&i915->uabi_engines_llist);
}
static void sort_engines(struct drm_i915_private *i915,
@@ -83,9 +82,8 @@ static void sort_engines(struct drm_i915_private *i915,
llist_for_each_safe(pos, next, get_engines(i915)) {
struct intel_engine_cs *engine =
- container_of((struct rb_node *)pos, typeof(*engine),
- uabi_node);
- list_add((struct list_head *)&engine->uabi_node, engines);
+ container_of(pos, typeof(*engine), uabi_llist);
+ list_add(&engine->uabi_list, engines);
}
list_sort(NULL, engines, engine_cmp);
}
@@ -213,8 +211,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915)
p = &i915->uabi_engines.rb_node;
list_for_each_safe(it, next, &engines) {
struct intel_engine_cs *engine =
- container_of((struct rb_node *)it, typeof(*engine),
- uabi_node);
+ container_of(it, typeof(*engine), uabi_list);
if (intel_gt_has_unrecoverable_error(engine->gt))
continue; /* ignore incomplete engines */
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 3292524469d5..e8f42ec6b1b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt))
intel_engine_wait_for_pending_mi_fw(engine);
engine->execlists.reset_ccid = active_ccid(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index da21f2786b5d..1c93e84278a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -15,18 +15,23 @@
#include "display/intel_display.h"
#include "gem/i915_gem_lmem.h"
+#include "intel_context.h"
#include "intel_ggtt_gmch.h"
+#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_pci_config.h"
+#include "intel_ring.h"
#include "i915_drv.h"
#include "i915_pci.h"
+#include "i915_request.h"
#include "i915_scatterlist.h"
#include "i915_utils.h"
#include "i915_vgpu.h"
#include "intel_gtt.h"
#include "gen8_ppgtt.h"
+#include "intel_engine_pm.h"
static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
unsigned long color,
@@ -201,22 +206,36 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}
+static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm_if_active(uncore->rpm, wakeref) {
+ struct intel_guc *guc = &gt->uc.guc;
+
+ intel_guc_invalidate_tlb_guc(guc);
+ }
+}
+
static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct intel_gt *gt;
gen8_ggtt_invalidate(ggtt);
- if (GRAPHICS_VER(i915) >= 12) {
- struct intel_gt *gt;
-
- list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
+ list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
+ if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
+ guc_ggtt_ct_invalidate(gt);
+ } else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_write_fw(gt->uncore,
GEN12_GUC_TLB_INV_CR,
GEN12_GUC_TLB_INV_CR_INVALIDATE);
- } else {
- intel_uncore_write_fw(ggtt->vm.gt->uncore,
- GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+ } else {
+ intel_uncore_write_fw(gt->uncore,
+ GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+ }
}
}
@@ -252,6 +271,145 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
return pte;
}
+static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
+{
+ struct intel_gt *gt = ggtt->vm.gt;
+
+ return intel_gt_is_bind_context_ready(gt);
+}
+
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+{
+ struct intel_context *ce;
+ struct intel_gt *gt = ggtt->vm.gt;
+
+ if (intel_gt_is_wedged(gt))
+ return NULL;
+
+ ce = gt->engine[BCS0]->bind_context;
+ GEM_BUG_ON(!ce);
+
+ /*
+ * If the GT is not awake already at this stage then fallback
+ * to pci based GGTT update otherwise __intel_wakeref_get_first()
+ * would conflict with fs_reclaim trying to allocate memory while
+ * doing rpm_resume().
+ */
+ if (!intel_gt_pm_get_if_awake(gt))
+ return NULL;
+
+ intel_engine_pm_get(ce->engine);
+
+ return ce;
+}
+
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+{
+ intel_engine_pm_put(ce->engine);
+ intel_gt_pm_put(ce->engine->gt);
+}
+
+static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
+ struct sg_table *pages, u32 num_entries,
+ const gen8_pte_t pte)
+{
+ struct i915_sched_attr attr = {};
+ struct intel_gt *gt = ggtt->vm.gt;
+ const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
+ struct sgt_iter iter;
+ struct i915_request *rq;
+ struct intel_context *ce;
+ u32 *cs;
+
+ if (!num_entries)
+ return true;
+
+ ce = gen8_ggtt_bind_get_ce(ggtt);
+ if (!ce)
+ return false;
+
+ if (pages)
+ iter = __sgt_iter(pages->sgl, true);
+
+ while (num_entries) {
+ int count = 0;
+ dma_addr_t addr;
+ /*
+ * MI_UPDATE_GTT can update 512 entries in a single command but
+ * that end up with engine reset, 511 works.
+ */
+ u32 n_ptes = min_t(u32, 511, num_entries);
+
+ if (mutex_lock_interruptible(&ce->timeline->mutex))
+ goto put_ce;
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
+ intel_context_exit(ce);
+ if (IS_ERR(rq)) {
+ GT_TRACE(gt, "Failed to get bind request\n");
+ mutex_unlock(&ce->timeline->mutex);
+ goto put_ce;
+ }
+
+ cs = intel_ring_begin(rq, 2 * n_ptes + 2);
+ if (IS_ERR(cs)) {
+ GT_TRACE(gt, "Failed to ring space for GGTT bind\n");
+ i915_request_set_error_once(rq, PTR_ERR(cs));
+ /* once a request is created, it must be queued */
+ goto queue_err_rq;
+ }
+
+ *cs++ = MI_UPDATE_GTT | (2 * n_ptes);
+ *cs++ = offset << 12;
+
+ if (pages) {
+ for_each_sgt_daddr_next(addr, iter) {
+ if (count == n_ptes)
+ break;
+ *cs++ = lower_32_bits(pte | addr);
+ *cs++ = upper_32_bits(pte | addr);
+ count++;
+ }
+ /* fill remaining with scratch pte, if any */
+ if (count < n_ptes) {
+ memset64((u64 *)cs, scratch_pte,
+ n_ptes - count);
+ cs += (n_ptes - count) * 2;
+ }
+ } else {
+ memset64((u64 *)cs, pte, n_ptes);
+ cs += n_ptes * 2;
+ }
+
+ intel_ring_advance(rq, cs);
+queue_err_rq:
+ i915_request_get(rq);
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+ mutex_unlock(&ce->timeline->mutex);
+ /* This will break if the request is complete or after engine reset */
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ if (rq->fence.error)
+ goto err_rq;
+
+ i915_request_put(rq);
+
+ num_entries -= n_ptes;
+ offset += n_ptes;
+ }
+
+ gen8_ggtt_bind_put_ce(ce);
+ return true;
+
+err_rq:
+ i915_request_put(rq);
+put_ce:
+ gen8_ggtt_bind_put_ce(ce);
+ return false;
+}
+
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
writeq(pte, addr);
@@ -272,6 +430,21 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
ggtt->invalidate(ggtt);
}
+static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
+ dma_addr_t addr, u64 offset,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t pte;
+
+ pte = ggtt->vm.pte_encode(addr, pat_index, flags);
+ if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+ gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
+}
+
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
struct i915_vma_resource *vma_res,
unsigned int pat_index,
@@ -311,6 +484,50 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
ggtt->invalidate(ggtt);
}
+static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ gen8_pte_t pte_encode;
+ u64 start, end;
+
+ pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
+ start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
+ end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+ goto err;
+
+ start = end;
+ end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
+ vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
+ goto err;
+
+ start += vma_res->node_size / I915_GTT_PAGE_SIZE;
+ if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
+ goto err;
+
+ return true;
+
+err:
+ return false;
+}
+
+static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
+ struct i915_vma_resource *vma_res,
+ unsigned int pat_index, u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
+ __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
+}
+
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
u64 start, u64 length)
{
@@ -332,6 +549,27 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
gen8_set_pte(&gtt_base[i], scratch_pte);
}
+static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
+ NULL, num_entries, scratch_pte))
+ return ggtt->invalidate(ggtt);
+
+ gen8_ggtt_clear_range(vm, start, length);
+}
+
static void gen6_ggtt_insert_page(struct i915_address_space *vm,
dma_addr_t addr,
u64 offset,
@@ -1008,7 +1246,18 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
}
- if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
+ if (i915_ggtt_require_binder(i915)) {
+ ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
+ ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
+ ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
+ /*
+ * On GPU is hung, we might bind VMAs for error capture.
+ * Fallback to CPU GGTT updates in that case.
+ */
+ ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
+ }
+
+ if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
ggtt->invalidate = guc_ggtt_invalidate;
else
ggtt->invalidate = gen8_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index bcc3605158db..6d440de8ba01 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -11,6 +11,7 @@
#include "gem/i915_gem_region.h"
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
+#include "gt/intel_gt_print.h"
#define GSC_BAR_LENGTH 0x00000FFC
@@ -49,13 +50,13 @@ gsc_ext_om_alloc(struct intel_gsc *gsc, struct intel_gsc_intf *intf, size_t size
I915_BO_ALLOC_CONTIGUOUS |
I915_BO_ALLOC_CPU_CLEAR);
if (IS_ERR(obj)) {
- drm_err(&gt->i915->drm, "Failed to allocate gsc memory\n");
+ gt_err(gt, "Failed to allocate gsc memory\n");
return PTR_ERR(obj);
}
err = i915_gem_object_pin_pages_unlocked(obj);
if (err) {
- drm_err(&gt->i915->drm, "Failed to pin pages for gsc memory\n");
+ gt_err(gt, "Failed to pin pages for gsc memory\n");
goto out_put;
}
@@ -286,12 +287,12 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
int ret;
if (intf_id >= INTEL_GSC_NUM_INTERFACES) {
- drm_warn_once(&gt->i915->drm, "GSC irq: intf_id %d is out of range", intf_id);
+ gt_warn_once(gt, "GSC irq: intf_id %d is out of range", intf_id);
return;
}
if (!HAS_HECI_GSC(gt->i915)) {
- drm_warn_once(&gt->i915->drm, "GSC irq: not supported");
+ gt_warn_once(gt, "GSC irq: not supported");
return;
}
@@ -300,7 +301,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id)
ret = generic_handle_irq(gt->gsc.intf[intf_id].irq);
if (ret)
- drm_err_ratelimited(&gt->i915->drm, "error handling GSC irq: %d\n", ret);
+ gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret);
}
void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 449f0b7fc843..ed32bf5b1546 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -62,7 +62,13 @@ void intel_gt_common_init_early(struct intel_gt *gt)
/* Preliminary initialization of Tile 0 */
int intel_root_gt_init_early(struct drm_i915_private *i915)
{
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+
+ gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
+ if (!gt)
+ return -ENOMEM;
+
+ i915->gt[0] = gt;
gt->i915 = i915;
gt->uncore = &i915->uncore;
@@ -262,10 +268,21 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
I915_MASTER_ERROR_INTERRUPT);
}
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ /*
+ * For the media GT, this ring fault register is not replicated,
+ * so don't do multicast/replicated register read/write operation on it.
+ */
+ if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
+ intel_uncore_rmw(uncore, XELPMP_RING_FAULT_REG,
+ RING_FAULT_VALID, 0);
+ intel_uncore_posting_read(uncore,
+ XELPMP_RING_FAULT_REG);
+
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
RING_FAULT_VALID, 0);
intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+
} else if (GRAPHICS_VER(i915) >= 12) {
intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0);
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
@@ -911,8 +928,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
if (ret)
return ret;
- i915->gt[0] = gt;
-
if (!HAS_EXTRA_GT_LIST(i915))
return 0;
@@ -1019,3 +1034,57 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
else
return I915_MAP_WC;
}
+
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
+{
+ return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
+}
+
+static void __intel_gt_bind_context_set_ready(struct intel_gt *gt, bool ready)
+{
+ struct intel_engine_cs *engine = gt->engine[BCS0];
+
+ if (engine && engine->bind_context)
+ engine->bind_context_ready = ready;
+}
+
+/**
+ * intel_gt_bind_context_set_ready - Set the context binding as ready
+ *
+ * @gt: GT structure
+ *
+ * This function marks the binder context as ready.
+ */
+void intel_gt_bind_context_set_ready(struct intel_gt *gt)
+{
+ __intel_gt_bind_context_set_ready(gt, true);
+}
+
+/**
+ * intel_gt_bind_context_set_unready - Set the context binding as ready
+ * @gt: GT structure
+ *
+ * This function marks the binder context as not ready.
+ */
+
+void intel_gt_bind_context_set_unready(struct intel_gt *gt)
+{
+ __intel_gt_bind_context_set_ready(gt, false);
+}
+
+/**
+ * intel_gt_is_bind_context_ready - Check if context binding is ready
+ *
+ * @gt: GT structure
+ *
+ * This function returns binder context's ready status.
+ */
+bool intel_gt_is_bind_context_ready(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine = gt->engine[BCS0];
+
+ if (engine)
+ return engine->bind_context_ready;
+
+ return false;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 6c34547b58b5..970bedf6b78a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -6,7 +6,6 @@
#ifndef __INTEL_GT__
#define __INTEL_GT__
-#include "i915_drv.h"
#include "intel_engine_types.h"
#include "intel_gt_types.h"
#include "intel_reset.h"
@@ -14,6 +13,69 @@
struct drm_i915_private;
struct drm_printer;
+/*
+ * Check that the GT is a graphics GT and has an IP version within the
+ * specified range (inclusive).
+ */
+#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt)->type != GT_MEDIA && \
+ GRAPHICS_VER_FULL((gt)->i915) >= (from) && \
+ GRAPHICS_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a media GT and has an IP version within the
+ * specified range (inclusive).
+ *
+ * Only usable on platforms with a standalone media design (i.e., IP version 13
+ * and higher).
+ */
+#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \
+ BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \
+ BUILD_BUG_ON_ZERO((until) < (from)) + \
+ ((gt) && (gt)->type == GT_MEDIA && \
+ MEDIA_VER_FULL((gt)->i915) >= (from) && \
+ MEDIA_VER_FULL((gt)->i915) <= (until)))
+
+/*
+ * Check that the GT is a graphics GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. E.g.,
+ *
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)
+ * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER)
+ *
+ * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper
+ * stepping bound for the specified IP version.
+ */
+#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_GRAPHICS_STEP((gt)->i915, (from), (until))))
+
+/*
+ * Check that the GT is a media GT with a specific IP version and has
+ * a stepping in the range [from, until). The lower stepping bound is
+ * inclusive, the upper bound is exclusive. The most common use-case of this
+ * macro is for checking bounds for workarounds, which usually have a stepping
+ * ("from") at which the hardware issue is first present and another stepping
+ * ("until") at which a hardware fix is present and the software workaround is
+ * no longer necessary. "STEP_FOREVER" can be passed as "until" for
+ * workarounds that have no upper stepping bound for the specified IP version.
+ *
+ * This macro may only be used to match on platforms that have a standalone
+ * media design (i.e., media version 13 or higher).
+ */
+#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \
+ BUILD_BUG_ON_ZERO((until) <= (from)) + \
+ (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \
+ IS_MEDIA_STEP((gt)->i915, (from), (until))))
+
#define GT_TRACE(gt, fmt, ...) do { \
const struct intel_gt *gt__ __maybe_unused = (gt); \
GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
@@ -25,10 +87,7 @@ static inline bool gt_is_root(struct intel_gt *gt)
return !gt->info.id;
}
-static inline bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
-{
- return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
-}
+bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
@@ -117,4 +176,7 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
struct drm_i915_gem_object *obj,
bool always_coherent);
+void intel_gt_bind_context_set_ready(struct intel_gt *gt);
+void intel_gt_bind_context_set_unready(struct intel_gt *gt);
+bool intel_gt_is_bind_context_ready(struct intel_gt *gt);
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 2c0f1f3e28ff..34913912d8ae 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -4,7 +4,7 @@
*/
#include "i915_drv.h"
-
+#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -166,8 +166,8 @@ void intel_gt_mcr_init(struct intel_gt *gt)
gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
/* Wa_14016747170 */
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK,
intel_uncore_read(gt->uncore,
MTL_GT_ACTIVITY_FACTOR));
@@ -440,6 +440,28 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags)
}
/**
+ * intel_gt_mcr_lock_sanitize - Sanitize MCR steering lock
+ * @gt: GT structure
+ *
+ * This will be used to sanitize the initial status of the hardware lock
+ * during driver load and resume since there won't be any concurrent access
+ * from other agents at those times, but it's possible that boot firmware
+ * may have left the lock in a bad state.
+ *
+ */
+void intel_gt_mcr_lock_sanitize(struct intel_gt *gt)
+{
+ /*
+ * This gets called at load/resume time, so we shouldn't be
+ * racing with other driver threads grabbing the mcr lock.
+ */
+ lockdep_assert_not_held(&gt->mcr_lock);
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1);
+}
+
+/**
* intel_gt_mcr_read - read a specific instance of an MCR register
* @gt: GT structure
* @reg: the MCR register to read
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 41684495b7da..01ac565a56a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -11,6 +11,7 @@
void intel_gt_mcr_init(struct intel_gt *gt);
void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags);
void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags);
+void intel_gt_mcr_lock_sanitize(struct intel_gt *gt);
u32 intel_gt_mcr_read(struct intel_gt *gt,
i915_mcr_reg_t reg,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 5a942af0a14e..f5899d503e23 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -13,6 +13,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_clock_utils.h"
+#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_requests.h"
@@ -216,6 +217,21 @@ void intel_gt_pm_fini(struct intel_gt *gt)
intel_rc6_fini(&gt->rc6);
}
+void intel_gt_resume_early(struct intel_gt *gt)
+{
+ /*
+ * Sanitize steer semaphores during driver resume. This is necessary
+ * to address observed cases of steer semaphores being
+ * held after a suspend operation. Confirmation from the hardware team
+ * assures the safety of this operation, as no lock acquisitions
+ * by other agents occur during driver load/resume process.
+ */
+ intel_gt_mcr_lock_sanitize(gt);
+
+ intel_uncore_resume_early(gt->uncore);
+ intel_gt_check_and_clear_faults(gt);
+}
+
int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -280,6 +296,7 @@ int intel_gt_resume(struct intel_gt *gt)
out_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_gt_pm_put(gt);
+ intel_gt_bind_context_set_ready(gt);
return err;
err_wedged:
@@ -306,6 +323,7 @@ static void wait_for_suspend(struct intel_gt *gt)
void intel_gt_suspend_prepare(struct intel_gt *gt)
{
+ intel_gt_bind_context_set_unready(gt);
user_forcewake(gt, true);
wait_for_suspend(gt);
}
@@ -359,6 +377,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
+ intel_gt_bind_context_set_unready(gt);
intel_uc_runtime_suspend(&gt->uc);
GT_TRACE(gt, "\n");
@@ -376,6 +395,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
if (ret)
return ret;
+ intel_gt_bind_context_set_ready(gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 6c9a46452364..b1eeb5b33918 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -78,6 +78,7 @@ void intel_gt_pm_fini(struct intel_gt *gt);
void intel_gt_suspend_prepare(struct intel_gt *gt);
void intel_gt_suspend_late(struct intel_gt *gt);
int intel_gt_resume(struct intel_gt *gt);
+void intel_gt_resume_early(struct intel_gt *gt);
void intel_gt_runtime_suspend(struct intel_gt *gt);
int intel_gt_runtime_resume(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 357e2f865727..f900cc68d6d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -290,7 +290,6 @@ static int mtl_drpc(struct seq_file *m)
seq_puts(m, "RC6\n");
break;
default:
- MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status));
seq_puts(m, "Unknown\n");
break;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_print.h b/drivers/gpu/drm/i915/gt/intel_gt_print.h
index 55a336a9ff06..7fdc78c79273 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_print.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_print.h
@@ -16,6 +16,9 @@
#define gt_warn(_gt, _fmt, ...) \
drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+#define gt_warn_once(_gt, _fmt, ...) \
+ drm_warn_once(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+
#define gt_notice(_gt, _fmt, ...) \
drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 2cdfb2f713d0..eecd0a87a647 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -26,7 +26,7 @@
#define MTL_CAGF_MASK REG_GENMASK(8, 0)
#define MTL_CC0 0x0
#define MTL_CC6 0x3
-#define MTL_CC_MASK REG_GENMASK(12, 9)
+#define MTL_CC_MASK REG_GENMASK(10, 9)
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
@@ -164,6 +164,8 @@
#define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4)
#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
#define GEN11_ENABLE_32_PLANE_MODE (1 << 7)
+#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8)
+#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6)
#define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0)
#define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14)
@@ -412,9 +414,6 @@
#define XEHP_CULLBIT1 MCR_REG(0x6100)
-#define CHICKEN_RASTER_1 MCR_REG(0x6204)
-#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
-
#define CHICKEN_RASTER_2 MCR_REG(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
@@ -1085,6 +1084,7 @@
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
+#define XELPMP_RING_FAULT_REG _MMIO(0xcec4)
#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
#define RING_FAULT_GTTSEL_MASK (1 << 11)
#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
@@ -1221,6 +1221,8 @@
#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
+#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3)
+
#define ICL_HDC_MODE MCR_REG(0xe5f4)
#define EU_PERF_CNTL2 PERF_REG(0xe658)
@@ -1231,6 +1233,7 @@
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
+#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 13944a14ea2d..4fbed27ef0ec 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -21,6 +21,11 @@
#include "intel_gt_regs.h"
#include "intel_gtt.h"
+bool i915_ggtt_require_binder(struct drm_i915_private *i915)
+{
+ /* Wa_13010847436 & Wa_14019519902 */
+ return MEDIA_VER_FULL(i915) == IP_VER(13, 0);
+}
static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 4d6296cdbcfd..b471edac2699 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -35,9 +35,9 @@
#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
-#define DBG(...) trace_printk(__VA_ARGS__)
+#define GTT_TRACE(...) trace_printk(__VA_ARGS__)
#else
-#define DBG(...)
+#define GTT_TRACE(...)
#endif
#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
@@ -171,6 +171,9 @@ struct intel_gt;
#define for_each_sgt_daddr(__dp, __iter, __sgt) \
__for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+#define for_each_sgt_daddr_next(__dp, __iter) \
+ __for_each_daddr_next(__dp, __iter, I915_GTT_PAGE_SIZE)
+
struct i915_page_table {
struct drm_i915_gem_object *base;
union {
@@ -688,4 +691,6 @@ static inline struct sgt_dma {
return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) };
}
+bool i915_ggtt_require_binder(struct drm_i915_private *i915);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c378cc7c953c..eaf66d903166 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,6 +845,27 @@ lrc_setup_indirect_ctx(u32 *regs,
lrc_ring_indirect_offset_default(engine) << 6;
}
+static bool ctx_needs_runalone(const struct intel_context *ce)
+{
+ struct i915_gem_context *gem_ctx;
+ bool ctx_is_protected = false;
+
+ /*
+ * On MTL and newer platforms, protected contexts require setting
+ * the LRC run-alone bit or else the encryption will not happen.
+ */
+ if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
+ (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
+ rcu_read_lock();
+ gem_ctx = rcu_dereference(ce->gem_context);
+ if (gem_ctx)
+ ctx_is_protected = gem_ctx->uses_protected_content;
+ rcu_read_unlock();
+ }
+
+ return ctx_is_protected;
+}
+
static void init_common_regs(u32 * const regs,
const struct intel_context *ce,
const struct intel_engine_cs *engine,
@@ -860,6 +881,8 @@ static void init_common_regs(u32 * const regs,
if (GRAPHICS_VER(engine->i915) < 11)
ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
+ if (ctx_needs_runalone(ce))
+ ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
regs[CTX_CONTEXT_CONTROL] = ctl;
regs[CTX_TIMESTAMP] = ce->stats.runtime.last;
@@ -1317,29 +1340,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
}
/*
- * On DG2 during context restore of a preempted context in GPGPU mode,
- * RCS restore hang is detected. This is extremely timing dependent.
- * To address this below sw wabb is implemented for DG2 A steppings.
- */
-static u32 *
-dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
-{
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
- *cs++ = 0x21;
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
-
- *cs++ = MI_LOAD_REGISTER_REG;
- *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
- *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
-
- return cs;
-}
-
-/*
* The bspec's tuning guide asks us to program a vertical watermark value of
* 0x3FF. However this register is not saved/restored properly by the
* hardware, so we're required to apply the desired value via INDIRECT_CTX
@@ -1357,27 +1357,34 @@ dg2_emit_draw_watermark_setting(u32 *cs)
}
static u32 *
+gen12_invalidate_state_cache(u32 *cs)
+{
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
+ *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+ return cs;
+}
+
+static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
cs = gen12_emit_timestamp_wa(ce, cs);
cs = gen12_emit_cmd_buf_wa(ce, cs);
cs = gen12_emit_restore_scratch(ce, cs);
- /* Wa_22011450934:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0))
- cs = dg2_emit_rcs_hang_wabb(ce, cs);
-
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
cs = gen12_emit_aux_table_inv(ce->engine, cs);
+ /* Wa_18022495364 */
+ if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
+ cs = gen12_invalidate_state_cache(cs);
+
/* Wa_16014892111 */
- if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_DG2(ce->engine->i915))
cs = dg2_emit_draw_watermark_setting(cs);
@@ -1391,8 +1398,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
cs = gen12_emit_restore_scratch(ce, cs);
/* Wa_16013000631:dg2 */
- if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(ce->engine->i915))
+ if (IS_DG2_G11(ce->engine->i915))
if (ce->engine->class == COMPUTE_CLASS)
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 2c014407225c..353f93baaca0 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = {
MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
};
-static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = {
- /* Wa_14011441408: Set Go to Memory for MOCS#0 */
- MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Coherent; GO:Memory */
- MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
- /* UC - Non-Coherent; GO:Memory */
- MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)),
-
- /* WB - LC */
- MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)),
-};
-
static const struct drm_i915_mocs_entry pvc_mocs_table[] = {
/* Error */
MOCS_ENTRY(0, 0, L3_3_WB),
@@ -499,7 +487,7 @@ static bool has_mocs(const struct drm_i915_private *i915)
return !IS_DGFX(i915);
}
-static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
+static unsigned int get_mocs_settings(struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
unsigned int flags;
@@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
table->unused_entries_index = I915_MOCS_PTE;
- if (IS_METEORLAKE(i915)) {
+ if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) {
table->size = ARRAY_SIZE(mtl_mocs_table);
table->table = mtl_mocs_table;
table->n_entries = MTL_NUM_MOCS_ENTRIES;
@@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
table->wb_index = 2;
table->unused_entries_index = 2;
} else if (IS_DG2(i915)) {
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
- table->table = dg2_mocs_table_g10_ax;
- } else {
- table->size = ARRAY_SIZE(dg2_mocs_table);
- table->table = dg2_mocs_table;
- }
+ table->size = ARRAY_SIZE(dg2_mocs_table);
+ table->table = dg2_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->unused_entries_index = 3;
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 58bb1c55294c..8b67abd720be 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_EI_MODE(1);
/*
- * Wa_16011777198 and BSpec 52698 - Render powergating must be off.
+ * BSpec 52698 - Render powergating must be off.
* FIXME BSpec is outdated, disabling powergating for MTL is just
* temporary wa and should be removed after fixing real cause
* of forcewake timeouts.
*/
- if (IS_METEORLAKE(gt->i915) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
pg_enable =
GEN9_MEDIA_PG_ENABLE |
GEN11_MEDIA_SAMPLER_PG_ENABLE;
@@ -526,8 +524,7 @@ static bool rc6_supported(struct intel_rc6 *rc6)
return false;
}
- if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) &&
- gt->type == GT_MEDIA) {
+ if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) {
drm_notice(&i915->drm,
"Media RC6 disabled on A step\n");
return false;
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index cc6bd21a3e51..d5ed904f355d 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -26,6 +26,7 @@
#include "intel_engine_regs.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_print.h"
#include "intel_gt_requests.h"
#include "intel_mchbar_regs.h"
#include "intel_pci_config.h"
@@ -161,16 +162,16 @@ static int i915_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
int err;
- /* Assert reset for at least 20 usec, and wait for acknowledgement. */
+ /* Assert reset for at least 50 usec, and wait for acknowledgement. */
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
udelay(50);
- err = wait_for_atomic(i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0);
/* Clear the reset request. */
pci_write_config_byte(pdev, I915_GDRST, 0);
udelay(50);
if (!err)
- err = wait_for_atomic(!i915_in_reset(pdev), 50);
+ err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0);
return err;
}
@@ -190,7 +191,7 @@ static int g33_do_reset(struct intel_gt *gt,
struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev);
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
- return wait_for_atomic(g4x_reset_complete(pdev), 50);
+ return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
}
static int g4x_do_reset(struct intel_gt *gt,
@@ -207,7 +208,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_MEDIA | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for media reset failed\n");
goto out;
@@ -215,7 +216,7 @@ static int g4x_do_reset(struct intel_gt *gt,
pci_write_config_byte(pdev, I915_GDRST,
GRDOM_RENDER | GRDOM_RESET_ENABLE);
- ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
+ ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0);
if (ret) {
GT_TRACE(gt, "Wait for render reset failed\n");
goto out;
@@ -592,10 +593,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
ret = __intel_wait_for_register_fw(uncore, reg, mask, ack,
700, 0, NULL);
if (ret)
- drm_err(&engine->i915->drm,
- "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
- engine->name, request,
- intel_uncore_read_fw(uncore, reg));
+ gt_err(engine->gt,
+ "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n",
+ engine->name, request,
+ intel_uncore_read_fw(uncore, reg));
return ret;
}
@@ -705,7 +706,7 @@ static int __reset_guc(struct intel_gt *gt)
static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
- if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0))
+ if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0))
return false;
if (!__HAS_ENGINE(engine_mask, GSC0))
@@ -785,9 +786,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
reset_mask = wa_14015076503_start(gt, engine_mask, !retry);
GT_TRACE(gt, "engine_mask=%x\n", reset_mask);
- preempt_disable();
ret = reset(gt, reset_mask, retry);
- preempt_enable();
wa_14015076503_end(gt, reset_mask);
}
@@ -1201,17 +1200,16 @@ void intel_gt_reset(struct intel_gt *gt,
goto unlock;
if (reason)
- drm_notice(&gt->i915->drm,
- "Resetting chip for %s\n", reason);
+ gt_notice(gt, "Resetting chip for %s\n", reason);
atomic_inc(&gt->i915->gpu_error.reset_count);
awake = reset_prepare(gt);
if (!intel_has_gpu_reset(gt)) {
if (gt->i915->params.reset)
- drm_err(&gt->i915->drm, "GPU reset not supported\n");
+ gt_err(gt, "GPU reset not supported\n");
else
- drm_dbg(&gt->i915->drm, "GPU reset disabled\n");
+ gt_dbg(gt, "GPU reset disabled\n");
goto error;
}
@@ -1219,7 +1217,7 @@ void intel_gt_reset(struct intel_gt *gt,
intel_runtime_pm_disable_interrupts(gt->i915);
if (do_reset(gt, stalled_mask)) {
- drm_err(&gt->i915->drm, "Failed to reset chip\n");
+ gt_err(gt, "Failed to reset chip\n");
goto taint;
}
@@ -1238,9 +1236,7 @@ void intel_gt_reset(struct intel_gt *gt,
*/
ret = intel_gt_init_hw(gt);
if (ret) {
- drm_err(&gt->i915->drm,
- "Failed to initialise HW following reset (%d)\n",
- ret);
+ gt_err(gt, "Failed to initialise HW following reset (%d)\n", ret);
goto taint;
}
@@ -1607,9 +1603,7 @@ static void intel_wedge_me(struct work_struct *work)
{
struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- drm_err(&w->gt->i915->drm,
- "%s timed out, cancelling all in-flight rendering.\n",
- w->name);
+ gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name);
intel_gt_set_wedged(w->gt);
}
@@ -1632,6 +1626,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w)
w->gt = NULL;
}
+/*
+ * Wa_22011802037 requires that we (or the GuC) ensure that no command
+ * streamers are executing MI_FORCE_WAKE while an engine reset is initiated.
+ */
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt)
+{
+ if (GRAPHICS_VER(gt->i915) < 11)
+ return false;
+
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0))
+ return true;
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ return false;
+
+ return true;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 25c975b6e8fc..f615b30b81c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
bool intel_has_gpu_reset(const struct intel_gt *gt);
bool intel_has_reset_engine(const struct intel_gt *gt);
+bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt);
+
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 092542f53aad..4feef874e6d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
{
struct drm_i915_private *i915 = rps_to_i915(rps);
- if (IS_METEORLAKE(i915))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c
index 139608c30d97..4bb13d1890e3 100644
--- a/drivers/gpu/drm/i915/gt/intel_tlb.c
+++ b/drivers/gpu/drm/i915/gt/intel_tlb.c
@@ -12,6 +12,7 @@
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"
+#include "uc/intel_guc.h"
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
return;
with_intel_gt_pm_if_awake(gt, wakeref) {
+ struct intel_guc *guc = &gt->uc.guc;
+
mutex_lock(&gt->tlb.invalidate_lock);
if (tlb_seqno_passed(gt, seqno))
goto unlock;
- mmio_invalidate_full(gt);
+ if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
+ /*
+ * Only perform GuC TLB invalidation if GuC is ready.
+ * The only time GuC could not be ready is on GT reset,
+ * which would clobber all the TLBs anyways, making
+ * any TLB invalidation path here unnecessary.
+ */
+ if (intel_guc_is_ready(guc))
+ intel_guc_invalidate_tlb_engines(guc);
+ } else {
+ mmio_invalidate_full(gt);
+ }
write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 3ae0dbd39eaa..192ac0e59afa 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -11,6 +11,7 @@
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
+#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"
@@ -119,8 +120,8 @@ static void wa_init_finish(struct i915_wa_list *wal)
if (!wal->count)
return;
- drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n",
- wal->wa_count, wal->name, wal->engine_name);
+ gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n",
+ wal->wa_count, wal->name, wal->engine_name);
}
static enum forcewake_domains
@@ -764,68 +765,41 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
{
dg2_ctx_gt_tuning_init(engine, wal);
- /* Wa_16011186671:dg2_g11 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
- wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
- wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010469329:dg2_g10 */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
-
- /*
- * Wa_22010465075:dg2_g10
- * Wa_22010613112:dg2_g10
- * Wa_14010698770:dg2_g10
- */
- wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
- }
-
/* Wa_16013271637:dg2 */
wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
/* Wa_14014947963:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
+ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);
/* Wa_18018764978:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) ||
- IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915))
- wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
-
- /* Wa_15010599737:dg2 */
- wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN);
+ wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
/* Wa_18019271663:dg2 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
}
-static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
dg2_ctx_gt_tuning_init(engine, wal);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}
-static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine,
- struct i915_wa_list *wal)
+static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- mtl_ctx_gt_tuning_init(engine, wal);
+ xelpg_ctx_gt_tuning_init(engine, wal);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014947963 */
wa_masked_field_set(wal, VF_PREEMPTION,
PREEMPTION_VERTEX_COUNT, 0x4000);
@@ -931,8 +905,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_METEORLAKE(i915))
- mtl_ctx_workarounds_init(engine, wal);
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ xelpg_ctx_workarounds_init(engine, wal);
else if (IS_PONTEVECCHIO(i915))
; /* noop; none at this time */
else if (IS_DG2(i915))
@@ -1606,31 +1580,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- struct intel_engine_cs *engine;
- int id;
-
xehp_init_mcr(gt, wal);
/* Wa_14011060649:dg2 */
wa_14011060649(gt, wal);
- /*
- * Although there are per-engine instances of these registers,
- * they technically exist outside the engine itself and are not
- * impacted by engine resets. Furthermore, they're part of the
- * GuC blacklist so trying to treat them as engine workarounds
- * will result in GuC initialization failure and a wedged GPU.
- */
- for_each_engine(engine, gt, id) {
- if (engine->class != VIDEO_DECODE_CLASS)
- continue;
-
- /* Wa_16010515920:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
- ALNUNIT_CLKGATE_DIS);
- }
-
if (IS_DG2_G10(gt->i915)) {
/* Wa_22010523718:dg2 */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
@@ -1641,70 +1595,15 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
DSS_ROUTER_CLKGATE_DIS);
}
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012362059:dg2 */
- wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
- }
-
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
- /* Wa_14010948348:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
-
- /* Wa_14011037102:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
-
- /* Wa_14011371254:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
-
- /* Wa_14011431319:dg2_g10 */
- wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
- GAMTLBVDBOX7_CLKGATE_DIS |
- GAMTLBVDBOX6_CLKGATE_DIS |
- GAMTLBVDBOX5_CLKGATE_DIS |
- GAMTLBVDBOX4_CLKGATE_DIS |
- GAMTLBVDBOX3_CLKGATE_DIS |
- GAMTLBVDBOX2_CLKGATE_DIS |
- GAMTLBVDBOX1_CLKGATE_DIS |
- GAMTLBVDBOX0_CLKGATE_DIS |
- GAMTLBKCR_CLKGATE_DIS |
- GAMTLBGUC_CLKGATE_DIS |
- GAMTLBBLT_CLKGATE_DIS);
- wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
- GAMTLBGFXA1_CLKGATE_DIS |
- GAMTLBCOMPA0_CLKGATE_DIS |
- GAMTLBCOMPA1_CLKGATE_DIS |
- GAMTLBCOMPB0_CLKGATE_DIS |
- GAMTLBCOMPB1_CLKGATE_DIS |
- GAMTLBCOMPC0_CLKGATE_DIS |
- GAMTLBCOMPC1_CLKGATE_DIS |
- GAMTLBCOMPD0_CLKGATE_DIS |
- GAMTLBCOMPD1_CLKGATE_DIS |
- GAMTLBMERT_CLKGATE_DIS |
- GAMTLBVEBOX3_CLKGATE_DIS |
- GAMTLBVEBOX2_CLKGATE_DIS |
- GAMTLBVEBOX1_CLKGATE_DIS |
- GAMTLBVEBOX0_CLKGATE_DIS);
-
- /* Wa_14010569222:dg2_g10 */
- wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
- GAMEDIA_CLKGATE_DIS);
-
- /* Wa_14011028019:dg2_g10 */
- wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
-
- /* Wa_14010680813:dg2_g10 */
- wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL,
- CONTROL_BLOCK_CLKGATE_DIS |
- EGRESS_BLOCK_CLKGATE_DIS |
- TAG_BLOCK_CLKGATE_DIS);
- }
-
/* Wa_14014830051:dg2 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
- /* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+ /*
+ * Wa_14015795083
+ * Skip verification for possibly locked register.
+ */
+ wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
+ 0, 0, false);
/* Wa_18018781329 */
wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
@@ -1747,8 +1646,8 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_22016670082 */
wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_14014830051 */
wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
@@ -1791,10 +1690,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(gt->i915)) {
- if (gt->type != GT_MEDIA)
- wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
-
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}
@@ -1818,15 +1715,15 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
gt_tuning_settings(gt, wal);
if (gt->type == GT_MEDIA) {
- if (MEDIA_VER(i915) >= 13)
+ if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
xelpmp_gt_workarounds_init(gt, wal);
else
- MISSING_CASE(MEDIA_VER(i915));
+ MISSING_CASE(MEDIA_VER_FULL(i915));
return;
}
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
xelpg_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);
@@ -1884,10 +1781,10 @@ wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
const char *name, const char *from)
{
if ((cur ^ wa->set) & wa->read) {
- drm_err(&gt->i915->drm,
- "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
- name, from, i915_mmio_reg_offset(wa->reg),
- cur, cur & wa->read, wa->set & wa->read);
+ gt_err(gt,
+ "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
+ name, from, i915_mmio_reg_offset(wa->reg),
+ cur, cur & wa->read, wa->set & wa->read);
return false;
}
@@ -2242,29 +2139,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine)
switch (engine->class) {
case RENDER_CLASS:
- /*
- * Wa_1507100340:dg2_g10
- *
- * This covers 4 registers which are next to one another :
- * - PS_INVOCATION_COUNT
- * - PS_INVOCATION_COUNT_UDW
- * - PS_DEPTH_COUNT
- * - PS_DEPTH_COUNT_UDW
- */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_ACCESS_RD |
- RING_FORCE_TO_NONPRIV_RANGE_4);
-
/* Required by recommended tuning setting (not a workaround) */
whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
break;
- case COMPUTE_CLASS:
- /* Wa_16011157294:dg2_g10 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
- whitelist_reg(w, GEN9_CTX_PREEMPT_REG);
- break;
default:
break;
}
@@ -2294,7 +2172,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine)
blacklist_trtt(engine);
}
-static void mtl_whitelist_build(struct intel_engine_cs *engine)
+static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -2316,8 +2194,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
wa_init_start(w, engine->gt, "whitelist", engine->name);
- if (IS_METEORLAKE(i915))
- mtl_whitelist_build(engine);
+ if (engine->gt->type == GT_MEDIA)
+ ; /* none yet */
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ xelpg_whitelist_build(engine);
else if (IS_PONTEVECCHIO(i915))
pvc_whitelist_build(engine);
else if (IS_DG2(i915))
@@ -2415,62 +2295,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
}
-static bool needs_wa_1308578152(struct intel_engine_cs *engine)
-{
- return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >=
- GEN_DSS_PER_GSLICE;
-}
-
static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
/* Wa_22014600077 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
ENABLE_EU_COUNT_FOR_TDL_FLUSH);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_1509727124 */
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
SC_DISABLE_POWER_OPTIMIZATION_EBB);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915) ||
- IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22012856258 */
wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
GEN12_DISABLE_READ_SUPPRESSION);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14013392000:dg2_g11 */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
- /* Wa_14012419201:dg2 */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
- }
-
- /* Wa_1308578152:dg2_g10 when first gslice is fused off */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) &&
- needs_wa_1308578152(engine)) {
- wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
- GEN12_REPLAY_MODE_GRANULARITY);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_DG2(i915)) {
/*
* Wa_22010960976:dg2
* Wa_14013347512:dg2
@@ -2479,34 +2332,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
- /*
- * Wa_1608949956:dg2_g10
- * Wa_14010198302:dg2_g10
- */
- wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ IS_DG2(i915)) {
+ /* Wa_14015150844 */
+ wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
+ _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
+ 0, true);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
- /* Wa_22010430635:dg2 */
- wa_mcr_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
-
- /* Wa_14013202645:dg2 */
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
- wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
-
- /* Wa_22012532006:dg2 */
- if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
-
- if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G10(i915)) {
+ if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
/* Wa_22014600077:dg2 */
wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
_MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
@@ -2514,6 +2348,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
true);
}
+ if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
+ IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
+ /*
+ * Wa_1606700617:tgl,dg1,adl-p
+ * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
+ * Wa_14010826681:tgl,dg1,rkl,adl-p
+ * Wa_18019627453:dg2
+ */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+ }
+
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
@@ -2527,19 +2374,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/
wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
- }
- if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) ||
- IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
- /*
- * Wa_1606700617:tgl,dg1,adl-p
- * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
- * Wa_14010826681:tgl,dg1,rkl,adl-p
- * Wa_18019627453:dg2
- */
- wa_masked_en(wal,
- GEN9_CS_DEBUG_MODE1,
- FF_DOP_CLOCK_GATE_DISABLE);
+ /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
@@ -2566,14 +2405,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_RC_SEMA_IDLE_MSG_DISABLE);
}
- if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
- IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
- /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
- wa_mcr_masked_en(wal,
- GEN10_SAMPLER_MODE,
- ENABLE_SMALLPL);
- }
-
if (GRAPHICS_VER(i915) == 11) {
/* This is not an Wa. Enable for better image quality */
wa_masked_en(wal,
@@ -2975,10 +2806,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* function invoked by __intel_engine_init_ctx_wa().
*/
static void
-add_render_compute_tuning_settings(struct drm_i915_private *i915,
+add_render_compute_tuning_settings(struct intel_gt *gt,
struct i915_wa_list *wal)
{
- if (IS_METEORLAKE(i915) || IS_DG2(i915))
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*
@@ -3007,8 +2840,9 @@ static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
- add_render_compute_tuning_settings(i915, wal);
+ add_render_compute_tuning_settings(gt, wal);
if (GRAPHICS_VER(i915) >= 11) {
/* This is not a Wa (although referred to as
@@ -3029,13 +2863,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/*
* Wa_14017066071
* Wa_14017654203
@@ -3043,37 +2877,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
MTL_DISABLE_SAMPLER_SC_OOO);
- if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0))
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
/* Wa_22015279794 */
wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
DISABLE_PREFETCH_INTO_IC);
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
- IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
+ IS_DG2(i915)) {
/* Wa_22013037850 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
DISABLE_128B_EVICTION_COMMAND_UDW);
+
+ /* Wa_18017747507 */
+ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
IS_PONTEVECCHIO(i915) ||
IS_DG2(i915)) {
/* Wa_22014226127 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
}
- if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) ||
- IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) ||
- IS_DG2(i915)) {
- /* Wa_18017747507 */
- wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
+ if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) {
+ /* Wa_14015227452:dg2,pvc */
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+
+ /* Wa_16015675438:dg2,pvc */
+ wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
- IS_DG2_G11(i915)) {
+ if (IS_DG2(i915)) {
+ /*
+ * Wa_16011620976:dg2_g11
+ * Wa_22015475538:dg2
+ */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+ }
+
+ if (IS_DG2_G11(i915)) {
/*
* Wa_22012826095:dg2
* Wa_22013059131:dg2
@@ -3085,18 +2929,23 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
/* Wa_22013059131:dg2 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
- }
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/*
- * Wa_14010918519:dg2_g10
+ * Wa_22012654132
*
- * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
- * so ignoring verification.
+ * Note that register 0xE420 is write-only and cannot be read
+ * back for verification on DG2 (due to Wa_14012342262), so
+ * we need to explicitly skip the readback.
*/
- wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
- 0, false);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
+ }
+
+ if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
+ /* Wa_18028616096 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
}
if (IS_XEHPSDV(i915)) {
@@ -3114,35 +2963,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
}
-
- if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
- /* Wa_14015227452:dg2,pvc */
- wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
-
- /* Wa_16015675438:dg2,pvc */
- wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
- }
-
- if (IS_DG2(i915)) {
- /*
- * Wa_16011620976:dg2_g11
- * Wa_22015475538:dg2
- */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
- }
-
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915))
- /*
- * Wa_22012654132
- *
- * Note that register 0xE420 is write-only and cannot be read
- * back for verification on DG2 (due to Wa_14012342262), so
- * we need to explicitly skip the readback.
- */
- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
- 0 /* write-only, so skip validation */,
- true);
}
static void
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 3def5ca72dec..1a34cbe04fb6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -710,7 +710,7 @@ static int threaded_migrate(struct intel_migrate *migrate,
thread[i].tsk = tsk;
}
- msleep(10); /* start all threads before we kthread_stop() */
+ msleep(10 * n_cpus); /* start all threads before we kthread_stop() */
for (i = 0; i < n_cpus; ++i) {
struct task_struct *tsk = thread[i].tsk;
diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c
index 7e41f69fc818..00b872b6380b 100644
--- a/drivers/gpu/drm/i915/gt/selftest_tlb.c
+++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c
@@ -136,8 +136,15 @@ pte_tlbinv(struct intel_context *ce,
i915_request_get(rq);
i915_request_add(rq);
- /* Short sleep to sanitycheck the batch is spinning before we begin */
- msleep(10);
+ /*
+ * Short sleep to sanitycheck the batch is spinning before we begin.
+ * FIXME: Why is GSC so slow?
+ */
+ if (ce->engine->class == OTHER_CLASS)
+ msleep(200);
+ else
+ msleep(10);
+
if (va == vb) {
if (!i915_request_completed(rq)) {
pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index f359bef046e0..33f253410d0c 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -138,6 +138,8 @@ enum intel_guc_action {
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507,
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
+ INTEL_GUC_ACTION_TLB_INVALIDATION = 0x7000,
+ INTEL_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001,
INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002,
INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
@@ -181,4 +183,35 @@ enum intel_guc_state_capture_event_status {
#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF
+#define INTEL_GUC_TLB_INVAL_TYPE_MASK REG_GENMASK(7, 0)
+#define INTEL_GUC_TLB_INVAL_MODE_MASK REG_GENMASK(11, 8)
+#define INTEL_GUC_TLB_INVAL_FLUSH_CACHE REG_BIT(31)
+
+enum intel_guc_tlb_invalidation_type {
+ INTEL_GUC_TLB_INVAL_ENGINES = 0x0,
+ INTEL_GUC_TLB_INVAL_GUC = 0x3,
+};
+
+/*
+ * 0: Heavy mode of Invalidation:
+ * The pipeline of the engine(s) for which the invalidation is targeted to is
+ * blocked, and all the in-flight transactions are guaranteed to be Globally
+ * Observed before completing the TLB invalidation
+ * 1: Lite mode of Invalidation:
+ * TLBs of the targeted engine(s) are immediately invalidated.
+ * In-flight transactions are NOT guaranteed to be Globally Observed before
+ * completing TLB invalidation.
+ * Light Invalidation Mode is to be used only when
+ * it can be guaranteed (by SW) that the address translations remain invariant
+ * for the in-flight transactions across the TLB invalidation. In other words,
+ * this mode can be used when the TLB invalidation is intended to clear out the
+ * stale cached translations that are no longer in use. Light Invalidation Mode
+ * is much faster than the Heavy Invalidation Mode, as it does not wait for the
+ * in-flight transactions to be GOd.
+ */
+enum intel_guc_tlb_inval_mode {
+ INTEL_GUC_TLB_INVAL_MODE_HEAVY = 0x0,
+ INTEL_GUC_TLB_INVAL_MODE_LITE = 0x1,
+};
+
#endif /* _ABI_GUC_ACTIONS_ABI_H */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
index 0d3b22a74365..453d855dd1de 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c
@@ -68,8 +68,7 @@ static void gsc_work(struct work_struct *work)
* A proxy failure right after firmware load means the proxy-init
* step has failed so mark GSC as not usable after this
*/
- drm_err(&gt->i915->drm,
- "GSC proxy handler failed to init\n");
+ gt_err(gt, "GSC proxy handler failed to init\n");
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
}
goto out_put;
@@ -83,11 +82,10 @@ static void gsc_work(struct work_struct *work)
* status register to check if the proxy init was actually successful
*/
if (intel_gsc_uc_fw_proxy_init_done(gsc, false)) {
- drm_dbg(&gt->i915->drm, "GSC Proxy initialized\n");
+ gt_dbg(gt, "GSC Proxy initialized\n");
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING);
} else {
- drm_err(&gt->i915->drm,
- "GSC status reports proxy init not complete\n");
+ gt_err(gt, "GSC status reports proxy init not complete\n");
intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
index 89ed5ee9cded..2fde5c360cff 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c
@@ -81,8 +81,17 @@ out_rq:
i915_request_add(rq);
- if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0)
- err = -ETIME;
+ if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (non-privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-priv submission to gsccs-hw");
+ if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0)
+ err = -ETIME;
+ }
i915_request_put(rq);
@@ -186,6 +195,13 @@ out_rq:
i915_request_add(rq);
if (!err) {
+ /*
+ * Start timeout for i915_request_wait only after considering one possible
+ * pending GSC-HECI submission cycle on the other (privileged) path.
+ */
+ if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS))
+ drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm,
+ "Delay in gsc-heci-non-priv submission to gsccs-hw");
if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
msecs_to_jiffies(timeout_ms)) < 0)
err = -ETIME;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
index 09d3fbdad05a..c4308291c003 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h
@@ -12,6 +12,12 @@ struct i915_vma;
struct intel_context;
struct intel_gsc_uc;
+#define GSC_HECI_REPLY_LATENCY_MS 500
+/*
+ * Max FW response time is 500ms, but this should be counted from the time the
+ * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB.
+ */
+
struct intel_gsc_mtl_header {
u32 validity_marker;
#define GSC_HECI_VALIDITY_MARKER 0xA578875A
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 569b5fe94c41..3f3df1166b86 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -159,6 +159,21 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc)
gen11_reset_guc_interrupts(guc);
}
+static void guc_dead_worker_func(struct work_struct *w)
+{
+ struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker);
+ struct intel_gt *gt = guc_to_gt(guc);
+ unsigned long last = guc->last_dead_guc_jiffies;
+ unsigned long delta = jiffies_to_msecs(jiffies - last);
+
+ if (delta < 500) {
+ intel_gt_set_wedged(gt);
+ } else {
+ intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC");
+ guc->last_dead_guc_jiffies = jiffies;
+ }
+}
+
void intel_guc_init_early(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -171,6 +186,8 @@ void intel_guc_init_early(struct intel_guc *guc)
intel_guc_slpc_init_early(&guc->slpc);
intel_guc_rc_init_early(guc);
+ INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func);
+
mutex_init(&guc->send_mutex);
spin_lock_init(&guc->irq_lock);
if (GRAPHICS_VER(i915) >= 11) {
@@ -272,18 +289,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50))
flags |= GUC_WA_POLLCS;
- /* Wa_16011759253:dg2_g10:a0 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
- flags |= GUC_WA_GAM_CREDITS;
-
/* Wa_14014475959 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(gt->i915))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
/*
- * Wa_14012197797:dg2_g10:a0,dg2_g11:a0
- * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12
+ * Wa_14012197797
+ * Wa_22011391025
*
* The same WA bit is used for both and 22011391025 is applicable to
* all DG2.
@@ -292,28 +305,26 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
flags |= GUC_WA_DUAL_QUEUE;
/* Wa_22011802037: graphics version 11/12 */
- if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(gt->i915) >= 11 &&
- GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70)))
+ if (intel_engine_reset_needs_wa_22011802037(gt))
flags |= GUC_WA_PRE_PARSER;
- /* Wa_16011777198:dg2 */
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
- flags |= GUC_WA_RCS_RESET_BEFORE_RC6;
-
/*
- * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..)
- * Wa_22012727685:dg2_g11[a0..)
+ * Wa_22012727170
+ * Wa_22012727685
*/
- if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER))
+ if (IS_DG2_G11(gt->i915))
flags |= GUC_WA_CONTEXT_ISOLATION;
/* Wa_16015675438 */
if (!RCS_MASK(gt))
flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
+ /* Wa_14018913170 */
+ if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) {
+ if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || IS_PONTEVECCHIO(gt->i915))
+ flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
+ }
+
return flags;
}
@@ -461,6 +472,8 @@ void intel_guc_fini(struct intel_guc *guc)
if (!intel_uc_fw_is_loadable(&guc->fw))
return;
+ flush_work(&guc->dead_guc_worker);
+
if (intel_guc_slpc_is_used(guc))
intel_guc_slpc_fini(&guc->slpc);
@@ -585,6 +598,20 @@ out:
return ret;
}
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action)
+{
+ if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED)
+ guc_err(guc, "Crash dump notification\n");
+ else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION)
+ guc_err(guc, "Exception notification\n");
+ else
+ guc_err(guc, "Unknown crash notification: 0x%04X\n", action);
+
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
+ return 0;
+}
+
int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
const u32 *payload, u32 len)
{
@@ -601,6 +628,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
guc_err(guc, "Received early exception notification!\n");
+ if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION))
+ queue_work(system_unbound_wq, &guc->dead_guc_worker);
+
return 0;
}
@@ -640,6 +670,8 @@ int intel_guc_suspend(struct intel_guc *guc)
return 0;
if (intel_guc_submission_is_used(guc)) {
+ flush_work(&guc->dead_guc_worker);
+
/*
* This H2G MMIO command tears down the GuC in two steps. First it will
* generate a G2H CTB for every active context indicating a reset. In
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 8dc291ff0093..2b6dfe62c8f2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -79,6 +79,18 @@ struct intel_guc {
*/
atomic_t outstanding_submission_g2h;
+ /** @tlb_lookup: xarray to store all pending TLB invalidation requests */
+ struct xarray tlb_lookup;
+
+ /**
+ * @serial_slot: id to the initial waiter created in tlb_lookup,
+ * which is used only when failed to allocate new waiter.
+ */
+ u32 serial_slot;
+
+ /** @next_seqno: the next id (sequence number) to allocate. */
+ u32 next_seqno;
+
/** @interrupts: pointers to GuC interrupt-managing functions. */
struct {
bool enabled;
@@ -266,6 +278,20 @@ struct intel_guc {
unsigned long last_stat_jiffies;
} timestamp;
+ /**
+ * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset.
+ * Specifically used when the G2H handler wants to issue a reset. Resets
+ * require flushing the G2H queue. So, the G2H processing itself must not
+ * trigger a reset directly. Instead, go via this worker.
+ */
+ struct work_struct dead_guc_worker;
+ /**
+ * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance
+ * used to prevent a fundamentally broken system from continuously
+ * reloading the GuC.
+ */
+ unsigned long last_dead_guc_jiffies;
+
#ifdef CONFIG_DRM_I915_SELFTEST
/**
* @number_guc_id_stolen: The number of guc_ids that have been stolen
@@ -274,6 +300,11 @@ struct intel_guc {
#endif
};
+struct intel_guc_tlb_wait {
+ struct wait_queue_head wq;
+ bool busy;
+};
+
/*
* GuC version number components are only 8-bit, so converting to a 32bit 8.8.8
* integer works.
@@ -281,6 +312,7 @@ struct intel_guc {
#define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat))
#define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch)
#define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->submission_version)
+#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver)
static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
{
@@ -476,6 +508,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
int intel_guc_error_capture_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len);
+int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action);
struct intel_engine_cs *
intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance);
@@ -499,4 +532,10 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
+bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc);
+int intel_guc_invalidate_tlb_engines(struct intel_guc *guc);
+int intel_guc_invalidate_tlb_guc(struct intel_guc *guc);
+int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
+ const u32 *payload, u32 len);
+void wake_up_all_tlb_invalidate(struct intel_guc *guc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index 331cec07c125..a4da0208c883 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -1101,8 +1101,8 @@ guc_capture_create_prealloc_nodes(struct intel_guc *guc)
static int
guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf)
{
- struct guc_state_capture_group_header_t ghdr = {0};
- struct guc_state_capture_header_t hdr = {0};
+ struct guc_state_capture_group_header_t ghdr = {};
+ struct guc_state_capture_header_t hdr = {};
struct __guc_capture_parsed_output *node = NULL;
struct guc_mmio_reg *regs = NULL;
int i, numlists, numregs, ret = 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 97eadd08181d..89e314b3756b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -96,13 +96,40 @@ struct ct_request {
struct ct_incoming_msg {
struct list_head link;
u32 size;
- u32 msg[];
+ u32 msg[] __counted_by(size);
};
enum { CTB_SEND = 0, CTB_RECV = 1 };
enum { CTB_OWNER_HOST = 0 };
+/*
+ * Some H2G commands involve a synchronous response that the driver needs
+ * to wait for. In such cases, a timeout is required to prevent the driver
+ * from waiting forever in the case of an error (either no error response
+ * is defined in the protocol or something has died and requires a reset).
+ * The specific command may be defined as having a time bound response but
+ * the CT is a queue and that time guarantee only starts from the point
+ * when the command reaches the head of the queue and is processed by GuC.
+ *
+ * Ideally there would be a helper to report the progress of a given
+ * command through the CT. However, that would require a significant
+ * amount of work in the CT layer. In the meantime, provide a reasonable
+ * estimation of the worst case latency it should take for the entire
+ * queue to drain. And therefore, how long a caller should wait before
+ * giving up on their request. The current estimate is based on empirical
+ * measurement of a test that fills the buffer with context creation and
+ * destruction requests as they seem to be the slowest operation.
+ */
+long intel_guc_ct_max_queue_time_jiffies(void)
+{
+ /*
+ * A 4KB buffer full of context destroy commands takes a little
+ * over a second to process so bump that to 2s to be super safe.
+ */
+ return (CTB_H2G_BUFFER_SIZE * HZ) / SZ_2K;
+}
+
static void ct_receive_tasklet_func(struct tasklet_struct *t);
static void ct_incoming_request_worker_func(struct work_struct *w);
@@ -1112,12 +1139,11 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r
ret = 0;
break;
case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED:
- CT_ERROR(ct, "Received GuC crash dump notification!\n");
- ret = 0;
- break;
case INTEL_GUC_ACTION_NOTIFY_EXCEPTION:
- CT_ERROR(ct, "Received GuC exception notification!\n");
- ret = 0;
+ ret = intel_guc_crash_process_msg(guc, action);
+ break;
+ case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
+ ret = intel_guc_tlb_invalidation_done(guc, payload, len);
break;
default:
ret = -EOPNOTSUPP;
@@ -1190,9 +1216,17 @@ static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *requ
switch (action) {
case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE:
case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE:
+ case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE:
g2h_release_space(ct, request->size);
}
+ /*
+ * TLB invalidation responses must be handled immediately as processing
+ * of other G2H notifications may be blocked by an invalidation request.
+ */
+ if (action == INTEL_GUC_ACTION_TLB_INVALIDATION_DONE)
+ return ct_process_request(ct, request);
+
spin_lock_irqsave(&ct->requests.lock, flags);
list_add_tail(&request->link, &ct->requests.incoming);
spin_unlock_irqrestore(&ct->requests.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
index 58e42901ff49..2c4bb9a941be 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -104,6 +104,8 @@ struct intel_guc_ct {
#endif
};
+long intel_guc_ct_max_queue_time_jiffies(void);
+
void intel_guc_ct_init_early(struct intel_guc_ct *ct);
int intel_guc_ct_init(struct intel_guc_ct *ct);
void intel_guc_ct_fini(struct intel_guc_ct *ct);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b4d56eccfb1f..8ae1846431da 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -22,6 +22,7 @@
/* Payload length only i.e. don't include G2H header length */
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2
#define G2H_LEN_DW_DEREGISTER_CONTEXT 1
+#define G2H_LEN_DW_INVALIDATE_TLB 1
#define GUC_CONTEXT_DISABLE 0
#define GUC_CONTEXT_ENABLE 1
@@ -100,6 +101,7 @@
#define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17)
#define GUC_WA_POLLCS BIT(18)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
+#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 477df260ae3a..2dfb07cc4b33 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -138,17 +138,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
}
-static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
-{
- u32 request[] = {
- GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
- SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
- id,
- };
-
- return intel_guc_send(guc, request, ARRAY_SIZE(request));
-}
-
static bool slpc_is_running(struct intel_guc_slpc *slpc)
{
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -199,15 +188,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
return ret;
}
-static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
-{
- struct intel_guc *guc = slpc_to_guc(slpc);
-
- GEM_BUG_ON(id >= SLPC_MAX_PARAM);
-
- return guc_action_slpc_unset_param(guc, id);
-}
-
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
{
struct intel_guc *guc = slpc_to_guc(slpc);
@@ -672,49 +652,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
slpc->boost_freq = slpc->rp0_freq;
}
-/**
- * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
- * @slpc: pointer to intel_guc_slpc.
- * @mode: new value of the mode.
- *
- * This function will override the GUCRC mode.
- *
- * Return: 0 on success, non-zero error code on failure.
- */
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
-{
- int ret;
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
-
- if (mode >= SLPC_GUCRC_MODE_MAX)
- return -EINVAL;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
- if (ret)
- guc_err(slpc_to_guc(slpc), "Override RC mode %d failed: %pe\n",
- mode, ERR_PTR(ret));
- }
-
- return ret;
-}
-
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
-{
- struct drm_i915_private *i915 = slpc_to_i915(slpc);
- intel_wakeref_t wakeref;
- int ret = 0;
-
- with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
- ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
- if (ret)
- guc_err(slpc_to_guc(slpc), "Unsetting RC mode failed: %pe\n", ERR_PTR(ret));
- }
-
- return ret;
-}
-
/*
* intel_guc_slpc_enable() - Start SLPC
* @slpc: pointer to intel_guc_slpc.
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 597eb5413ddf..6ac6503c39d4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -44,8 +44,6 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
-int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index dc7b40e06e38..d37698bd6b91 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -32,6 +32,7 @@
#include "i915_drv.h"
#include "i915_reg.h"
+#include "i915_irq.h"
#include "i915_trace.h"
/**
@@ -1690,9 +1691,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
* Wa_22011802037: In addition to stopping the cs, we need
* to wait for any pending mi force wakeups
*/
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
- (GRAPHICS_VER(engine->i915) >= 11 &&
- GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) {
+ if (intel_engine_reset_needs_wa_22011802037(engine->gt)) {
intel_engine_stop_cs(engine);
intel_engine_wait_for_pending_mi_fw(engine);
}
@@ -1798,6 +1797,20 @@ next_context:
intel_context_put(parent);
}
+void wake_up_all_tlb_invalidate(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+ unsigned long i;
+
+ if (!intel_guc_tlb_invalidation_is_available(guc))
+ return;
+
+ xa_lock_irq(&guc->tlb_lookup);
+ xa_for_each(&guc->tlb_lookup, i, wait)
+ wake_up(&wait->wq);
+ xa_unlock_irq(&guc->tlb_lookup);
+}
+
void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
{
struct intel_context *ce;
@@ -1923,6 +1936,12 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
/* GuC is blown away, drop all references to contexts */
xa_destroy(&guc->context_lookup);
+
+ /*
+ * Wedged GT won't respond to any TLB invalidation request. Simply
+ * release all the blocked waiters.
+ */
+ wake_up_all_tlb_invalidate(guc);
}
void intel_guc_submission_reset_finish(struct intel_guc *guc)
@@ -1945,11 +1964,65 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
intel_guc_global_policies_update(guc);
enable_submission(guc);
intel_gt_unpark_heartbeats(guc_to_gt(guc));
+
+ /*
+ * The full GT reset will have cleared the TLB caches and flushed the
+ * G2H message queue; we can release all the blocked waiters.
+ */
+ wake_up_all_tlb_invalidate(guc);
}
static void destroyed_worker_func(struct work_struct *w);
static void reset_fail_worker_func(struct work_struct *w);
+bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc)
+{
+ return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) &&
+ intel_guc_is_ready(guc);
+}
+
+static int init_tlb_lookup(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+ int err;
+
+ if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+ return 0;
+
+ xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC);
+
+ wait = kzalloc(sizeof(*wait), GFP_KERNEL);
+ if (!wait)
+ return -ENOMEM;
+
+ init_waitqueue_head(&wait->wq);
+
+ /* Preallocate a shared id for use under memory pressure. */
+ err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait,
+ xa_limit_32b, &guc->next_seqno, GFP_KERNEL);
+ if (err < 0) {
+ kfree(wait);
+ return err;
+ }
+
+ return 0;
+}
+
+static void fini_tlb_lookup(struct intel_guc *guc)
+{
+ struct intel_guc_tlb_wait *wait;
+
+ if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915))
+ return;
+
+ wait = xa_load(&guc->tlb_lookup, guc->serial_slot);
+ if (wait && wait->busy)
+ guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n");
+ kfree(wait);
+
+ xa_destroy(&guc->tlb_lookup);
+}
+
/*
* Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time.
@@ -1968,11 +2041,15 @@ int intel_guc_submission_init(struct intel_guc *guc)
return ret;
}
+ ret = init_tlb_lookup(guc);
+ if (ret)
+ goto destroy_pool;
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
if (!guc->submission_state.guc_ids_bitmap) {
ret = -ENOMEM;
- goto destroy_pool;
+ goto destroy_tlb;
}
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1981,9 +2058,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
return 0;
+destroy_tlb:
+ fini_tlb_lookup(guc);
destroy_pool:
guc_lrc_desc_pool_destroy_v69(guc);
-
return ret;
}
@@ -1996,6 +2074,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
+ fini_tlb_lookup(guc);
guc->submission_initialized = false;
}
@@ -4299,7 +4378,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
/* Wa_14014475959:dg2 */
if (engine->class == COMPUTE_CLASS)
- if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) ||
+ if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
IS_DG2(engine->i915))
engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT;
@@ -4626,6 +4705,154 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
return ce;
}
+static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno)
+{
+ struct intel_guc_tlb_wait *wait;
+ unsigned long flags;
+
+ xa_lock_irqsave(&guc->tlb_lookup, flags);
+ wait = xa_load(&guc->tlb_lookup, seqno);
+
+ if (wait)
+ wake_up(&wait->wq);
+ else
+ guc_dbg(guc,
+ "Stale TLB invalidation response with seqno %d\n", seqno);
+
+ xa_unlock_irqrestore(&guc->tlb_lookup, flags);
+}
+
+int intel_guc_tlb_invalidation_done(struct intel_guc *guc,
+ const u32 *payload, u32 len)
+{
+ if (len < 1)
+ return -EPROTO;
+
+ wait_wake_outstanding_tlb_g2h(guc, payload[0]);
+ return 0;
+}
+
+static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout)
+{
+ /*
+ * This is equivalent to wait_woken() with the exception that
+ * we do not wake up early if the kthread task has been completed.
+ * As we are called from page reclaim in any task context,
+ * we may be invoked from stopped kthreads, but we *must*
+ * complete the wait from the HW.
+ */
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (wq_entry->flags & WQ_FLAG_WOKEN)
+ break;
+
+ timeout = schedule_timeout(timeout);
+ } while (timeout);
+
+ /* See wait_woken() and woken_wake_function() */
+ __set_current_state(TASK_RUNNING);
+ smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN);
+
+ return timeout;
+}
+
+static bool intel_gt_is_enabled(const struct intel_gt *gt)
+{
+ /* Check if GT is wedged or suspended */
+ if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915))
+ return false;
+ return true;
+}
+
+static int guc_send_invalidate_tlb(struct intel_guc *guc,
+ enum intel_guc_tlb_invalidation_type type)
+{
+ struct intel_guc_tlb_wait _wq, *wq = &_wq;
+ struct intel_gt *gt = guc_to_gt(guc);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int err;
+ u32 seqno;
+ u32 action[] = {
+ INTEL_GUC_ACTION_TLB_INVALIDATION,
+ 0,
+ REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) |
+ REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK,
+ INTEL_GUC_TLB_INVAL_MODE_HEAVY) |
+ INTEL_GUC_TLB_INVAL_FLUSH_CACHE,
+ };
+ u32 size = ARRAY_SIZE(action);
+
+ /*
+ * Early guard against GT enablement. TLB invalidation should not be
+ * attempted if the GT is disabled due to suspend/wedge.
+ */
+ if (!intel_gt_is_enabled(gt))
+ return -EINVAL;
+
+ init_waitqueue_head(&_wq.wq);
+
+ if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq,
+ xa_limit_32b, &guc->next_seqno,
+ GFP_ATOMIC | __GFP_NOWARN) < 0) {
+ /* Under severe memory pressure? Serialise TLB allocations */
+ xa_lock_irq(&guc->tlb_lookup);
+ wq = xa_load(&guc->tlb_lookup, guc->serial_slot);
+ wait_event_lock_irq(wq->wq,
+ !READ_ONCE(wq->busy),
+ guc->tlb_lookup.xa_lock);
+ /*
+ * Update wq->busy under lock to ensure only one waiter can
+ * issue the TLB invalidation command using the serial slot at a
+ * time. The condition is set to true before releasing the lock
+ * so that other caller continue to wait until woken up again.
+ */
+ wq->busy = true;
+ xa_unlock_irq(&guc->tlb_lookup);
+
+ seqno = guc->serial_slot;
+ }
+
+ action[1] = seqno;
+
+ add_wait_queue(&wq->wq, &wait);
+
+ /* This is a critical reclaim path and thus we must loop here. */
+ err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true);
+ if (err)
+ goto out;
+
+ /*
+ * Late guard against GT enablement. It is not an error for the TLB
+ * invalidation to time out if the GT is disabled during the process
+ * due to suspend/wedge. In fact, the TLB invalidation is cancelled
+ * in this case.
+ */
+ if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) &&
+ intel_gt_is_enabled(gt)) {
+ guc_err(guc,
+ "TLB invalidation response timed out for seqno %u\n", seqno);
+ err = -ETIME;
+ }
+out:
+ remove_wait_queue(&wq->wq, &wait);
+ if (seqno != guc->serial_slot)
+ xa_erase_irq(&guc->tlb_lookup, seqno);
+
+ return err;
+}
+
+/* Send a H2G command to invalidate the TLBs at engine level and beyond. */
+int intel_guc_invalidate_tlb_engines(struct intel_guc *guc)
+{
+ return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES);
+}
+
+/* Send a H2G command to invalidate the GuC's internal TLB. */
+int intel_guc_invalidate_tlb_guc(struct intel_guc *guc)
+{
+ return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC);
+}
+
int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
const u32 *msg,
u32 len)
@@ -4804,19 +5031,19 @@ static void guc_context_replay(struct intel_context *ce)
static void guc_handle_context_reset(struct intel_guc *guc,
struct intel_context *ce)
{
+ bool capture = intel_context_is_schedulable(ce);
+
trace_intel_context_reset(ce);
- guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
+ guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n",
+ capture ? "Got" : "Ignoring",
ce->guc_id.id, ce->engine->name,
str_yes_no(intel_context_is_exiting(ce)),
str_yes_no(intel_context_is_banned(ce)));
- if (likely(intel_context_is_schedulable(ce))) {
+ if (capture) {
capture_error_state(guc, ce);
guc_context_replay(ce);
- } else {
- guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
- ce->guc_id.id, ce->engine->name);
}
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 98b103375b7a..27f6561dd731 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -688,6 +688,8 @@ void intel_uc_suspend(struct intel_uc *uc)
/* flush the GSC worker */
intel_gsc_uc_flush_work(&uc->gsc);
+ wake_up_all_tlb_invalidate(guc);
+
if (!intel_guc_is_ready(guc)) {
guc->interrupts.enabled = false;
return;
@@ -736,6 +738,11 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
intel_gsc_uc_resume(&uc->gsc);
+ if (intel_guc_tlb_invalidation_is_available(guc)) {
+ intel_guc_invalidate_tlb_engines(guc);
+ intel_guc_invalidate_tlb_guc(guc);
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 8be005de1d28..362639162ed6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -88,12 +88,12 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* security fixes, etc. to be enabled.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \
- fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 6, 6)) \
- fw_def(DG2, 0, guc_maj(dg2, 70, 5, 1)) \
- fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5, 1)) \
+ fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 12, 1)) \
+ fw_def(DG2, 0, guc_maj(dg2, 70, 12, 1)) \
+ fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 12, 1)) \
fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \
- fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5, 1)) \
+ fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 12, 1)) \
fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \
fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \
fw_def(DG1, 0, guc_maj(dg1, 70, 5, 1)) \
@@ -132,6 +132,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0))
/*
+ * The GSC FW has multiple version (see intel_gsc_uc.h for details); since what
+ * we care about is the interface, we use the compatibility version in the
+ * binary names.
+ * Same as with the GuC, a major version bump indicate a
+ * backward-incompatible change, while a minor version bump indicates a
+ * backward-compatible one, so we use only the former in the file name.
+ */
+#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \
+ fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0))
+
+/*
* Set of macros for producing a list of filenames from the above table.
*/
#define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \
@@ -166,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_)
+#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \
+ __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_)
+
/*
* All blobs need to be declared via MODULE_FIRMWARE().
* This first expansion of the table macros is solely to provide
@@ -176,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
+INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH)
/*
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
@@ -225,6 +240,10 @@ struct __packed uc_fw_blob {
#define HUC_FW_BLOB_GSC(prefix_) \
UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+#define GSC_FW_BLOB(prefix_, major_, minor_) \
+ UC_FW_BLOB_NEW(major_, minor_, 0, true, \
+ MAKE_GSC_FW_PATH(prefix_, major_, minor_))
+
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@@ -251,9 +270,14 @@ static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
};
+static const struct uc_fw_platform_requirement blobs_gsc[] = {
+ INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB)
+};
+
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
+ [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) },
};
static void
@@ -267,14 +291,6 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
bool found;
/*
- * GSC FW support is still not fully in place, so we're not defining
- * the FW blob yet because we don't want the driver to attempt to load
- * it until we're ready for it.
- */
- if (uc_fw->type == INTEL_UC_FW_TYPE_GSC)
- return;
-
- /*
* The only difference between the ADL GuC FWs is the HWConfig support.
* ADL-N does not support HWConfig, so we should use the same binary as
* ADL-S, otherwise the GuC might attempt to fetch a config table that