mirror of
https://github.com/torvalds/linux.git
synced 2026-05-24 23:22:31 +02:00
drm/amdkfd: gfx12.1 cluster barrier context save workaround
Trap cluster barrier may not serialize with user cluster barrier under some circumstances. Add a check for a pending user cluster barrier completion.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Tested-by: Gang Ba <Gang.Ba@amd.com>
Cc: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Cc: Vladimir Indic <vladimir.indic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
ea89b305b6
commit
29b703d7ad
|
|
@ -3754,11 +3754,11 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
|
|||
0x84708a70, 0x8070ff70,
|
||||
0x00000200, 0x7e000280,
|
||||
0x7e020280, 0x7e040280,
|
||||
0xbefd0080, 0xbe804ec2,
|
||||
0xbf94fffe, 0xb8faf804,
|
||||
0x8b7a847a, 0x91788478,
|
||||
0x8c787a78, 0xd7610002,
|
||||
0xbefd0080, 0xd7610002,
|
||||
0x0000fa71, 0x807d817d,
|
||||
0xbe804ec2, 0xbf94fffe,
|
||||
0xb8faf804, 0x8b7a847a,
|
||||
0x91788478, 0x8c787a78,
|
||||
0xd7610002, 0x0000fa6c,
|
||||
0x807d817d, 0x917aff6d,
|
||||
0x80000000, 0xd7610002,
|
||||
|
|
@ -4587,7 +4587,7 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
|
|||
};
|
||||
|
||||
static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
|
||||
0xbfa00001, 0xbfa003aa,
|
||||
0xbfa00001, 0xbfa003b4,
|
||||
0xb0804009, 0xb8eef81a,
|
||||
0xbf880000, 0xb980081a,
|
||||
0x00000000, 0xb8f8f804,
|
||||
|
|
@ -4838,15 +4838,20 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
|
|||
0x84708a70, 0x8070ff70,
|
||||
0x00000200, 0x7e000280,
|
||||
0x7e020280, 0x7e040280,
|
||||
0xbefd0080, 0xb8faf802,
|
||||
0xbf0c8b7a, 0xbfa20003,
|
||||
0xbe804fc2, 0xbf94fffe,
|
||||
0xbfa10001, 0xbe804ec4,
|
||||
0xbf94fffc, 0xb8faf804,
|
||||
0x8b7aff7a, 0x0001000c,
|
||||
0x9178ff78, 0x0001000c,
|
||||
0x8c787a78, 0xd7610002,
|
||||
0xbefd0080, 0xd7610002,
|
||||
0x0000fa71, 0x807d817d,
|
||||
0xb8faf802, 0xbf0c8b7a,
|
||||
0xbfa20003, 0xbe804fc2,
|
||||
0xbf94fffe, 0xbfa10001,
|
||||
0xbe804ec4, 0xbf94fffc,
|
||||
0xbefa4c88, 0xbfc70000,
|
||||
0xbf0c807a, 0xbfa20006,
|
||||
0x9371ff7a, 0x00070004,
|
||||
0x937aff7a, 0x00070010,
|
||||
0xbf06717a, 0xbfa2fff6,
|
||||
0xb8faf804, 0x8b7aff7a,
|
||||
0x0001000c, 0x9178ff78,
|
||||
0x0001000c, 0x8c787a78,
|
||||
0xd7610002, 0x0000fa6c,
|
||||
0x807d817d, 0x917aff6d,
|
||||
0x80000000, 0xd7610002,
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@
|
|||
#define HAVE_BANKED_VGPRS (ASIC_FAMILY == CHIP_GC_12_0_3)
|
||||
#define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0)
|
||||
#define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
|
||||
#define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
|
||||
|
||||
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
|
||||
#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
|
||||
|
|
@ -104,6 +105,7 @@ var SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT = 0
|
|||
var SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE = 2
|
||||
|
||||
var BARRIER_STATE_SIGNAL_OFFSET = 16
|
||||
var BARRIER_STATE_SIGNAL_SIZE = 7
|
||||
var BARRIER_STATE_MEMBER_OFFSET = 4
|
||||
var BARRIER_STATE_MEMBER_SIZE = 7
|
||||
var BARRIER_STATE_VALID_OFFSET = 0
|
||||
|
|
@ -519,9 +521,11 @@ L_SAVE_HWREG:
|
|||
v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
|
||||
s_mov_b32 m0, 0x0 //Next lane of v2 to write to
|
||||
|
||||
write_hwreg_to_v2(s_save_m0)
|
||||
|
||||
// Ensure no further changes to barrier or LDS state.
|
||||
// STATE_PRIV.*BARRIER_COMPLETE may change up to this point.
|
||||
wait_trap_barriers(s_save_tmp)
|
||||
wait_trap_barriers(s_save_tmp, s_save_m0, 1)
|
||||
|
||||
// Re-read final state of *BARRIER_COMPLETE fields for save.
|
||||
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
|
||||
|
|
@ -529,7 +533,6 @@ L_SAVE_HWREG:
|
|||
s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALL_BARRIER_COMPLETE_MASK
|
||||
s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
|
||||
|
||||
write_hwreg_to_v2(s_save_m0)
|
||||
write_hwreg_to_v2(s_save_pc_lo)
|
||||
s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
|
||||
write_hwreg_to_v2(s_save_tmp)
|
||||
|
|
@ -1197,7 +1200,7 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
|
|||
|
||||
// Make barrier and LDS state visible to all waves in the group/cluster.
|
||||
// STATE_PRIV.*BARRIER_COMPLETE may change after this point.
|
||||
wait_trap_barriers(s_restore_tmp)
|
||||
wait_trap_barriers(s_restore_tmp, 0, 0)
|
||||
|
||||
#if HAVE_CLUSTER_BARRIER
|
||||
// SCC is changed by wait_trap_barriers, restore it separately.
|
||||
|
|
@ -1210,7 +1213,7 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
|
|||
L_END_PGM:
|
||||
// Make sure that no wave of the group/cluster can exit the trap handler
|
||||
// before the group/cluster barrier state is saved.
|
||||
wait_trap_barriers(s_restore_tmp)
|
||||
wait_trap_barriers(s_restore_tmp, 0, 0)
|
||||
|
||||
s_endpgm_saved
|
||||
end
|
||||
|
|
@ -1300,11 +1303,11 @@ function restore_xnack_state_priv(s_tmp)
|
|||
end
|
||||
#endif
|
||||
|
||||
function wait_trap_barriers(s_tmp)
|
||||
function wait_trap_barriers(s_tmp1, s_tmp2, serialize_wa)
|
||||
#if HAVE_CLUSTER_BARRIER
|
||||
// If not in a WG then wave cannot use s_barrier_signal_isfirst.
|
||||
s_getreg_b32 s_tmp, hwreg(HW_REG_WAVE_STATUS)
|
||||
s_bitcmp0_b32 s_tmp, SQ_WAVE_STATUS_IN_WG_SHIFT
|
||||
s_getreg_b32 s_tmp1, hwreg(HW_REG_WAVE_STATUS)
|
||||
s_bitcmp0_b32 s_tmp1, SQ_WAVE_STATUS_IN_WG_SHIFT
|
||||
s_cbranch_scc1 L_TRAP_CLUSTER_BARRIER_SIGNAL
|
||||
|
||||
s_barrier_signal_isfirst -2
|
||||
|
|
@ -1318,6 +1321,25 @@ L_TRAP_CLUSTER_BARRIER_SIGNAL:
|
|||
|
||||
L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL:
|
||||
s_barrier_wait -4
|
||||
|
||||
#if CLUSTER_BARRIER_SERIALIZE_WORKAROUND
|
||||
if serialize_wa
|
||||
// Trap cluster barrier may complete with a user cluster barrier in-flight.
|
||||
// This is indicated if user cluster member count and signal count are equal.
|
||||
L_WAIT_USER_CLUSTER_BARRIER_COMPLETE:
|
||||
s_sendmsg_rtn_b32 s_tmp1, sendmsg(MSG_RTN_GET_CLUSTER_BARRIER_STATE)
|
||||
s_wait_kmcnt 0
|
||||
s_bitcmp0_b32 s_tmp1, BARRIER_STATE_VALID_OFFSET
|
||||
s_cbranch_scc1 L_NOT_IN_CLUSTER
|
||||
|
||||
s_bfe_u32 s_tmp2, s_tmp1, (BARRIER_STATE_MEMBER_OFFSET | (BARRIER_STATE_MEMBER_SIZE << 0x10))
|
||||
s_bfe_u32 s_tmp1, s_tmp1, (BARRIER_STATE_SIGNAL_OFFSET | (BARRIER_STATE_SIGNAL_SIZE << 0x10))
|
||||
s_cmp_eq_u32 s_tmp1, s_tmp2
|
||||
s_cbranch_scc1 L_WAIT_USER_CLUSTER_BARRIER_COMPLETE
|
||||
end
|
||||
L_NOT_IN_CLUSTER:
|
||||
#endif
|
||||
|
||||
#else
|
||||
s_barrier_signal -2
|
||||
s_barrier_wait -2
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user