drm/amdkfd: gfx12.1 trap handler support for expert scheduling mode

- Leave DEP_MODE unchanged as it is ignored in the trap handler
- Save/restore SCHED_MODE (gfx12.0 saves in ttmp11)

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Cc: Vladimir Indic <vladimir.indic@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Jay Cornwall 2026-01-16 12:48:45 -06:00 committed by Alex Deucher
parent 29b703d7ad
commit bbcad5a889
2 changed files with 220 additions and 191 deletions

View File

@ -4587,18 +4587,14 @@ static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
};
static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0xbfa00001, 0xbfa003b4,
0xb0804009, 0xb8eef81a,
0xbf880000, 0xb980081a,
0x00000000, 0xb8f8f804,
0x9177ff77, 0x0c000000,
0x846e9a6e, 0x8c776e77,
0xbfa00001, 0xbfa003ac,
0xb0804009, 0xb8f8f804,
0x9178ff78, 0x00008c00,
0xb8fbf811, 0x8b6eff78,
0x00004000, 0xbfa10008,
0x8b6eff7b, 0x00000080,
0xbfa20018, 0x8b6ea07b,
0xbfa200d4, 0xbf830010,
0xbfa200d1, 0xbf830010,
0xb8fbf811, 0xbfa0fffb,
0x8b6eff7b, 0x00000bd0,
0xbfa20010, 0xb8eef812,
@ -4609,7 +4605,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0xf0000000, 0xbfa20005,
0x8b6fff6f, 0x00000200,
0xbfa20002, 0x8b6ea07b,
0xbfa200be, 0x9177ff77,
0xbfa200bb, 0x9177ff77,
0x007fc000, 0xb8fa04a1,
0x847a967a, 0x8c777a77,
0xb8fa0421, 0x847a957a,
@ -4702,189 +4698,189 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0xb97a0421, 0x857a8e77,
0xb97a3021, 0x8bfe7e7e,
0x8bea6a6a, 0x85788978,
0x936eff77, 0x0002001a,
0xb96ef81a, 0xb9783244,
0xbe804a6c, 0xb8faf802,
0xbf0d987a, 0xbfa10001,
0xbfb00000, 0x8b6dff6d,
0x01ffffff, 0xbefa0080,
0xb97a0151, 0x9177ff77,
0x007fc000, 0xb8fa04a1,
0x847a967a, 0x8c777a77,
0xb8fa0421, 0x847a957a,
0x8c777a77, 0xb8fa3021,
0x847a8e7a, 0x8c777a77,
0xb980f821, 0x00000000,
0xbf0d847b, 0xbfa20078,
0xf4003eb6, 0xf8000000,
0xbfc70000, 0xf4003bb6,
0xf8000008, 0x8b76ff7a,
0x80000000, 0xbfa20027,
0x9376ff7a, 0x00060019,
0x81f9a376, 0xbf0b8179,
0xbfa20068, 0x81f9ac76,
0xbf0b8179, 0xbfa20062,
0x81f9b776, 0xbf0b8179,
0xbfa2005f, 0x8b76ff7a,
0x000001ff, 0xbf06ff76,
0x000000fe, 0xbfa2005d,
0xbf06ff76, 0x000000ff,
0xbfa20057, 0xbf06ff76,
0x000000fa, 0xbfa20054,
0x81f9ff76, 0x000000e9,
0xbf0b8179, 0xbfa20050,
0x8b76ff7b, 0xffff0000,
0xbf06ff76, 0xbf860000,
0xbfa10051, 0x9376ff7b,
0x0002000e, 0x8b79ff7b,
0x00003f00, 0x85798679,
0x8c767976, 0xb9763b01,
0xbfa00049, 0x8b76ff7a,
0xfc000000, 0xbf06ff76,
0xd4000000, 0xbfa20013,
0xbf06ff76, 0xc8000000,
0xbfa20027, 0x8b76ff7a,
0xff000000, 0xbf06ff76,
0xcf000000, 0xbfa20039,
0x8b79ff7a, 0xffff0000,
0xbf06ff79, 0xcc350000,
0xbfa20037, 0xbf06ff79,
0xcc3a0000, 0xbfa20034,
0xbf06ff76, 0xcc000000,
0xbfa10031, 0x8b76ff7b,
0x000001ff, 0xbf06ff76,
0x000000ff, 0xbfa20029,
0xbf06ff76, 0x000000fa,
0xbfa20026, 0x81f6ff76,
0x000000e9, 0xbf0b8176,
0xbfa20022, 0x8b76ff7b,
0x0003fe00, 0xbf06ff76,
0x0001fe00, 0xbfa2001d,
0x8b76ff7b, 0x07fc0000,
0xbf06ff76, 0x03fc0000,
0xbfa20018, 0xbfa00014,
0x9376ff7a, 0x00040016,
0x81f68176, 0xbf0b8176,
0xbfa20012, 0x9376ff7a,
0x00050011, 0x81f68176,
0xbf0b8176, 0xbfa2000d,
0xb9783244, 0xbe804a6c,
0xb8faf802, 0xbf0d987a,
0xbfa10001, 0xbfb00000,
0x8b6dff6d, 0x01ffffff,
0xbefa0080, 0xb97a0151,
0x9177ff77, 0x007fc000,
0xb8fa04a1, 0x847a967a,
0x8c777a77, 0xb8fa0421,
0x847a957a, 0x8c777a77,
0xb8fa3021, 0x847a8e7a,
0x8c777a77, 0xb980f821,
0x00000000, 0xbf0d847b,
0xbfa20078, 0xf4003eb6,
0xf8000000, 0xbfc70000,
0xf4003bb6, 0xf8000008,
0x8b76ff7a, 0x80000000,
0xbfa20027, 0x9376ff7a,
0x00060019, 0x81f9a376,
0xbf0b8179, 0xbfa20068,
0x81f9ac76, 0xbf0b8179,
0xbfa20062, 0x81f9b776,
0xbf0b8179, 0xbfa2005f,
0x8b76ff7a, 0x000001ff,
0xbf06ff76, 0x000000fe,
0xbfa2005d, 0xbf06ff76,
0x000000ff, 0xbfa20057,
0xbf06ff76, 0x000000fa,
0xbfa20054, 0x81f9ff76,
0x000000e9, 0xbf0b8179,
0xbfa20050, 0x8b76ff7b,
0xffff0000, 0xbf06ff76,
0xbf860000, 0xbfa10051,
0x9376ff7b, 0x0002000e,
0x8b79ff7b, 0x00003f00,
0x85798679, 0x8c767976,
0xb9763b01, 0xbfa00049,
0x8b76ff7a, 0xfc000000,
0xbf06ff76, 0xd4000000,
0xbfa20013, 0xbf06ff76,
0xc8000000, 0xbfa20027,
0x8b76ff7a, 0xff000000,
0xbf06ff76, 0xcf000000,
0xbfa20039, 0x8b79ff7a,
0xffff0000, 0xbf06ff79,
0xcc350000, 0xbfa20037,
0xbf06ff79, 0xcc3a0000,
0xbfa20034, 0xbf06ff76,
0xcc000000, 0xbfa10031,
0x8b76ff7b, 0x000001ff,
0xbf06ff76, 0x000000ff,
0xbfa20008, 0x8b76ff7b,
0xbfa20029, 0xbf06ff76,
0x000000fa, 0xbfa20026,
0x81f6ff76, 0x000000e9,
0xbf0b8176, 0xbfa20022,
0x8b76ff7b, 0x0003fe00,
0xbf06ff76, 0x0001fe00,
0xbfa2001d, 0x8b76ff7b,
0x07fc0000, 0xbf06ff76,
0x03fc0000, 0xbfa20018,
0xbfa00014, 0x9376ff7a,
0x00040016, 0x81f68176,
0xbf0b8176, 0xbfa20012,
0x9376ff7a, 0x00050011,
0x81f68176, 0xbf0b8176,
0xbfa2000d, 0x8b76ff7a,
0x000001ff, 0xbf06ff76,
0x000000ff, 0xbfa20003,
0xbfc70000, 0xbefb006e,
0xbfa0ffad, 0xbfc70000,
0xbefb006f, 0xbfa0ffaa,
0xbfc70000, 0xbeee007e,
0xbeef007f, 0xbefe0180,
0xbefe4d84, 0xbf8a0000,
0x8b7aff7f, 0x04000000,
0x847a857a, 0x8c6d7a6d,
0xb8eff822, 0xb980f822,
0x00000000, 0xb8fa2b01,
0x847a997a, 0x8c6d7a6d,
0xbefa0080, 0xb97a2b01,
0xbefa007e, 0x8b7bff7f,
0x01ffffff, 0xbefe00c1,
0xbeff00c1, 0xee0a407a,
0x000c0000, 0x00000000,
0x7e000280, 0xbefe007a,
0xbeff007b, 0xb8fb0742,
0x847b997b, 0xb8fa3b05,
0x807a817a, 0xbf0d997b,
0xbfa20002, 0x847a897a,
0xbfa00001, 0x847a8a7a,
0x000000ff, 0xbfa20008,
0x8b76ff7b, 0x000001ff,
0xbf06ff76, 0x000000ff,
0xbfa20003, 0xbfc70000,
0xbefb006e, 0xbfa0ffad,
0xbfc70000, 0xbefb006f,
0xbfa0ffaa, 0xbfc70000,
0xbeee007e, 0xbeef007f,
0xbefe0180, 0xbefe4d84,
0xbf8a0000, 0x8b7aff7f,
0x04000000, 0x847a857a,
0x8c6d7a6d, 0xb8eff822,
0xb980f822, 0x00000000,
0xb8fa2b01, 0x847a997a,
0x8c6d7a6d, 0xbefa0080,
0xb97a2b01, 0xbefa007e,
0x8b7bff7f, 0x01ffffff,
0x807aff7a, 0x000001c0,
0x807a7e7a, 0x827b807b,
0xd7610000, 0x00010870,
0xd7610000, 0x00010a71,
0xd7610000, 0x00010c72,
0xd7610000, 0x00010e73,
0xd7610000, 0x00011074,
0xd7610000, 0x00011275,
0xd7610000, 0x00011476,
0xd7610000, 0x00011677,
0xd7610000, 0x00011a79,
0xd7610000, 0x00011c7e,
0xd7610000, 0x00011e7f,
0xbefe00ff, 0x00003fff,
0xbeff0080, 0xee0a407a,
0x000c0000, 0x00000000,
0xd760007a, 0x00011d00,
0xd760007b, 0x00011f00,
0xbefe00c1, 0xbeff00c1,
0xee0a407a, 0x000c0000,
0x00000000, 0x7e000280,
0xbefe007a, 0xbeff007b,
0xbef4007e, 0x8b75ff7f,
0x01ffffff, 0xbef1007d,
0xb8f30742, 0x84739973,
0xbefe00c1, 0x857d9973,
0x8b7d817d, 0xbf06817d,
0xbfa20002, 0xbeff0080,
0xbfa00002, 0xbeff00c1,
0xbfa0000a, 0xee0a4074,
0x008c0000, 0x00008000,
0xee0a4074, 0x010c0000,
0xb8fb0742, 0x847b997b,
0xb8fa3b05, 0x807a817a,
0xbf0d997b, 0xbfa20002,
0x847a897a, 0xbfa00001,
0x847a8a7a, 0x8b7bff7f,
0x01ffffff, 0x807aff7a,
0x000001c0, 0x807a7e7a,
0x827b807b, 0xd7610000,
0x00010870, 0xd7610000,
0x00010a71, 0xd7610000,
0x00010c72, 0xd7610000,
0x00010e73, 0xd7610000,
0x00011074, 0xd7610000,
0x00011275, 0xd7610000,
0x00011476, 0xd7610000,
0x00011677, 0xd7610000,
0x00011a79, 0xd7610000,
0x00011c7e, 0xd7610000,
0x00011e7f, 0xbefe00ff,
0x00003fff, 0xbeff0080,
0xee0a407a, 0x000c0000,
0x00000000, 0xd760007a,
0x00011d00, 0xd760007b,
0x00011f00, 0xbefe007a,
0xbeff007b, 0xbef4007e,
0x8b75ff7f, 0x01ffffff,
0xbef1007d, 0xb8f30742,
0x84739973, 0xbefe00c1,
0x857d9973, 0x8b7d817d,
0xbf06817d, 0xbfa20002,
0xbeff0080, 0xbfa00002,
0xbeff00c1, 0xbfa0000a,
0xee0a4074, 0x008c0000,
0x00008000, 0xee0a4074,
0x010c0000, 0x00010000,
0xee0a4074, 0x018c0000,
0x00018000, 0xbfa00009,
0xee0a4074, 0x008c0000,
0x00010000, 0xee0a4074,
0x018c0000, 0x00018000,
0xbfa00009, 0xee0a4074,
0x008c0000, 0x00010000,
0xee0a4074, 0x010c0000,
0x00020000, 0xee0a4074,
0x018c0000, 0x00030000,
0xb8f03b05, 0x80708170,
0xbf0d9973, 0xbfa20002,
0x84708970, 0xbfa00001,
0x84708a70, 0x8070ff70,
0x00000200, 0x7e000280,
0x7e020280, 0x7e040280,
0xbefd0080, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8faf802, 0xbf0c8b7a,
0xbfa20003, 0xbe804fc2,
0xbf94fffe, 0xbfa10001,
0xbe804ec4, 0xbf94fffc,
0xbefa4c88, 0xbfc70000,
0xbf0c807a, 0xbfa20006,
0x9371ff7a, 0x00070004,
0x937aff7a, 0x00070010,
0xbf06717a, 0xbfa2fff6,
0xb8faf804, 0x8b7aff7a,
0x0001000c, 0x9178ff78,
0x0001000c, 0x8c787a78,
0xd7610002, 0x0000fa6c,
0x807d817d, 0x917aff6d,
0x80000000, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xd7610002, 0x0000fa6e,
0x807d817d, 0xbefa0080,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xd7610002,
0x0000fa78, 0x807d817d,
0xb8faf811, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xd7610002, 0x0000fa6f,
0x807d817d, 0xb8f1f801,
0x937aff6d, 0x00060019,
0x847a8c7a, 0x8c717a71,
0xd7610002, 0x0000fa71,
0x807d817d, 0xb8f1f814,
0xd7610002, 0x0000fa71,
0x807d817d, 0xb8f1f815,
0xd7610002, 0x0000fa71,
0x807d817d, 0xb8f1f812,
0xd7610002, 0x0000fa71,
0x807d817d, 0xb8f1f813,
0x010c0000, 0x00020000,
0xee0a4074, 0x018c0000,
0x00030000, 0xb8f03b05,
0x80708170, 0xbf0d9973,
0xbfa20002, 0x84708970,
0xbfa00001, 0x84708a70,
0x8070ff70, 0x00000200,
0x7e000280, 0x7e020280,
0x7e040280, 0xbefd0080,
0xd7610002, 0x0000fa71,
0x807d817d, 0xb8faf802,
0xbf0c8b7a, 0xbfa20003,
0xbe804fc2, 0xbf94fffe,
0xbfa10001, 0xbe804ec4,
0xbf94fffc, 0xbefa4c88,
0xbfc70000, 0xbf0c807a,
0xbfa20006, 0x9371ff7a,
0x00070004, 0x937aff7a,
0x00070010, 0xbf06717a,
0xbfa2fff6, 0xb8faf804,
0x8b7aff7a, 0x0001000c,
0x9178ff78, 0x0001000c,
0x8c787a78, 0xd7610002,
0x0000fa6c, 0x807d817d,
0x917aff6d, 0x80000000,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xbefa50c1,
0x807d817d, 0xd7610002,
0x0000fa6e, 0x807d817d,
0xbefa0080, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xd7610002, 0x0000fa78,
0x807d817d, 0xb8faf811,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xd7610002,
0x0000fa6f, 0x807d817d,
0xb8f1f801, 0x937aff6d,
0x00060019, 0x847a8c7a,
0x8c717a71, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8f1f814, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8f1f815, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8f1f812, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8f1f813, 0xd7610002,
0x0000fa71, 0x807d817d,
0xb8faf802, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xbefa50c1, 0xbfc70000,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xbefa4c88,
0xbfc70000, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xbefa4c88, 0xbfc70000,
0xd7610002, 0x0000fa7a,
0x807d817d, 0xbefe00ff,
0x0000ffff, 0xbeff0080,
0xb8faf81a, 0xd7610002,
0x0000fa7a, 0x807d817d,
0xbefe00c1, 0xbeff0080,
0x80767074, 0x82778075,
0xee0a4076, 0x010c0000,
0x00000000, 0xbefe00c1,
@ -5061,7 +5057,7 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x018c0000, 0x00030000,
0x807d847d, 0x8070ff70,
0x00000400, 0xbf0a7b7d,
0xbfa2ffe9, 0xbfa00183,
0xbfa2ffe9, 0xbfa00184,
0xbef4007e, 0x8b75ff7f,
0x01ffffff, 0xbef1007f,
0xb8f20742, 0x84729972,
@ -5229,6 +5225,8 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x856e906e, 0x8b6e6e6e,
0xbfa10003, 0xbe804ec3,
0x816ec16e, 0xbfa0fffb,
0xf4601bbb, 0xf8000040,
0xbfc70000, 0xb96ef81a,
0xbefd006f, 0xbefe0070,
0xbeff0071, 0xb979f822,
0xb97b2011, 0x857b867b,
@ -5248,19 +5246,17 @@ static const uint32_t cwsr_trap_gfx12_1_0_hex[] = {
0x856e8e77, 0xb96e3021,
0x8b6dff6d, 0x01ffffff,
0x8bfe7e7e, 0x8bea6a6a,
0x936eff77, 0x0002001a,
0xb96ef81a, 0xb97af804,
0xb97af804, 0xb8eef802,
0xbf0c8b6e, 0xbfa20003,
0xbe804fc2, 0xbf94fffe,
0xbfa10001, 0xbe804ec4,
0xbf94fffc, 0x857a897a,
0xb97a0244, 0xbe804a6c,
0xb8eef802, 0xbf0c8b6e,
0xbfa20003, 0xbe804fc2,
0xbf94fffe, 0xbfa10001,
0xbe804ec4, 0xbf94fffc,
0x857a897a, 0xb97a0244,
0xbe804a6c, 0xb8eef802,
0xbf0c8b6e, 0xbfa20003,
0xbe804fc2, 0xbf94fffe,
0xbfa10001, 0xbe804ec4,
0xbf94fffc, 0xbfb10000,
0xbfb10000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0x00000000,
};

View File

@ -36,6 +36,7 @@
#define NUM_NAMED_BARRIERS (ASIC_FAMILY == CHIP_GC_12_0_3 ? 0x10 : 0)
#define HAVE_CLUSTER_BARRIER (ASIC_FAMILY == CHIP_GC_12_0_3)
#define CLUSTER_BARRIER_SERIALIZE_WORKAROUND (ASIC_FAMILY == CHIP_GC_12_0_3)
#define RELAXED_SCHEDULING_IN_TRAP (ASIC_FAMILY == CHIP_GFX12)
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
@ -110,6 +111,12 @@ var BARRIER_STATE_MEMBER_OFFSET = 4
var BARRIER_STATE_MEMBER_SIZE = 7
var BARRIER_STATE_VALID_OFFSET = 0
#if RELAXED_SCHEDULING_IN_TRAP
var TTMP11_SCHED_MODE_SHIFT = 26
var TTMP11_SCHED_MODE_SIZE = 2
var TTMP11_SCHED_MODE_MASK = 0xC000000
#endif
var NAMED_BARRIERS_SR_OFFSET_FROM_HWREG = 0x80
var S_BARRIER_INIT_MEMBERCNT_MASK = 0x7F0000
var S_BARRIER_INIT_MEMBERCNT_SHIFT = 0x10
@ -222,18 +229,22 @@ L_JUMP_TO_RESTORE:
s_branch L_RESTORE
L_SKIP_RESTORE:
#if RELAXED_SCHEDULING_IN_TRAP
// Assume most relaxed scheduling mode is set. Save and revert to normal mode.
s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE)
s_wait_alu 0
s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE, \
SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0
#endif
s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
#if RELAXED_SCHEDULING_IN_TRAP
// Save SCHED_MODE[1:0] into ttmp11[27:26].
s_andn2_b32 ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK
s_lshl_b32 ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT
s_or_b32 ttmp11, ttmp11, ttmp2
#endif
// Clear SPI_PRIO: do not save with elevated priority.
// Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
@ -315,7 +326,7 @@ L_FETCH_2ND_TRAP:
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
s_or_b32 ttmp15, ttmp15, ~ADDRESS_HI32_MASK
L_NO_SIGN_EXTEND_TMA:
#if ASIC_FAMILY == CHIP_GFX12
#if RELAXED_SCHEDULING_IN_TRAP
// Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI).
// The second-level trap will restore from ttmp1 for backwards compatibility.
s_and_b32 ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK
@ -381,8 +392,10 @@ L_EXIT_TRAP:
// Only restore fields which the trap handler changes.
s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
#if RELAXED_SCHEDULING_IN_TRAP
// Assume relaxed scheduling mode after this point.
restore_sched_mode(ttmp2)
#endif
s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
@ -590,8 +603,18 @@ L_SAVE_HWREG:
write_hwreg_to_v2(s_save_tmp)
#endif
#if ASIC_FAMILY >= CHIP_GC_12_0_3
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_SCHED_MODE)
write_hwreg_to_v2(s_save_tmp)
#endif
#if ! SAVE_TTMPS_IN_SGPR_BLOCK
// Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
s_mov_b32 exec_lo, 0xFFFF
#else
// All 128 bytes are available for HWREGs.
s_mov_b32 exec_lo, 0xFFFFFFFF
#endif
s_mov_b32 exec_hi, 0x0
s_add_u32 s_save_addr_lo, s_save_base_addr_lo, s_save_mem_offset
s_addc_u32 s_save_addr_hi, s_save_base_addr_hi, 0x0
@ -1154,6 +1177,12 @@ L_SKIP_TRAP_CLUSTER_BARRIER_SIGNAL:
L_SKIP_CLUSTER_BARRIER_RESTORE:
#endif
#if ASIC_FAMILY >= CHIP_GC_12_0_3
s_load_b32 s_restore_tmp, [s_restore_addr_lo, s_restore_addr_hi], null scope:SCOPE_SYS offset:0x40
s_wait_kmcnt 0
s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_restore_tmp
#endif
s_mov_b32 m0, s_restore_m0
s_mov_b32 exec_lo, s_restore_exec_lo
s_mov_b32 exec_hi, s_restore_exec_hi
@ -1193,8 +1222,10 @@ L_SKIP_CLUSTER_BARRIER_RESTORE:
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
#if RELAXED_SCHEDULING_IN_TRAP
// Assume relaxed scheduling mode after this point.
restore_sched_mode(s_restore_tmp)
#endif
s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
@ -1346,10 +1377,12 @@ L_NOT_IN_CLUSTER:
#endif
end
#if RELAXED_SCHEDULING_IN_TRAP
// Reload HW_REG_WAVE_SCHED_MODE from the SCHED_MODE field kept in ttmp11.
// s_tmp: scratch register supplied by the caller; it is overwritten with the
//        extracted field value.
function restore_sched_mode(s_tmp)
// Bit-field extract from ttmp11: the operand combines the field offset
// (TTMP11_SCHED_MODE_SHIFT) with the field width (TTMP11_SCHED_MODE_SIZE)
// shifted left by 0x10.
s_bfe_u32 s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10))
// Write the extracted value into the scheduling-mode hardware register.
s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp
end
#endif
function restore_barrier_signal_count(barrier_id)
// extract the saved signal count from s_restore_tmp