mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 02:24:24 +02:00
drm/amdkfd: save and restore barrier state for gfx12
Add support to save and restore the work group barrier state in gfx12 CWSR trap handler. There is no support to directly restore the signal count of a barrier state, so instead this patch repeatedly calls s_barrier_signal to increment the signal count to the desired value. In this patch, I have implemented the logic to restore the barrier at the end of the block restoring the HWREGs. This process needs to be done by exactly 1 wave per work group. To achieve this, the initial value of s_restore_spi_init_hi (containing a FIRST_WAVE bit) needs to be saved up until that point. An alternative could be to restore the barrier earlier in the process (around when LDS is restored, as the same wave does both). Doing this would break the pattern that the restore procedure follows the CWSR area layout. Before restoring the barrier, this patch checks if the barrier whose state was saved has the "valid" bit set, even if I don't think this barrier can be in an invalid state during context save. I expect this test to always be true. Signed-off-by: Lancelot SIX <lancelot.six@amd.com> Reviewed-by: Jay Cornwall <jay.cornwall@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
f281003336
commit
450abfe433
|
|
@ -3647,7 +3647,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
|
|||
};
|
||||
|
||||
static const uint32_t cwsr_trap_gfx12_hex[] = {
|
||||
0xbfa00001, 0xbfa0023b,
|
||||
0xbfa00001, 0xbfa00240,
|
||||
0xb0804009, 0xb8f8f804,
|
||||
0x9178ff78, 0x00008c00,
|
||||
0xb8fbf811, 0x8b6eff78,
|
||||
|
|
@ -3781,21 +3781,57 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
|
|||
0x0000fa71, 0x807d817d,
|
||||
0xb8faf802, 0xd7610002,
|
||||
0x0000fa7a, 0x807d817d,
|
||||
0xbefe00ff, 0x0000ffff,
|
||||
0xbeff0080, 0xc4068070,
|
||||
0xbefa50c1, 0xbfc70000,
|
||||
0xd7610002, 0x0000fa7a,
|
||||
0x807d817d, 0xbefe00ff,
|
||||
0x0000ffff, 0xbeff0080,
|
||||
0xc4068070, 0x008ce802,
|
||||
0x00000000, 0xbefe00c1,
|
||||
0xb8f03b05, 0x80708170,
|
||||
0xbf0d9973, 0xbfa20002,
|
||||
0x84708970, 0xbfa00001,
|
||||
0x84708a70, 0xb8fa1e06,
|
||||
0x847a8a7a, 0x80707a70,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbef90080, 0xbefd0080,
|
||||
0xbf800000, 0xbe804100,
|
||||
0xbe824102, 0xbe844104,
|
||||
0xbe864106, 0xbe884108,
|
||||
0xbe8a410a, 0xbe8c410c,
|
||||
0xbe8e410e, 0xd7610002,
|
||||
0x0000f200, 0x80798179,
|
||||
0xd7610002, 0x0000f201,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f202, 0x80798179,
|
||||
0xd7610002, 0x0000f203,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f204, 0x80798179,
|
||||
0xd7610002, 0x0000f205,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f206, 0x80798179,
|
||||
0xd7610002, 0x0000f207,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f208, 0x80798179,
|
||||
0xd7610002, 0x0000f209,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20a, 0x80798179,
|
||||
0xd7610002, 0x0000f20b,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20c, 0x80798179,
|
||||
0xd7610002, 0x0000f20d,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20e, 0x80798179,
|
||||
0xd7610002, 0x0000f20f,
|
||||
0x80798179, 0xbf06a079,
|
||||
0xbfa10007, 0xc4068070,
|
||||
0x008ce802, 0x00000000,
|
||||
0xbefe00c1, 0xb8f03b05,
|
||||
0x80708170, 0xbf0d9973,
|
||||
0xbfa20002, 0x84708970,
|
||||
0xbfa00001, 0x84708a70,
|
||||
0xb8fa1e06, 0x847a8a7a,
|
||||
0x80707a70, 0xbef600ff,
|
||||
0x01000000, 0xbef90080,
|
||||
0xbefd0080, 0xbf800000,
|
||||
0x8070ff70, 0x00000080,
|
||||
0xbef90080, 0x7e040280,
|
||||
0x807d907d, 0xbf0aff7d,
|
||||
0x00000060, 0xbfa2ffbb,
|
||||
0xbe804100, 0xbe824102,
|
||||
0xbe844104, 0xbe864106,
|
||||
0xbe884108, 0xbe8a410a,
|
||||
0xbe8c410c, 0xbe8e410e,
|
||||
0xd7610002, 0x0000f200,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f201, 0x80798179,
|
||||
|
|
@ -3814,130 +3850,97 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
|
|||
0xd7610002, 0x0000f20a,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20b, 0x80798179,
|
||||
0xd7610002, 0x0000f20c,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20d, 0x80798179,
|
||||
0xd7610002, 0x0000f20e,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20f, 0x80798179,
|
||||
0xbf06a079, 0xbfa10007,
|
||||
0xc4068070, 0x008ce802,
|
||||
0x00000000, 0x8070ff70,
|
||||
0x00000080, 0xbef90080,
|
||||
0x7e040280, 0x807d907d,
|
||||
0xbf0aff7d, 0x00000060,
|
||||
0xbfa2ffbb, 0xbe804100,
|
||||
0xbe824102, 0xbe844104,
|
||||
0xbe864106, 0xbe884108,
|
||||
0xbe8a410a, 0xd7610002,
|
||||
0x0000f200, 0x80798179,
|
||||
0xd7610002, 0x0000f201,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f202, 0x80798179,
|
||||
0xd7610002, 0x0000f203,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f204, 0x80798179,
|
||||
0xd7610002, 0x0000f205,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f206, 0x80798179,
|
||||
0xd7610002, 0x0000f207,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f208, 0x80798179,
|
||||
0xd7610002, 0x0000f209,
|
||||
0x80798179, 0xd7610002,
|
||||
0x0000f20a, 0x80798179,
|
||||
0xd7610002, 0x0000f20b,
|
||||
0x80798179, 0xc4068070,
|
||||
0x008ce802, 0x00000000,
|
||||
0xbefe00c1, 0x857d9973,
|
||||
0x8b7d817d, 0xbf06817d,
|
||||
0xbfa20002, 0xbeff0080,
|
||||
0xbfa00001, 0xbeff00c1,
|
||||
0xb8fb4306, 0x8b7bc17b,
|
||||
0xbfa10045, 0x8b7aff6d,
|
||||
0x80000000, 0xbfa10042,
|
||||
0x847b867b, 0x847b827b,
|
||||
0xbef6007b, 0xb8f03b05,
|
||||
0x80708170, 0xbf0d9973,
|
||||
0xbfa20002, 0x84708970,
|
||||
0xbfa00001, 0x84708a70,
|
||||
0xb8fa1e06, 0x847a8a7a,
|
||||
0x80707a70, 0x8070ff70,
|
||||
0x00000200, 0x8070ff70,
|
||||
0x00000080, 0xbef600ff,
|
||||
0x01000000, 0xd71f0000,
|
||||
0x000100c1, 0xd7200000,
|
||||
0x000200c1, 0x16000084,
|
||||
0x00000000, 0xbefe00c1,
|
||||
0x857d9973, 0x8b7d817d,
|
||||
0xbf06817d, 0xbefd0080,
|
||||
0xbfa20013, 0xbe8300ff,
|
||||
0x00000080, 0xbf800000,
|
||||
0xbf800000, 0xbf800000,
|
||||
0xd8d80000, 0x01000000,
|
||||
0xbf890000, 0xc4068070,
|
||||
0x008ce801, 0x00000000,
|
||||
0x807d037d, 0x80700370,
|
||||
0xd5250000, 0x0001ff00,
|
||||
0x00000080, 0xbf0a7b7d,
|
||||
0xbfa2fff3, 0xbfa00012,
|
||||
0xbe8300ff, 0x00000100,
|
||||
0xbf06817d, 0xbfa20002,
|
||||
0xbeff0080, 0xbfa00001,
|
||||
0xbeff00c1, 0xb8fb4306,
|
||||
0x8b7bc17b, 0xbfa10045,
|
||||
0x8b7aff6d, 0x80000000,
|
||||
0xbfa10042, 0x847b867b,
|
||||
0x847b827b, 0xbef6007b,
|
||||
0xb8f03b05, 0x80708170,
|
||||
0xbf0d9973, 0xbfa20002,
|
||||
0x84708970, 0xbfa00001,
|
||||
0x84708a70, 0xb8fa1e06,
|
||||
0x847a8a7a, 0x80707a70,
|
||||
0x8070ff70, 0x00000200,
|
||||
0x8070ff70, 0x00000080,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xd71f0000, 0x000100c1,
|
||||
0xd7200000, 0x000200c1,
|
||||
0x16000084, 0x857d9973,
|
||||
0x8b7d817d, 0xbf06817d,
|
||||
0xbefd0080, 0xbfa20013,
|
||||
0xbe8300ff, 0x00000080,
|
||||
0xbf800000, 0xbf800000,
|
||||
0xbf800000, 0xd8d80000,
|
||||
0x01000000, 0xbf890000,
|
||||
0xc4068070, 0x008ce801,
|
||||
0x00000000, 0x807d037d,
|
||||
0x80700370, 0xd5250000,
|
||||
0x0001ff00, 0x00000100,
|
||||
0x0001ff00, 0x00000080,
|
||||
0xbf0a7b7d, 0xbfa2fff3,
|
||||
0xbefe00c1, 0x857d9973,
|
||||
0x8b7d817d, 0xbf06817d,
|
||||
0xbfa20004, 0xbef000ff,
|
||||
0x00000200, 0xbeff0080,
|
||||
0xbfa00003, 0xbef000ff,
|
||||
0x00000400, 0xbeff00c1,
|
||||
0xb8fb3b05, 0x807b817b,
|
||||
0x847b827b, 0x857d9973,
|
||||
0x8b7d817d, 0xbf06817d,
|
||||
0xbfa2001b, 0xbef600ff,
|
||||
0x01000000, 0xbefd0084,
|
||||
0xbf0a7b7d, 0xbfa10040,
|
||||
0x7e008700, 0x7e028701,
|
||||
0x7e048702, 0x7e068703,
|
||||
0xc4068070, 0x008ce800,
|
||||
0x00000000, 0xc4068070,
|
||||
0x008ce801, 0x00008000,
|
||||
0xc4068070, 0x008ce802,
|
||||
0x00010000, 0xc4068070,
|
||||
0x008ce803, 0x00018000,
|
||||
0x807d847d, 0x8070ff70,
|
||||
0x00000200, 0xbf0a7b7d,
|
||||
0xbfa2ffeb, 0xbfa0002a,
|
||||
0xbfa00012, 0xbe8300ff,
|
||||
0x00000100, 0xbf800000,
|
||||
0xbf800000, 0xbf800000,
|
||||
0xd8d80000, 0x01000000,
|
||||
0xbf890000, 0xc4068070,
|
||||
0x008ce801, 0x00000000,
|
||||
0x807d037d, 0x80700370,
|
||||
0xd5250000, 0x0001ff00,
|
||||
0x00000100, 0xbf0a7b7d,
|
||||
0xbfa2fff3, 0xbefe00c1,
|
||||
0x857d9973, 0x8b7d817d,
|
||||
0xbf06817d, 0xbfa20004,
|
||||
0xbef000ff, 0x00000200,
|
||||
0xbeff0080, 0xbfa00003,
|
||||
0xbef000ff, 0x00000400,
|
||||
0xbeff00c1, 0xb8fb3b05,
|
||||
0x807b817b, 0x847b827b,
|
||||
0x857d9973, 0x8b7d817d,
|
||||
0xbf06817d, 0xbfa2001b,
|
||||
0xbef600ff, 0x01000000,
|
||||
0xbefd0084, 0xbf0a7b7d,
|
||||
0xbfa10015, 0x7e008700,
|
||||
0xbfa10040, 0x7e008700,
|
||||
0x7e028701, 0x7e048702,
|
||||
0x7e068703, 0xc4068070,
|
||||
0x008ce800, 0x00000000,
|
||||
0xc4068070, 0x008ce801,
|
||||
0x00010000, 0xc4068070,
|
||||
0x008ce802, 0x00020000,
|
||||
0x00008000, 0xc4068070,
|
||||
0x008ce802, 0x00010000,
|
||||
0xc4068070, 0x008ce803,
|
||||
0x00030000, 0x807d847d,
|
||||
0x8070ff70, 0x00000400,
|
||||
0x00018000, 0x807d847d,
|
||||
0x8070ff70, 0x00000200,
|
||||
0xbf0a7b7d, 0xbfa2ffeb,
|
||||
0xb8fb1e06, 0x8b7bc17b,
|
||||
0xbfa1000d, 0x847b837b,
|
||||
0x807b7d7b, 0xbefe00c1,
|
||||
0xbeff0080, 0x7e008700,
|
||||
0xbfa0002a, 0xbef600ff,
|
||||
0x01000000, 0xbefd0084,
|
||||
0xbf0a7b7d, 0xbfa10015,
|
||||
0x7e008700, 0x7e028701,
|
||||
0x7e048702, 0x7e068703,
|
||||
0xc4068070, 0x008ce800,
|
||||
0x00000000, 0x807d817d,
|
||||
0x8070ff70, 0x00000080,
|
||||
0xbf0a7b7d, 0xbfa2fff7,
|
||||
0xbfa00159, 0xbef4007e,
|
||||
0x8b75ff7f, 0x0000ffff,
|
||||
0x8c75ff75, 0x00040000,
|
||||
0xbef60080, 0xbef700ff,
|
||||
0x10807fac, 0xb8f20742,
|
||||
0x00000000, 0xc4068070,
|
||||
0x008ce801, 0x00010000,
|
||||
0xc4068070, 0x008ce802,
|
||||
0x00020000, 0xc4068070,
|
||||
0x008ce803, 0x00030000,
|
||||
0x807d847d, 0x8070ff70,
|
||||
0x00000400, 0xbf0a7b7d,
|
||||
0xbfa2ffeb, 0xb8fb1e06,
|
||||
0x8b7bc17b, 0xbfa1000d,
|
||||
0x847b837b, 0x807b7d7b,
|
||||
0xbefe00c1, 0xbeff0080,
|
||||
0x7e008700, 0xc4068070,
|
||||
0x008ce800, 0x00000000,
|
||||
0x807d817d, 0x8070ff70,
|
||||
0x00000080, 0xbf0a7b7d,
|
||||
0xbfa2fff7, 0xbfa0016b,
|
||||
0xbef4007e, 0x8b75ff7f,
|
||||
0x0000ffff, 0x8c75ff75,
|
||||
0x00040000, 0xbef60080,
|
||||
0xbef700ff, 0x10807fac,
|
||||
0xbef1007f, 0xb8f20742,
|
||||
0x84729972, 0x8b6eff7f,
|
||||
0x04000000, 0xbfa1003c,
|
||||
0xbefe00c1, 0x857d9972,
|
||||
|
|
@ -4064,49 +4067,58 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
|
|||
0xb8ee1e06, 0x846e8a6e,
|
||||
0x80786e78, 0x8078ff78,
|
||||
0x00000200, 0xbef600ff,
|
||||
0x01000000, 0xf4621bfa,
|
||||
0x01000000, 0xbeff0071,
|
||||
0xf4621bfa, 0xf0000000,
|
||||
0x80788478, 0xf4621b3a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4621b3a, 0xf0000000,
|
||||
0x80788478, 0xf4621b7a,
|
||||
0xf4621b7a, 0xf0000000,
|
||||
0x80788478, 0xf4621c3a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4621c3a, 0xf0000000,
|
||||
0x80788478, 0xf4621c7a,
|
||||
0xf4621c7a, 0xf0000000,
|
||||
0x80788478, 0xf4621eba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4621eba, 0xf0000000,
|
||||
0x80788478, 0xf4621efa,
|
||||
0xf4621efa, 0xf0000000,
|
||||
0x80788478, 0xf4621e7a,
|
||||
0xf0000000, 0x80788478,
|
||||
0xf4621e7a, 0xf0000000,
|
||||
0x80788478, 0xf4621cfa,
|
||||
0xf4621cfa, 0xf0000000,
|
||||
0x80788478, 0xf4621bba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf89fc07, 0xb96ef814,
|
||||
0xf4621bba, 0xf0000000,
|
||||
0x80788478, 0xbf89fc07,
|
||||
0xb96ef814, 0xf4621bba,
|
||||
0xb96ef815, 0xf4621bba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf89fc07, 0xb96ef815,
|
||||
0xbf89fc07, 0xb96ef812,
|
||||
0xf4621bba, 0xf0000000,
|
||||
0x80788478, 0xbf89fc07,
|
||||
0xb96ef812, 0xf4621bba,
|
||||
0xb96ef813, 0x8b6eff7f,
|
||||
0x04000000, 0xbfa1000d,
|
||||
0x80788478, 0xf4621bba,
|
||||
0xf0000000, 0x80788478,
|
||||
0xbf89fc07, 0xb96ef813,
|
||||
0xbe804ec2, 0xbf94fffe,
|
||||
0xbefd006f, 0xbefe0070,
|
||||
0xbeff0071, 0xb97bf811,
|
||||
0xb973f801, 0xb8ee3b05,
|
||||
0x806e816e, 0xbf0d9972,
|
||||
0xbfa20002, 0x846e896e,
|
||||
0xbfa00001, 0x846e8a6e,
|
||||
0xb8ef1e06, 0x846f8a6f,
|
||||
0x806e6f6e, 0x806eff6e,
|
||||
0x00000200, 0x806e746e,
|
||||
0x826f8075, 0x8b6fff6f,
|
||||
0x0000ffff, 0xf4605c37,
|
||||
0xf8000050, 0xf4605d37,
|
||||
0xf8000060, 0xf4601e77,
|
||||
0xf8000074, 0xbf89fc07,
|
||||
0x8b6dff6d, 0x0000ffff,
|
||||
0x8bfe7e7e, 0x8bea6a6a,
|
||||
0xb97af804, 0xbe804a6c,
|
||||
0xbfb00000, 0xbf9f0000,
|
||||
0xbf89fc07, 0xbf0d806e,
|
||||
0xbfa10006, 0x856e906e,
|
||||
0x8b6e6e6e, 0xbfa10003,
|
||||
0xbe804ec1, 0x816ec16e,
|
||||
0xbfa0fffb, 0xbe804ec2,
|
||||
0xbf94fffe, 0xbefd006f,
|
||||
0xbefe0070, 0xbeff0071,
|
||||
0xb97bf811, 0xb973f801,
|
||||
0xb8ee3b05, 0x806e816e,
|
||||
0xbf0d9972, 0xbfa20002,
|
||||
0x846e896e, 0xbfa00001,
|
||||
0x846e8a6e, 0xb8ef1e06,
|
||||
0x846f8a6f, 0x806e6f6e,
|
||||
0x806eff6e, 0x00000200,
|
||||
0x806e746e, 0x826f8075,
|
||||
0x8b6fff6f, 0x0000ffff,
|
||||
0xf4605c37, 0xf8000050,
|
||||
0xf4605d37, 0xf8000060,
|
||||
0xf4601e77, 0xf8000074,
|
||||
0xbf89fc07, 0x8b6dff6d,
|
||||
0x0000ffff, 0x8bfe7e7e,
|
||||
0x8bea6a6a, 0xb97af804,
|
||||
0xbe804a6c, 0xbfb00000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
0xbf9f0000, 0xbf9f0000,
|
||||
0xbf9f0000, 0x00000000,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -154,6 +154,8 @@ var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
|
|||
var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
|
||||
var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
|
||||
var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK|SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK
|
||||
var BARRIER_STATE_SIGNAL_OFFSET = 16
|
||||
var BARRIER_STATE_VALID_OFFSET = 0
|
||||
#endif
|
||||
|
||||
// bits [31:24] unused by SPI debug data
|
||||
|
|
@ -227,6 +229,7 @@ var s_restore_buf_rsrc3 = ttmp11
|
|||
var s_restore_size = ttmp6
|
||||
var s_restore_ttmps_lo = s_restore_tmp
|
||||
var s_restore_ttmps_hi = s_restore_alloc_size
|
||||
var s_restore_spi_init_hi_save = s_restore_exec_hi
|
||||
|
||||
shader main
|
||||
asic(DEFAULT)
|
||||
|
|
@ -639,6 +642,10 @@ L_SAVE_HWREG:
|
|||
|
||||
s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
|
||||
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
|
||||
s_get_barrier_state s_save_tmp, -1
|
||||
s_wait_kmcnt (0)
|
||||
write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
|
||||
#endif
|
||||
|
||||
#if NO_SQC_STORE
|
||||
|
|
@ -1001,6 +1008,11 @@ L_RESTORE:
|
|||
s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
|
||||
s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Save s_restore_spi_init_hi for later use.
|
||||
s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
|
||||
#endif
|
||||
|
||||
//determine it is wave32 or wave64
|
||||
get_wave_size2(s_restore_size)
|
||||
|
||||
|
|
@ -1250,6 +1262,11 @@ L_RESTORE_HWREG:
|
|||
|
||||
s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
|
||||
|
||||
#if ASIC_FAMILY >= CHIP_GFX12
|
||||
// Restore s_restore_spi_init_hi before the saved value gets clobbered.
|
||||
s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
|
||||
#endif
|
||||
|
||||
read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
|
|
@ -1278,6 +1295,32 @@ L_RESTORE_HWREG:
|
|||
s_waitcnt lgkmcnt(0)
|
||||
s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
|
||||
|
||||
// Only the first wave needs to restore the workgroup barrier.
|
||||
s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
|
||||
// Skip over WAVE_STATUS, since there is no state to restore from it
|
||||
s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
|
||||
|
||||
read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
|
||||
// extract the saved signal count from s_restore_tmp
|
||||
s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
|
||||
|
||||
// We need to call s_barrier_signal repeatedly to restore the signal
|
||||
// count of the work group barrier. The member count is already
|
||||
// initialized with the number of waves in the work group.
|
||||
L_BARRIER_RESTORE_LOOP:
|
||||
s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
|
||||
s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
|
||||
s_barrier_signal -1
|
||||
s_add_i32 s_restore_tmp, s_restore_tmp, -1
|
||||
s_branch L_BARRIER_RESTORE_LOOP
|
||||
|
||||
L_SKIP_BARRIER_RESTORE:
|
||||
// Make barrier and LDS state visible to all waves in the group.
|
||||
s_barrier_signal -2
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user