mirror of
https://github.com/torvalds/linux.git
synced 2026-05-28 00:53:34 +02:00
drm/amdgpu: add dummy read for some GCVM status registers
The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface. This has caused a problem where status
registers requiring HW to update have a 1 cycle delay, due to the
register update having to go through GRBM.
SW may operate on an incorrect value if they write a register and
immediately check the corresponding status register.
Registers requiring HW to clear or set fields may be delayed by 1 cycle.
For example,
1. write VM_INVALIDATE_ENG0_REQ mask = 5a
2. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
a. HW will reset VM_INVALIDATE_ENG0_ACK = 0 until invalidation is complete
3. write VM_INVALIDATE_ENG0_REQ mask = 5a
4. read VM_INVALIDATE_ENG0_ACK till the ack is same as the request mask = 5a
a. First read of VM_INVALIDATE_ENG0_ACK = 5a instead of 0
b. Second read of VM_INVALIDATE_ENG0_ACK = 0 because
the remote GRBM h/w register takes one extra cycle to be cleared
c. In this case, SW will see a false ACK if they exit on first read
Affected registers (only GC variant) | Recommended Dummy Read
--------------------------------------+----------------------------
VM_INVALIDATE_ENG*_ACK | VM_INVALIDATE_ENG*_REQ
VM_L2_STATUS | VM_L2_STATUS
VM_L2_PROTECTION_FAULT_STATUS | VM_L2_PROTECTION_FAULT_STATUS
VM_L2_PROTECTION_FAULT_ADDR_HI/LO32 | VM_L2_PROTECTION_FAULT_ADDR_HI/LO32
VM_L2_IH_LOG_BUSY | VM_L2_IH_LOG_BUSY
MC_VM_L2_PERFCOUNTER_HI/LO | MC_VM_L2_PERFCOUNTER_HI/LO
ATC_L2_PERFCOUNTER_HI/LO | ATC_L2_PERFCOUNTER_HI/LO
ATC_L2_PERFCOUNTER2_HI/LO | ATC_L2_PERFCOUNTER2_HI/LO
Signed-off-by: Xiaojie Yuan <xiaojie.yuan@amd.com>
Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
b4d857ded1
commit
534991731c
|
|
@ -136,6 +136,14 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
|
|||
addr |= ((u64)entry->src_data[1] & 0xf) << 44;
|
||||
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/*
|
||||
* Issue a dummy read to wait for the status register to
|
||||
* be updated to avoid reading an incorrect value due to
|
||||
* the new fast GRBM interface.
|
||||
*/
|
||||
if (entry->vmid_src == AMDGPU_GFXHUB_0)
|
||||
RREG32(hub->vm_l2_pro_fault_status);
|
||||
|
||||
status = RREG32(hub->vm_l2_pro_fault_status);
|
||||
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
|
||||
}
|
||||
|
|
@ -229,6 +237,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
|
|||
|
||||
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
|
||||
|
||||
/*
|
||||
* Issue a dummy read to wait for the ACK register to be cleared
|
||||
* to avoid a false ACK due to the new fast GRBM interface.
|
||||
*/
|
||||
if (vmhub == AMDGPU_GFXHUB_0)
|
||||
RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
|
||||
|
||||
/* Wait for ACK with a delay.*/
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
|
||||
|
|
|
|||
|
|
@ -356,6 +356,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
|
|||
|
||||
/* If it's the first fault for this address, process it normally */
|
||||
if (!amdgpu_sriov_vf(adev)) {
|
||||
/*
|
||||
* Issue a dummy read to wait for the status register to
|
||||
* be updated to avoid reading an incorrect value due to
|
||||
* the new fast GRBM interface.
|
||||
*/
|
||||
if (entry->vmid_src == AMDGPU_GFXHUB_0)
|
||||
RREG32(hub->vm_l2_pro_fault_status);
|
||||
|
||||
status = RREG32(hub->vm_l2_pro_fault_status);
|
||||
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
|
||||
}
|
||||
|
|
@ -484,6 +492,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
|
|||
|
||||
spin_lock(&adev->gmc.invalidate_lock);
|
||||
WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
|
||||
|
||||
/*
|
||||
* Issue a dummy read to wait for the ACK register to be cleared
|
||||
* to avoid a false ACK due to the new fast GRBM interface.
|
||||
*/
|
||||
if (vmhub == AMDGPU_GFXHUB_0)
|
||||
RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng);
|
||||
|
||||
for (j = 0; j < adev->usec_timeout; j++) {
|
||||
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
|
||||
if (tmp & (1 << vmid))
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user