mirror of
https://github.com/torvalds/linux.git
synced 2026-05-31 10:33:41 +02:00
drm/amdgpu: introduce a kind of halt state for amdgpu device
It is useful to maintain error context when debugging SW/FW issues. Introduce amdgpu_device_halt() for this purpose. It brings the hardware to a kind of halt state, so that no one can touch it any more. Compared to a simple hang, the system will stay stable, at least for SSH access. It should then be trivial to inspect the hardware state and see what's going on. v2: - Set adev->no_hw_access earlier to avoid potential crashes. (Christian) Suggested-by: Christian Koenig <christian.koenig@amd.com> Suggested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Lang Yu <lang.yu@amd.com> Reviewed-by: Christian Koenig <christian.koenig@amd.co> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
cace4bff75
commit
34f3a4a98b
|
|
@ -1317,6 +1317,8 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
|
|||
void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
|
||||
struct amdgpu_ring *ring);
|
||||
|
||||
void amdgpu_device_halt(struct amdgpu_device *adev);
|
||||
|
||||
/* atpx handler */
|
||||
#if defined(CONFIG_VGA_SWITCHEROO)
|
||||
void amdgpu_register_atpx_handler(void);
|
||||
|
|
|
|||
|
|
@ -5663,3 +5663,42 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
|
|||
|
||||
amdgpu_asic_invalidate_hdp(adev, ring);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_device_halt() - bring hardware to some kind of halt state
|
||||
*
|
||||
* @adev: amdgpu_device pointer
|
||||
*
|
||||
* Bring hardware to some kind of halt state so that no one can touch it
|
||||
* any more. It will help to maintain error context when error occurred.
|
||||
* Compare to a simple hang, the system will keep stable at least for SSH
|
||||
* access. Then it should be trivial to inspect the hardware state and
|
||||
* see what's going on. Implemented as following:
|
||||
*
|
||||
* 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
|
||||
* clears all CPU mappings to device, disallows remappings through page faults
|
||||
* 2. amdgpu_irq_disable_all() disables all interrupts
|
||||
* 3. amdgpu_fence_driver_hw_fini() signals all HW fences
|
||||
* 4. set adev->no_hw_access to avoid potential crashes after setp 5
|
||||
* 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
|
||||
* 6. pci_disable_device() and pci_wait_for_pending_transaction()
|
||||
* flush any in flight DMA operations
|
||||
*/
|
||||
void amdgpu_device_halt(struct amdgpu_device *adev)
|
||||
{
|
||||
struct pci_dev *pdev = adev->pdev;
|
||||
struct drm_device *ddev = &adev->ddev;
|
||||
|
||||
drm_dev_unplug(ddev);
|
||||
|
||||
amdgpu_irq_disable_all(adev);
|
||||
|
||||
amdgpu_fence_driver_hw_fini(adev);
|
||||
|
||||
adev->no_hw_access = true;
|
||||
|
||||
amdgpu_device_unmap_mmio(adev);
|
||||
|
||||
pci_disable_device(pdev);
|
||||
pci_wait_for_pending_transaction(pdev);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user