accel/habanalabs/gaudi2: read preboot status after recovering from dirty state

Dirty state can occur when the host VM undergoes a reset while the
device does not. In such a case, the driver must reset the device before
it can be used again. As part of this reset, the device capabilities
are zeroed. Therefore, the driver must read the Preboot status again to
learn the Preboot state, capabilities, and security configuration.

Signed-off-by: Konstantin Sinyuk <konstantin.sinyuk@intel.com>
Reviewed-by: Koby Elbaz <koby.elbaz@intel.com>
Signed-off-by: Koby Elbaz <koby.elbaz@intel.com>
This commit is contained in:
Konstantin Sinyuk 2024-10-01 15:52:27 +03:00 committed by Koby Elbaz
parent 65a3f5bc33
commit a0d866bab1

View File

@ -3498,7 +3498,6 @@ static int gaudi2_early_init(struct hl_device *hdev)
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
/* we are already on failure flow, so don't check if hw_fini fails. */
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
@ -3510,6 +3509,13 @@ static int gaudi2_early_init(struct hl_device *hdev)
dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
goto pci_fini;
}
rc = hl_fw_read_preboot_status(hdev);
if (rc) {
if (hdev->reset_on_preboot_fail)
hdev->asic_funcs->hw_fini(hdev, true, false);
goto pci_fini;
}
}
return 0;