habanalabs: use graceful hard reset for CS timeouts

Use graceful hard reset when detecting a CS timeout that requires a
device reset.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Tomer Tayar 2022-09-30 17:02:19 +03:00 committed by Oded Gabbay
parent d1ce7e5ea1
commit 1b363adc7f

View File

@ -798,7 +798,7 @@ static void cs_do_release(struct kref *ref)
static void cs_timedout(struct work_struct *work)
{
struct hl_device *hdev;
u64 event_mask;
u64 event_mask = 0x0;
int rc;
struct hl_cs *cs = container_of(work, struct hl_cs,
work_tdr.work);
@ -830,11 +830,7 @@ static void cs_timedout(struct work_struct *work)
if (rc) {
hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
hdev->captured_err_info.cs_timeout.seq = cs->sequence;
event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
hl_notifier_event_send_all(hdev, event_mask);
event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
}
switch (cs->type) {
@ -869,8 +865,12 @@ static void cs_timedout(struct work_struct *work)
cs_put(cs);
if (device_reset)
hl_device_reset(hdev, HL_DRV_RESET_TDR);
if (device_reset) {
event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
} else if (event_mask) {
hl_notifier_event_send_all(hdev, event_mask);
}
}
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,