diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 2b8a027f7d09..d89b539ee5b9 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -1168,6 +1168,22 @@ static void cs_completion(struct work_struct *work) hl_complete_job(hdev, job); } +u32 hl_get_active_cs_num(struct hl_device *hdev) +{ + u32 active_cs_num = 0; + struct hl_cs *cs; + + spin_lock(&hdev->cs_mirror_lock); + + list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) + if (!cs->completed) + active_cs_num++; + + spin_unlock(&hdev->cs_mirror_lock); + + return active_cs_num; +} + static int validate_queue_index(struct hl_device *hdev, struct hl_cs_chunk *chunk, enum hl_queue_type *queue_type, diff --git a/drivers/accel/habanalabs/common/device.c b/drivers/accel/habanalabs/common/device.c index 194c282d7e55..b8c74185eabd 100644 --- a/drivers/accel/habanalabs/common/device.c +++ b/drivers/accel/habanalabs/common/device.c @@ -492,6 +492,52 @@ int hl_hpriv_put(struct hl_fpriv *hpriv) return kref_put(&hpriv->refcount, hpriv_release); } +static void compose_device_in_use_info(char **buf, size_t *buf_size, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int size; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + size = snprintf(*buf, *buf_size, "%pV", &vaf); + if (size >= *buf_size) + size = *buf_size; + + *buf += size; + *buf_size -= size; + + va_end(args); +} + +static void print_device_in_use_info(struct hl_device *hdev, const char *message) +{ + u32 active_cs_num, dmabuf_export_cnt; + char buf[64], *buf_ptr = buf; + size_t buf_size = sizeof(buf); + bool unknown_reason = true; + + active_cs_num = hl_get_active_cs_num(hdev); + if (active_cs_num) { + unknown_reason = false; + compose_device_in_use_info(&buf_ptr, &buf_size, " [%u active CS]", active_cs_num); + } + + dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt); + if (dmabuf_export_cnt) { + unknown_reason = false; + compose_device_in_use_info(&buf_ptr, &buf_size, " [%u exported dma-buf]", + dmabuf_export_cnt); + } + + if (unknown_reason) + compose_device_in_use_info(&buf_ptr, &buf_size, " [unknown reason]"); + + dev_notice(hdev->dev, "%s%s\n", message, buf); +} + /* * hl_device_release - release function for habanalabs device * @@ -519,12 +565,11 @@ static int hl_device_release(struct inode *inode, struct file *filp) hdev->compute_ctx_in_release = 1; if (!hl_hpriv_put(hpriv)) { - dev_notice(hdev->dev, "User process closed FD but device still in use\n"); + print_device_in_use_info(hdev, "User process closed FD but device still in use"); hl_device_reset(hdev, HL_DRV_RESET_HARD); } - hdev->last_open_session_duration_jif = - jiffies - hdev->last_successful_open_jif; + hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; return 0; } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index d98e6c0feb24..afdae5775eaa 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3200,6 +3200,7 @@ struct hl_reset_info { * drams are binned-out * @tpc_binning: contains mask of tpc engines that is received from the f/w which indicates which * tpc engines are binned-out + * @dmabuf_export_cnt: number of dma-buf exporting. * @card_type: Various ASICs have several card types. This indicates the card * type of the current device. * @major: habanalabs kernel driver major. @@ -3371,7 +3372,7 @@ struct hl_device { u64 fw_comms_poll_interval_usec; u64 dram_binning; u64 tpc_binning; - + atomic_t dmabuf_export_cnt; enum cpucp_card_types card_type; u32 major; u32 high_pll; @@ -3664,6 +3665,7 @@ bool cs_needs_timeout(struct hl_cs *cs); bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs); struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq); void hl_multi_cs_completion_init(struct hl_device *hdev); +u32 hl_get_active_cs_num(struct hl_device *hdev); void goya_set_asic_funcs(struct hl_device *hdev); void gaudi_set_asic_funcs(struct hl_device *hdev); diff --git a/drivers/accel/habanalabs/common/memory.c b/drivers/accel/habanalabs/common/memory.c index 64ccbd62f803..fd333dd2835e 100644 --- a/drivers/accel/habanalabs/common/memory.c +++ b/drivers/accel/habanalabs/common/memory.c @@ -1833,6 +1833,7 @@ static void hl_release_dmabuf(struct dma_buf *dmabuf) if (hl_dmabuf->memhash_hnode) memhash_node_export_put(ctx, hl_dmabuf->memhash_hnode); + atomic_dec(&ctx->hdev->dmabuf_export_cnt); hl_ctx_put(ctx); kfree(hl_dmabuf); } @@ -1872,6 +1873,7 @@ static int export_dmabuf(struct hl_ctx *ctx, hl_dmabuf->ctx = ctx; hl_ctx_get(hl_dmabuf->ctx); + atomic_inc(&ctx->hdev->dmabuf_export_cnt); *dmabuf_fd = fd;