From 89225ce4fc43da2db968923fc94e1469dbcdbd5a Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Wed, 1 May 2019 14:38:38 +0300 Subject: [PATCH 1/4] habanalabs: halt debug engines on user process close This patch fix a potential bug where a user's process has closed unexpectedly without disabling the debug engines. In that case, the debug engines might continue running but because the user's MMU mappings are going away, we will get page fault errors. This behavior is also opposed to the general rule where nothing runs on the device after the user process closes. The patch stops the debug H/W engines upon process termination and thus makes sure nothing runs on the device after the process goes away. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/context.c | 6 ++++++ drivers/misc/habanalabs/goya/goya.c | 3 ++- drivers/misc/habanalabs/goya/goyaP.h | 1 + drivers/misc/habanalabs/goya/goya_coresight.c | 17 +++++++++++++++++ drivers/misc/habanalabs/habanalabs.h | 2 ++ 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c index 4804cdcf4c48..f4c92f110a72 100644 --- a/drivers/misc/habanalabs/context.c +++ b/drivers/misc/habanalabs/context.c @@ -26,6 +26,12 @@ static void hl_ctx_fini(struct hl_ctx *ctx) dma_fence_put(ctx->cs_pending[i]); if (ctx->asid != HL_KERNEL_ASID_ID) { + /* + * The engines are stopped as there is no executing CS, but the + * Coresight might be still working by accessing addresses + * related to the stopped engines. Hence stop it explicitly. + */ + hdev->asic_funcs->halt_coresight(hdev); hl_vm_ctx_fini(ctx); hl_asid_free(hdev, ctx->asid); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index a582e29c1ee4..02d116b01a1a 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4819,7 +4819,8 @@ static const struct hl_asic_funcs goya_funcs = { .set_dram_bar_base = goya_set_ddr_bar_base, .init_iatu = goya_init_iatu, .rreg = hl_rreg, - .wreg = hl_wreg + .wreg = hl_wreg, + .halt_coresight = goya_halt_coresight }; /* diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 14e216cb3668..c83cab0d641e 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -202,6 +202,7 @@ void goya_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_attr_grp); int goya_armcp_info_get(struct hl_device *hdev); int goya_debug_coresight(struct hl_device *hdev, void *data); +void goya_halt_coresight(struct hl_device *hdev); void goya_mmu_prepare(struct hl_device *hdev, u32 asid); int goya_mmu_clear_pgt_range(struct hl_device *hdev); diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 1ac951f52d1e..39f62ce72660 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -626,3 +626,20 @@ int goya_debug_coresight(struct hl_device *hdev, void *data) return rc; } + +void goya_halt_coresight(struct hl_device *hdev) +{ + struct hl_debug_params params = {}; + int i, rc; + + for (i = GOYA_ETF_FIRST ; i <= GOYA_ETF_LAST ; i++) { + params.reg_idx = i; + rc = goya_config_etf(hdev, ¶ms); + if (rc) + dev_err(hdev->dev, "halt ETF failed, %d/%d\n", rc, i); + } + + rc = goya_config_etr(hdev, ¶ms); + if (rc) + dev_err(hdev->dev, "halt ETR failed, %d\n", rc); +} diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 71243b319920..adef7d9d7488 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -501,6 +501,7 @@ enum hl_pll_frequency { * @init_iatu: Initialize the iATU unit inside the PCI controller. * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. + * @halt_coresight: stop the ETF and ETR traces. */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -578,6 +579,7 @@ struct hl_asic_funcs { int (*init_iatu)(struct hl_device *hdev); u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); + void (*halt_coresight)(struct hl_device *hdev); }; From 2add64ebaea25dfada27b3332b1c982f9fc5ae64 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 4 May 2019 16:30:00 +0300 Subject: [PATCH 2/4] uapi/habanalabs: add opcode for enable/disable device debug mode This patch defines a new opcode in the DEBUG IOCTL that is used by the user to notify the driver when the user wants to start or stop using the debug and profile infrastructure of the device. i.e. set the device to debug mode or to non-debug mode. There are a couple of restrictions that this new opcode introduces: 1. The user can't configure the debug/profiling infrastructure before he sets the device to debug mode, by using this new opcode. 2. The user can't set the device to debug mode unless he is the only user that is currently using (has an open FD) the device. 3. Other users can't use the device (open a new FD) in case an existing user has set the device into debug mode. These restrictions are needed because the debug and profiling infrastructure is a shared component in the ASIC and therefore, can't be used while multiple users are working on the device. Because the driver currently does NOT support multiple users, the implementation of the restrictions is not required at this point. However, the interface definition is needed in order to avoid changing the user API later on. Signed-off-by: Oded Gabbay --- include/uapi/misc/habanalabs.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 8ac292cf4d00..204ab9b4ae67 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -413,6 +413,10 @@ struct hl_debug_params_spmu { #define HL_DEBUG_OP_SPMU 5 /* Opcode for timestamp */ #define HL_DEBUG_OP_TIMESTAMP 6 +/* Opcode for setting the device into or out of debug mode. The enable + * variable should be 1 for enabling debug mode and 0 for disabling it + */ +#define HL_DEBUG_OP_SET_MODE 7 struct hl_debug_args { /* @@ -574,8 +578,22 @@ struct hl_debug_args { * * This IOCTL allows the user to get debug traces from the chip. * - * The user needs to provide the register index and essential data such as - * buffer address and size. + * Before the user can send configuration requests of the various + * debug/profile engines, it needs to set the device into debug mode. + * This is because the debug/profile infrastructure is shared component in the + * device and we can't allow multiple users to access it at the same time. + * + * Once a user set the device into debug mode, the driver won't allow other + * users to "work" with the device, i.e. open a FD. If there are multiple users + * opened on the device, the driver won't allow any user to debug the device. + * + * For each configuration request, the user needs to provide the register index + * and essential data such as buffer address and size. + * + * Once the user has finished using the debug/profile engines, he should + * set the device into non-debug mode, i.e. disable debug mode. + * + * The driver can decide to "kick out" the user if he abuses this interface. * */ #define HL_IOCTL_DEBUG \ From 8438846cce61e284a22316c13aa4b63772963070 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 4 May 2019 15:56:08 +0200 Subject: [PATCH 3/4] habanalabs: fix debugfs code This fixes multiple things in the habanalabs debugfs code, in particular: - mmu_write() was unnecessarily verbose, copying around between multiple buffers - mmu_write() could write a user-specified, unbounded amount of userspace memory into a kernel buffer (out-of-bounds write) - multiple debugfs read handlers ignored the user-supplied count, potentially corrupting out-of-bounds userspace data - hl_device_read() was unnecessarily verbose - hl_device_write() could read uninitialized stack memory - multiple debugfs read handlers copied terminating null characters to userspace Signed-off-by: Jann Horn Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay Cc: stable@vger.kernel.org --- drivers/misc/habanalabs/debugfs.c | 60 ++++++++++--------------------- 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index a4447699ff4e..0ce5621c1324 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -459,41 +459,31 @@ static ssize_t mmu_write(struct file *file, const char __user *buf, struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - char kbuf[MMU_KBUF_SIZE], asid_kbuf[MMU_ASID_BUF_SIZE], - addr_kbuf[MMU_ADDR_BUF_SIZE]; + char kbuf[MMU_KBUF_SIZE]; char *c; ssize_t rc; if (!hdev->mmu_enable) return count; - memset(kbuf, 0, sizeof(kbuf)); - memset(asid_kbuf, 0, sizeof(asid_kbuf)); - memset(addr_kbuf, 0, sizeof(addr_kbuf)); - + if (count > sizeof(kbuf) - 1) + goto err; if (copy_from_user(kbuf, buf, count)) goto err; - - kbuf[MMU_KBUF_SIZE - 1] = 0; + kbuf[count] = 0; c = strchr(kbuf, ' '); if (!c) goto err; + *c = '\0'; - memcpy(asid_kbuf, kbuf, c - kbuf); - - rc = kstrtouint(asid_kbuf, 10, &dev_entry->mmu_asid); + rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid); if (rc) goto err; - c = strstr(kbuf, " 0x"); - if (!c) + if (strncmp(c+1, "0x", 2)) goto err; - - c += 3; - memcpy(addr_kbuf, c, (kbuf + count) - c); - - rc = kstrtoull(addr_kbuf, 16, &dev_entry->mmu_addr); + rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr); if (rc) goto err; @@ -600,10 +590,8 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf, } sprintf(tmp_buf, "0x%08x\n", val); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); } static ssize_t hl_data_write32(struct file *f, const char __user *buf, @@ -645,7 +633,6 @@ static ssize_t hl_get_power_state(struct file *f, char __user *buf, struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; char tmp_buf[200]; - ssize_t rc; int i; if (*ppos) @@ -660,10 +647,8 @@ static ssize_t hl_get_power_state(struct file *f, char __user *buf, sprintf(tmp_buf, "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); } static ssize_t hl_set_power_state(struct file *f, const char __user *buf, @@ -716,8 +701,8 @@ static ssize_t hl_i2c_data_read(struct file *f, char __user *buf, } sprintf(tmp_buf, "0x%02x\n", val); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); + rc = simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); return rc; } @@ -806,18 +791,9 @@ static ssize_t hl_led2_write(struct file *f, const char __user *buf, static ssize_t hl_device_read(struct file *f, char __user *buf, size_t count, loff_t *ppos) { - char tmp_buf[200]; - ssize_t rc; - - if (*ppos) - return 0; - - sprintf(tmp_buf, - "Valid values: disable, enable, suspend, resume, cpu_timeout\n"); - rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, - strlen(tmp_buf) + 1); - - return rc; + static const char *help = + "Valid values: disable, enable, suspend, resume, cpu_timeout\n"; + return simple_read_from_buffer(buf, count, ppos, help, strlen(help)); } static ssize_t hl_device_write(struct file *f, const char __user *buf, @@ -825,7 +801,7 @@ static ssize_t hl_device_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - char data[30]; + char data[30] = {0}; /* don't allow partial writes */ if (*ppos != 0) From 8d45f1de3994c566cb5ce9b3cb07ff1518e68ddb Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Mon, 13 May 2019 12:13:39 +0300 Subject: [PATCH 4/4] habanalabs: Avoid using a non-initialized MMU cache mutex The MMU cache mutex is used in the ASIC hw_init() functions, but it is initialized only later in hl_mmu_init(). This patch prevents it by moving the initialization to the device_early_init() function. Signed-off-by: Tomer Tayar Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 2 ++ drivers/misc/habanalabs/mmu.c | 8 +------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 91a9e47a3482..0b19d3eefb98 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -231,6 +231,7 @@ static int device_early_init(struct hl_device *hdev) mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->send_cpu_message_lock); + mutex_init(&hdev->mmu_cache_lock); INIT_LIST_HEAD(&hdev->hw_queues_mirror_list); spin_lock_init(&hdev->hw_queues_mirror_lock); atomic_set(&hdev->in_reset, 0); @@ -260,6 +261,7 @@ static int device_early_init(struct hl_device *hdev) */ static void device_early_fini(struct hl_device *hdev) { + mutex_destroy(&hdev->mmu_cache_lock); mutex_destroy(&hdev->send_cpu_message_lock); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 533d9315b6fb..10aee3141444 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -404,15 +404,12 @@ int hl_mmu_init(struct hl_device *hdev) /* MMU H/W init was already done in device hw_init() */ - mutex_init(&hdev->mmu_cache_lock); - hdev->mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); if (!hdev->mmu_pgt_pool) { dev_err(hdev->dev, "Failed to create page gen pool\n"); - rc = -ENOMEM; - goto err_pool_create; + return -ENOMEM; } rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr + @@ -436,8 +433,6 @@ int hl_mmu_init(struct hl_device *hdev) err_pool_add: gen_pool_destroy(hdev->mmu_pgt_pool); -err_pool_create: - mutex_destroy(&hdev->mmu_cache_lock); return rc; } @@ -459,7 +454,6 @@ void hl_mmu_fini(struct hl_device *hdev) kvfree(hdev->mmu_shadow_hop0); gen_pool_destroy(hdev->mmu_pgt_pool); - mutex_destroy(&hdev->mmu_cache_lock); /* MMU H/W fini will be done in device hw_fini() */ }