From 64dab074fe5e8852b0c981564dc146f39535a81a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jun 2017 18:20:09 -0400 Subject: [PATCH 1/5] drm/amdgpu: don't check the default value for vm size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoids printing spurious messages like this: [ 3.102059] amdgpu 0000:01:00.0: VM size (-1) must be a power of 2 Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 875cde414be7..b2c960b2ea82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1073,6 +1073,10 @@ static void amdgpu_check_block_size(struct amdgpu_device *adev) static void amdgpu_check_vm_size(struct amdgpu_device *adev) { + /* no need to check the default value */ + if (amdgpu_vm_size == -1) + return; + if (!amdgpu_check_pot_argument(amdgpu_vm_size)) { dev_warn(adev->dev, "VM size (%d) must be a power of 2\n", amdgpu_vm_size); From 6f0308ebc196f18607f45cc753521eb8448dfdfd Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 9 Mar 2017 03:45:52 +0000 Subject: [PATCH 2/5] amdgpu/cs: split out fence dependency checking (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just splits out the fence depenency checking into it's own function to make it easier to add semaphore dependencies. v2: rebase onto other changes. v1-Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 95 ++++++++++++++------------ 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index a37bdf4f8e9b..29469e6b58b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -923,59 +923,68 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return 0; } +static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_dep *deps; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ring *ring; + struct amdgpu_ctx *ctx; + struct dma_fence *fence; + + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; + + r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, + deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + + fence = amdgpu_ctx_get_fence(ctx, ring, + deps[i].handle); + if (IS_ERR(fence)) { + r = PTR_ERR(fence); + amdgpu_ctx_put(ctx); + return r; + } else if (fence) { + r = amdgpu_sync_fence(p->adev, &p->job->sync, + fence); + dma_fence_put(fence); + amdgpu_ctx_put(ctx); + if (r) + return r; + } + } + return 0; +} + static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - int i, j, r; + int i, r; for (i = 0; i < p->nchunks; ++i) { - struct drm_amdgpu_cs_chunk_dep *deps; struct amdgpu_cs_chunk *chunk; - unsigned num_deps; chunk = &p->chunks[i]; - if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES) - continue; - - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); - - for (j = 0; j < num_deps; ++j) { - struct amdgpu_ring *ring; - struct amdgpu_ctx *ctx; - struct dma_fence *fence; - - ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); - if (ctx == NULL) - return -EINVAL; - - r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, - deps[j].ip_type, - deps[j].ip_instance, - deps[j].ring, &ring); - if (r) { - amdgpu_ctx_put(ctx); + if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) { + r = amdgpu_cs_process_fence_dep(p, chunk); + if (r) return r; - } - - fence = amdgpu_ctx_get_fence(ctx, ring, - deps[j].handle); - if (IS_ERR(fence)) { - r = PTR_ERR(fence); - amdgpu_ctx_put(ctx); - return r; - - } else if (fence) { - r = amdgpu_sync_fence(adev, &p->job->sync, - fence); - dma_fence_put(fence); - amdgpu_ctx_put(ctx); - if (r) - return r; - } } } From 660e855813f78b7fe63ff1ebc4f2ca07d94add0b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 13 Mar 2017 22:18:15 +0000 Subject: [PATCH 3/5] amdgpu: use drm sync objects for shared semaphores (v6) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This creates a new command submission chunk for amdgpu to add in and out sync objects around the submission. Sync objects are managed via the drm syncobj ioctls. The command submission interface is enhanced with two new chunks, one for syncobj pre submission dependencies, and one for post submission sync obj signalling, and just takes a list of handles for each. This is based on work originally done by David Zhou at AMD, with input from Christian Konig on what things should look like. In theory VkFences could be backed with sync objects and just get passed into the cs as syncobj handles as well. NOTE: this interface addition needs a version bump to expose it to userspace. TODO: update to dep_sync when rebasing onto amdgpu master. (with this - r-b from Christian) v1.1: keep file reference on import. v2: move to using syncobjs v2.1: change some APIs to just use p pointer. v3: make more robust against CS failures, we now add the wait sems but only remove them once the CS job has been submitted. v4: rewrite names of API and base on new syncobj code. v5: move post deps earlier, rename some apis v6: lookup post deps earlier, and just replace fences in post deps stage (Christian) Reviewed-by: Christian König Signed-off-by: Dave Airlie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 88 ++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- include/uapi/drm/amdgpu_drm.h | 6 ++ 4 files changed, 97 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e0adad590ecb..9f827aca90ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1159,6 +1159,9 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_bo_list_entry uf_entry; + + unsigned num_post_dep_syncobjs; + struct drm_syncobj **post_dep_syncobjs; }; #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 29469e6b58b8..aeee6840e82b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_trace.h" @@ -154,6 +155,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) break; case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: break; default: @@ -682,6 +685,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); } + + for (i = 0; i < parser->num_post_dep_syncobjs; i++) + drm_syncobj_put(parser->post_dep_syncobjs[i]); + kfree(parser->post_dep_syncobjs); + dma_fence_put(parser->fence); if (parser->ctx) @@ -971,6 +979,64 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, return 0; } +static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + uint32_t handle) +{ + int r; + struct dma_fence *fence; + r = drm_syncobj_fence_get(p->filp, handle, &fence); + if (r) + return r; + + r = amdgpu_sync_fence(p->adev, &p->job->sync, fence); + dma_fence_put(fence); + + return r; +} + +static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_sem *deps; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle); + if (r) + return r; + } + return 0; +} + +static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + unsigned num_deps; + int i; + struct drm_amdgpu_cs_chunk_sem *deps; + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + p->post_dep_syncobjs = kmalloc_array(num_deps, + sizeof(struct drm_syncobj *), + GFP_KERNEL); + p->num_post_dep_syncobjs = 0; + + for (i = 0; i < num_deps; ++i) { + p->post_dep_syncobjs[i] = drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_dep_syncobjs[i]) + return -EINVAL; + p->num_post_dep_syncobjs++; + } + return 0; +} + static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { @@ -985,12 +1051,30 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, r = amdgpu_cs_process_fence_dep(p, chunk); if (r) return r; + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_IN) { + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); + if (r) + return r; + } else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SYNCOBJ_OUT) { + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); + if (r) + return r; } } return 0; } +static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) +{ + int i; + + for (i = 0; i < p->num_post_dep_syncobjs; ++i) { + drm_syncobj_replace_fence(p->filp, p->post_dep_syncobjs[i], + p->fence); + } +} + static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { @@ -1011,6 +1095,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->owner = p->filp; job->fence_ctx = entity->fence_context; p->fence = dma_fence_get(&job->base.s_fence->finished); + + amdgpu_cs_post_dependencies(p); + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; amdgpu_job_free_resources(job); @@ -1018,7 +1105,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8168f8ec711a..4c7c2628ace4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -782,7 +782,7 @@ static struct drm_driver kms_driver = { .driver_features = DRIVER_USE_AGP | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET, + DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 34128f698f5e..d9aa4a339650 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -440,6 +440,8 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_IB 0x01 #define AMDGPU_CHUNK_ID_FENCE 0x02 #define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 +#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 +#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 struct drm_amdgpu_cs_chunk { __u32 chunk_id; @@ -507,6 +509,10 @@ struct drm_amdgpu_cs_chunk_fence { __u32 offset; }; +struct drm_amdgpu_cs_chunk_sem { + __u32 handle; +}; + struct drm_amdgpu_cs_chunk_data { union { struct drm_amdgpu_cs_chunk_ib ib_data; From 99eea4df901907b9121ad1c726d86c352a5eaca5 Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Fri, 16 Jun 2017 00:23:41 -0400 Subject: [PATCH 4/5] drm/amdgpu: Optimization of AMDGPU_BO_LIST_OP_CREATE (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Remove duplication of zeroing of bo list (Christian König) Move idr_alloc function to end of ioctl (Christian König) Call kfree bo_list when amdgpu_bo_list_set return error. Combine the previous two patches into this patch. Add amdgpu_bo_list_set function prototype. Signed-off-by: Alex Xie Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 53 ++++++++++++--------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 9f0247cdda5e..7b5c3bb2142e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -35,33 +35,45 @@ #define AMDGPU_BO_LIST_MAX_PRIORITY 32u #define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) -static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, - struct amdgpu_bo_list **result, +static int amdgpu_bo_list_set(struct amdgpu_device *adev, + struct drm_file *filp, + struct amdgpu_bo_list *list, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries); + +static int amdgpu_bo_list_create(struct amdgpu_device *adev, + struct drm_file *filp, + struct drm_amdgpu_bo_list_entry *info, + unsigned num_entries, int *id) { int r; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_bo_list *list; - *result = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); - if (!*result) + list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL); + if (!list) return -ENOMEM; + /* initialize bo list*/ + mutex_init(&list->lock); + + r = amdgpu_bo_list_set(adev, filp, list, info, num_entries); + if (r) { + kfree(list); + return r; + } + + /* idr alloc should be called only after initialization of bo list. */ mutex_lock(&fpriv->bo_list_lock); - r = idr_alloc(&fpriv->bo_list_handles, *result, - 1, 0, GFP_KERNEL); + r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL); + mutex_unlock(&fpriv->bo_list_lock); if (r < 0) { - mutex_unlock(&fpriv->bo_list_lock); - kfree(*result); + kfree(list); return r; } *id = r; - mutex_init(&(*result)->lock); - (*result)->num_entries = 0; - (*result)->array = NULL; - - mutex_lock(&(*result)->lock); - mutex_unlock(&fpriv->bo_list_lock); - return 0; } @@ -77,6 +89,7 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) mutex_unlock(&list->lock); amdgpu_bo_list_free(list); } + mutex_unlock(&fpriv->bo_list_lock); } @@ -273,16 +286,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, switch (args->in.operation) { case AMDGPU_BO_LIST_OP_CREATE: - r = amdgpu_bo_list_create(fpriv, &list, &handle); + r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number, + &handle); if (r) goto error_free; - - r = amdgpu_bo_list_set(adev, filp, list, info, - args->in.bo_number); - amdgpu_bo_list_put(list); - if (r) - goto error_free; - break; case AMDGPU_BO_LIST_OP_DESTROY: From 5ac55629d6b3fcde69f46aa772c6e83be0bdcbbf Mon Sep 17 00:00:00 2001 From: Alex Xie Date: Fri, 16 Jun 2017 09:07:29 -0400 Subject: [PATCH 5/5] drm/amdgpu: Optimize mutex usage (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In original function amdgpu_bo_list_get, the waiting for result->lock can be quite long while mutex bo_list_lock was holding. It can make other tasks waiting for bo_list_lock for long period. Secondly, this patch allows several tasks(readers of idr) to proceed at the same time. v2: use rcu and kref (Dave Airlie and Christian König) v3: update v1 commit message (Michel Dänzer) v4: rebase on upstream (Alex Deucher) Signed-off-by: Alex Xie Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9f827aca90ee..12d61edb3597 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -869,6 +869,8 @@ struct amdgpu_fpriv { struct amdgpu_bo_list { struct mutex lock; + struct rcu_head rhead; + struct kref refcount; struct amdgpu_bo *gds_obj; struct amdgpu_bo *gws_obj; struct amdgpu_bo *oa_obj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 7b5c3bb2142e..f621ee115c98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, struct drm_amdgpu_bo_list_entry *info, unsigned num_entries); +static void amdgpu_bo_list_release_rcu(struct kref *ref) +{ + unsigned i; + struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list, + refcount); + + for (i = 0; i < list->num_entries; ++i) + amdgpu_bo_unref(&list->array[i].robj); + + mutex_destroy(&list->lock); + kvfree(list->array); + kfree_rcu(list, rhead); +} + static int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, @@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev, /* initialize bo list*/ mutex_init(&list->lock); - + kref_init(&list->refcount); r = amdgpu_bo_list_set(adev, filp, list, info, num_entries); if (r) { kfree(list); @@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id) mutex_lock(&fpriv->bo_list_lock); list = idr_remove(&fpriv->bo_list_handles, id); - if (list) { - /* Another user may have a reference to this list still */ - mutex_lock(&list->lock); - mutex_unlock(&list->lock); - amdgpu_bo_list_free(list); - } - mutex_unlock(&fpriv->bo_list_lock); + if (list) + kref_put(&list->refcount, amdgpu_bo_list_release_rcu); } static int amdgpu_bo_list_set(struct amdgpu_device *adev, @@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) { struct amdgpu_bo_list *result; - mutex_lock(&fpriv->bo_list_lock); + rcu_read_lock(); result = idr_find(&fpriv->bo_list_handles, id); - if (result) - mutex_lock(&result->lock); - mutex_unlock(&fpriv->bo_list_lock); + + if (result) { + if (kref_get_unless_zero(&result->refcount)) + mutex_lock(&result->lock); + else + result = NULL; + } + rcu_read_unlock(); + return result; } @@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, void amdgpu_bo_list_put(struct amdgpu_bo_list *list) { mutex_unlock(&list->lock); + kref_put(&list->refcount, amdgpu_bo_list_release_rcu); } void amdgpu_bo_list_free(struct amdgpu_bo_list *list)