From 925b6e59138cefa47275c67891c65d48d3266d57 Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 8 Jul 2022 02:30:47 -0700 Subject: [PATCH 01/17] Revert "drm/amdgpu: add drm buddy support to amdgpu" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c9cad937c0c58618fe5b0310fd539a854dc1ae95. This is part of a revert of the following commits: commit 708d19d9f362 ("drm/amdgpu: move internal vram_mgr function into the C file") commit 5e3f1e7729ec ("drm/amdgpu: fix start calculation in amdgpu_vram_mgr_new") commit c9cad937c0c5 ("drm/amdgpu: add drm buddy support to amdgpu") [WHY] Few users reported garbaged graphics as soon as x starts, reverting until this can be resolved. Signed-off-by: Arunpravin Paneer Selvam Link: https://patchwork.freedesktop.org/patch/msgid/20220708093047.492662-3-Arunpravin.PaneerSelvam@amd.com Reviewed-by: Christian König Signed-off-by: Christian König --- drivers/gpu/drm/Kconfig | 1 - .../gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 97 +---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 349 +++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 89 ----- 5 files changed, 171 insertions(+), 375 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e88c497fa010..f65656df3619 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -256,7 +256,6 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE - select DRM_BUDDY help Choose this option if you have a recent AMD Radeon graphics card. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..acfa207cf970 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,15 +30,12 @@ #include #include -#include "amdgpu_vram_mgr.h" - /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_t start; uint64_t size; uint64_t remaining; - void *node; - uint32_t mem_type; + struct drm_mm_node *node; }; /** @@ -55,63 +52,27 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { - struct drm_buddy_block *block; - struct list_head *head, *next; struct drm_mm_node *node; - if (!res) - goto fallback; + if (!res || res->mem_type == TTM_PL_SYSTEM) { + cur->start = start; + cur->size = size; + cur->remaining = size; + cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + return; + } BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - cur->mem_type = res->mem_type; + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; - switch (cur->mem_type) { - case TTM_PL_VRAM: - head = &to_amdgpu_vram_mgr_resource(res)->blocks; - - block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!block) - goto fallback; - - while (start >= amdgpu_vram_mgr_block_size(block)) { - start -= amdgpu_vram_mgr_block_size(block); - - next = block->link.next; - if (next != head) - block = list_entry(next, struct drm_buddy_block, link); - } - - cur->start = amdgpu_vram_mgr_block_start(block) + start; - cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size); - cur->remaining = size; - cur->node = block; - break; - case TTM_PL_TT: - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; - - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); - cur->remaining = size; - cur->node = node; - break; - default: - goto fallback; - } - - return; - -fallback: - cur->start = start; - cur->size = size; + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; + cur->node = node; } /** @@ -124,9 +85,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_buddy_block *block; - struct drm_mm_node *node; - struct list_head *next; + struct drm_mm_node *node = cur->node; BUG_ON(size > cur->remaining); @@ -140,27 +99,9 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) return; } - switch (cur->mem_type) { - case TTM_PL_VRAM: - block = cur->node; - - next = block->link.next; - block = list_entry(next, struct drm_buddy_block, link); - - cur->node = block; - cur->start = amdgpu_vram_mgr_block_start(block); - cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); - break; - case TTM_PL_TT: - node = cur->node; - - cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); - break; - default: - return; - } + cur->node = ++node; + cur->start = node->start << PAGE_SHIFT; + cur->size = min(node->size << PAGE_SHIFT, cur->remaining); } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6a70818039dd..9120ae80ef52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,7 +26,6 @@ #include #include -#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -39,6 +38,15 @@ #define AMDGPU_POISON 0xd0bed0be +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_mm mm; + spinlock_t lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; +}; + struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 49e4092f447f..0a7611648573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,10 +32,8 @@ #include "atom.h" struct amdgpu_vram_reservation { - u64 start; - u64 size; - struct list_head allocated; - struct list_head blocks; + struct list_head node; + struct drm_mm_node mm_node; }; static inline struct amdgpu_vram_mgr * @@ -188,18 +186,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible block size + * amdgpu_vram_mgr_vis_size - Calculate visible node size * * @adev: amdgpu_device pointer - * @block: DRM BUDDY block structure + * @node: MM node structure * - * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM + * Calculate how many bytes of the MM node are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_buddy_block *block) + struct drm_mm_node *node) { - u64 start = amdgpu_vram_mgr_block_start(block); - u64 end = start + amdgpu_vram_mgr_block_size(block); + uint64_t start = node->start << PAGE_SHIFT; + uint64_t end = (node->size + node->start) << PAGE_SHIFT; if (start >= adev->gmc.visible_vram_size) return 0; @@ -220,9 +218,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); - struct drm_buddy_block *block; - u64 usage = 0; + unsigned pages = res->num_pages; + struct drm_mm_node *mm; + u64 usage; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -230,8 +228,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - list_for_each_entry(block, &vres->blocks, link) - usage += amdgpu_vram_mgr_vis_size(adev, block); + mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; + for (usage = 0; pages; pages -= mm->size, mm++) + usage += amdgpu_vram_mgr_vis_size(adev, mm); return usage; } @@ -241,30 +240,23 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; + struct drm_mm *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; - struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { - if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, - rsv->size, mm->chunk_size, &rsv->allocated, - DRM_BUDDY_RANGE_ALLOCATION)) - continue; - - block = amdgpu_vram_mgr_first_block(&rsv->allocated); - if (!block) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { + if (drm_mm_reserve_node(mm, &rsv->mm_node)) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->start, rsv->size); + rsv->mm_node.start, rsv->mm_node.size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, block); + vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); atomic64_add(vis_usage, &mgr->vis_usage); spin_lock(&man->bdev->lru_lock); - man->usage += rsv->size; + man->usage += rsv->mm_node.size << PAGE_SHIFT; spin_unlock(&man->bdev->lru_lock); - list_move(&rsv->blocks, &mgr->reserved_pages); + list_move(&rsv->node, &mgr->reserved_pages); } } @@ -286,16 +278,14 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->allocated); - INIT_LIST_HEAD(&rsv->blocks); + INIT_LIST_HEAD(&rsv->node); + rsv->mm_node.start = start >> PAGE_SHIFT; + rsv->mm_node.size = size >> PAGE_SHIFT; - rsv->start = start; - rsv->size = size; - - mutex_lock(&mgr->lock); - list_add_tail(&rsv->blocks, &mgr->reservations_pending); + spin_lock(&mgr->lock); + list_add_tail(&rsv->node, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return 0; } @@ -317,19 +307,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - mutex_lock(&mgr->lock); + spin_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { - if (rsv->start <= start && - (start < (rsv->start + rsv->size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, node) { + if ((rsv->mm_node.start <= start) && + (start < (rsv->mm_node.start + rsv->mm_node.size))) { ret = 0; goto out; } @@ -337,10 +327,32 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); return ret; } +/** + * amdgpu_vram_mgr_virt_start - update virtual start address + * + * @mem: ttm_resource to update + * @node: just allocated node + * + * Calculate a virtual BO start address to easily check if everything is CPU + * accessible. + */ +static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, + struct drm_mm_node *node) +{ + unsigned long start; + + start = node->start + node->size; + if (start > mem->num_pages) + start -= mem->num_pages; + else + start = 0; + mem->start = max(mem->start, start); +} + /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -356,44 +368,46 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - u64 vis_usage = 0, max_bytes, cur_size, min_block_size; + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct amdgpu_vram_mgr_resource *vres; - u64 size, remaining_size, lpfn, fpfn; - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; - unsigned long pages_per_block; + uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct ttm_range_mgr_node *node; + struct drm_mm *mm = &mgr->mm; + enum drm_mm_insert_mode mode; + unsigned i; int r; - lpfn = place->lpfn << PAGE_SHIFT; + lpfn = place->lpfn; if (!lpfn) - lpfn = man->size; - - fpfn = place->fpfn << PAGE_SHIFT; + lpfn = man->size >> PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; + mem_bytes = tbo->base.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_block = ~0ul; + pages_per_node = ~0ul; + num_nodes = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_block = HPAGE_PMD_NR; + pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_block = 2UL << (20UL - PAGE_SHIFT); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, - tbo->page_alignment); + pages_per_node = max_t(uint32_t, pages_per_node, + tbo->page_alignment); + num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); } - vres = kzalloc(sizeof(*vres), GFP_KERNEL); - if (!vres) + node = kvmalloc(struct_size(node, mm_nodes, num_nodes), + GFP_KERNEL | __GFP_ZERO); + if (!node) return -ENOMEM; - ttm_resource_init(tbo, place, &vres->base); + ttm_resource_init(tbo, place, &node->base); /* bail out quickly if there's likely not enough VRAM for this BO */ if (ttm_resource_manager_usage(man) > max_bytes) { @@ -401,130 +415,66 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - INIT_LIST_HEAD(&vres->blocks); - + mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) - vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + mode = DRM_MM_INSERT_HIGH; - if (fpfn || lpfn != man->size) - /* Allocate blocks in desired range */ - vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + pages_left = node->base.num_pages; - remaining_size = vres->base.num_pages << PAGE_SHIFT; + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); - mutex_lock(&mgr->lock); - while (remaining_size) { - if (tbo->page_alignment) - min_block_size = tbo->page_alignment << PAGE_SHIFT; - else - min_block_size = mgr->default_page_size; + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { + uint32_t alignment = tbo->page_alignment; - BUG_ON(min_block_size < mm->chunk_size); + if (pages >= pages_per_node) + alignment = pages_per_node; - /* Limit maximum size to 2GiB due to SG table limitations */ - size = min(remaining_size, 2ULL << 30); - - if (size >= pages_per_block << PAGE_SHIFT) - min_block_size = pages_per_block << PAGE_SHIFT; - - cur_size = size; - - if (fpfn + size != place->lpfn << PAGE_SHIFT) { - /* - * Except for actual range allocation, modify the size and - * min_block_size conforming to continuous flag enablement - */ - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* - * Modify the size value if size is not - * aligned with min_block_size - */ - } else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); + r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, + alignment, 0, place->fpfn, + lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; } + goto error_free; } - r = drm_buddy_alloc_blocks(mm, fpfn, - lpfn, - size, - min_block_size, - &vres->blocks, - vres->flags); - if (unlikely(r)) - goto error_free_blocks; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); + amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); + pages_left -= pages; + ++i; - if (size > remaining_size) - remaining_size = 0; - else - remaining_size -= size; + if (pages > pages_left) + pages = pages_left; } - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); - if (cur_size != size) { - struct drm_buddy_block *block; - struct list_head *trim_list; - u64 original_size; - LIST_HEAD(temp); - - trim_list = &vres->blocks; - original_size = vres->base.num_pages << PAGE_SHIFT; - - /* - * If size value is rounded up to min_block_size, trim the last - * block to the required size - */ - if (!list_is_singular(&vres->blocks)) { - block = list_last_entry(&vres->blocks, typeof(*block), link); - list_move_tail(&block->link, &temp); - trim_list = &temp; - /* - * Compute the original_size value by subtracting the - * last block size with (aligned size - original size) - */ - original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); - } - - mutex_lock(&mgr->lock); - drm_buddy_block_trim(mm, - original_size, - trim_list); - mutex_unlock(&mgr->lock); - - if (!list_empty(&temp)) - list_splice_tail(trim_list, &vres->blocks); - } - - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - - block = amdgpu_vram_mgr_first_block(&vres->blocks); - if (!block) { - r = -EINVAL; - goto error_fini; - } - - vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; - - if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) - vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (i == 1) + node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; if (adev->gmc.xgmi.connected_to_cpu) - vres->base.bus.caching = ttm_cached; + node->base.bus.caching = ttm_cached; else - vres->base.bus.caching = ttm_write_combined; + node->base.bus.caching = ttm_write_combined; atomic64_add(vis_usage, &mgr->vis_usage); - *res = &vres->base; + *res = &node->base; return 0; -error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); +error_free: + while (i--) + drm_mm_remove_node(&node->mm_nodes[i]); + spin_unlock(&mgr->lock); error_fini: - ttm_resource_fini(man, &vres->base); - kfree(vres); + ttm_resource_fini(man, &node->base); + kvfree(node); return r; } @@ -540,26 +490,27 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; uint64_t vis_usage = 0; + unsigned i, pages; - mutex_lock(&mgr->lock); - list_for_each_entry(block, &vres->blocks, link) - vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + spin_lock(&mgr->lock); + for (i = 0, pages = res->num_pages; pages; + pages -= node->mm_nodes[i].size, ++i) { + struct drm_mm_node *mm = &node->mm_nodes[i]; + drm_mm_remove_node(mm); + vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); + } amdgpu_vram_mgr_do_reserve(man); - - drm_buddy_free_list(mm, &vres->blocks); - mutex_unlock(&mgr->lock); + spin_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); ttm_resource_fini(man, res); - kfree(vres); + kvfree(node); } /** @@ -591,7 +542,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_BUDDY blocks to export */ + /* Determine the number of DRM_MM nodes to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -607,10 +558,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_BUDDY blocks to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_BUDDY block + * Walk down DRM_MM nodes to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_MM node * and the number of bytes from it. Access the following - * DRM_BUDDY block(s) if more buffer needs to exported + * DRM_MM node(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -697,22 +648,13 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); - struct drm_buddy *mm = &mgr->mm; - struct drm_buddy_block *block; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); - mutex_lock(&mgr->lock); - drm_printf(printer, "default_page_size: %lluKiB\n", - mgr->default_page_size >> 10); - - drm_buddy_print(mm, printer); - - drm_printf(printer, "reserved:\n"); - list_for_each_entry(block, &mgr->reserved_pages, link) - drm_buddy_block_print(mm, block, printer); - mutex_unlock(&mgr->lock); + spin_lock(&mgr->lock); + drm_mm_print(&mgr->mm, printer); + spin_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -732,21 +674,16 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; - int err; ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); - if (err) - return err; - - mutex_init(&mgr->lock); + drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT); + spin_lock_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); - mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -774,16 +711,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - mutex_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); kfree(rsv); } - drm_buddy_fini(&mgr->mm); - mutex_unlock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h deleted file mode 100644 index 9a2db87186c7..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ /dev/null @@ -1,89 +0,0 @@ -/* SPDX-License-Identifier: MIT - * Copyright 2021 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __AMDGPU_VRAM_MGR_H__ -#define __AMDGPU_VRAM_MGR_H__ - -#include - -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_buddy mm; - /* protects access to buffer objects */ - struct mutex lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t vis_usage; - u64 default_page_size; -}; - -struct amdgpu_vram_mgr_resource { - struct ttm_resource base; - struct list_head blocks; - unsigned long flags; -}; - -static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) -{ - return drm_buddy_block_offset(block); -} - -static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) -{ - return PAGE_SIZE << drm_buddy_block_order(block); -} - -static inline struct drm_buddy_block * -amdgpu_vram_mgr_first_block(struct list_head *list) -{ - return list_first_entry_or_null(list, struct drm_buddy_block, link); -} - -static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) -{ - struct drm_buddy_block *block; - u64 start, size; - - block = amdgpu_vram_mgr_first_block(head); - if (!block) - return false; - - while (head != block->link.next) { - start = amdgpu_vram_mgr_block_start(block); - size = amdgpu_vram_mgr_block_size(block); - - block = list_entry(block->link.next, struct drm_buddy_block, link); - if (start + size != amdgpu_vram_mgr_block_start(block)) - return false; - } - - return true; -} - -static inline struct amdgpu_vram_mgr_resource * -to_amdgpu_vram_mgr_resource(struct ttm_resource *res) -{ - return container_of(res, struct amdgpu_vram_mgr_resource, base); -} - -#endif From e87197fbd137c888fd6c871c72fe7e89445dd015 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 11:41:06 +0300 Subject: [PATCH 02/17] drm/i915/gvt: IS_ERR() vs NULL bug in intel_gvt_update_reg_whitelist() The shmem_pin_map() function returns NULL, it doesn't return error pointers. Fixes: 97ea656521c8 ("drm/i915/gvt: Parse default state to update reg whitelist") Reviewed-by: Andrzej Hajda Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/Ysftoia2BPUyqVcD@kili Acked-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index c4118b808268..11971ee929f8 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3115,9 +3115,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } From 046cd8a2a9eec7c2b46b03958a2b6252ddff55b2 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 24 Jun 2022 06:04:06 -0700 Subject: [PATCH 03/17] drm/i915: fix a possible refcount leak in intel_dp_add_mst_connector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If drm_connector_init fails, intel_connector_free will be called to take care of proper free. So it is necessary to drop the refcount of port before intel_connector_free. Fixes: 091a4f91942a ("drm/i915: Handle drm-layer errors in intel_dp_add_mst_connector") Signed-off-by: Hangyu Hua Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20220624130406.17996-1-jose.souza@intel.com Signed-off-by: José Roberto de Souza (cherry picked from commit cea9ed611e85d36a05db52b6457bf584b7d969e2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 061b277e5ce7..14d2a64193b2 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -839,6 +839,7 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs, DRM_MODE_CONNECTOR_DisplayPort); if (ret) { + drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } From 1391b9cfd35bb8f10785a17cb4bb5ea8d10faaae Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 21 Jun 2022 16:30:05 -0700 Subject: [PATCH 04/17] drm/i915/guc: ADL-N should use the same GuC FW as ADL-S The only difference between the ADL S and P GuC FWs is the HWConfig support. ADL-N does not support HWConfig, so we should use the same binary as ADL-S, otherwise the GuC might attempt to fetch a config table that does not exist. ADL-N is internally identified as an ADL-P, so we need to special-case it in the FW selection code. Fixes: 7e28d0b26759 ("drm/i915/adl-n: Enable ADL-N platform") Cc: John Harrison Cc: Tejas Upadhyay Cc: Anusha Srivatsa Cc: Jani Nikula Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220621233005.3952293-1-daniele.ceraolospurio@intel.com (cherry picked from commit 971e4a9781742aaad1587e25fd5582b2dd595ef8) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f0d7b57b741e..2ff55b9994bc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. + * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; From 48da0f67c53eecd2594c302be6c8a665b7740eaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 20 Jun 2022 14:36:59 +0200 Subject: [PATCH 05/17] drm/i915: Fix vm use-after-free in vma destruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In vma destruction, the following race may occur: Thread 1: Thread 2: i915_vma_destroy(); ... list_del_init(vma->vm_link); ... mutex_unlock(vma->vm->mutex); __i915_vm_release(); release_references(); And in release_reference() we dereference vma->vm to get to the vm gt pointer, leading to a use-after free. However, __i915_vm_release() grabs the vm->mutex so the vm won't be destroyed before vma->vm->mutex is released, so extract the gt pointer under the vm->mutex to avoid the vma->vm dereference in release_references(). v2: Fix a typo in the commit message (Andi Shyti) Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5944 Fixes: e1a7ab4fca0c ("drm/i915: Remove the vm open count") Cc: Niranjana Vishwanathapura Cc: Matthew Auld Signed-off-by: Thomas Hellström Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220620123659.381772-1-thomas.hellstrom@linux.intel.com (cherry picked from commit 1926a6b75954fc1a8b44d10bd0c67db957b78cf7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0bffb70b3c5f..04d12f278f57 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1637,10 +1637,10 @@ static void force_unbind(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); } -static void release_references(struct i915_vma *vma, bool vm_ddestroy) +static void release_references(struct i915_vma *vma, struct intel_gt *gt, + bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; - struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1695,11 +1695,12 @@ void i915_vma_destroy_locked(struct i915_vma *vma) force_unbind(vma); list_del_init(&vma->vm_link); - release_references(vma, false); + release_references(vma, vma->vm->gt, false); } void i915_vma_destroy(struct i915_vma *vma) { + struct intel_gt *gt; bool vm_ddestroy; mutex_lock(&vma->vm->mutex); @@ -1707,8 +1708,11 @@ void i915_vma_destroy(struct i915_vma *vma) list_del_init(&vma->vm_link); vm_ddestroy = vma->vm_ddestroy; vma->vm_ddestroy = false; + + /* vma->vm may be freed when releasing vma->vm->mutex. */ + gt = vma->vm->gt; mutex_unlock(&vma->vm->mutex); - release_references(vma, vm_ddestroy); + release_references(vma, gt, vm_ddestroy); } void i915_vma_parked(struct intel_gt *gt) From 896dcabd1f8f613c533d948df17408c41f8929f5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 8 Jul 2022 12:41:04 +0300 Subject: [PATCH 06/17] drm/i915/selftests: fix a couple IS_ERR() vs NULL tests The shmem_pin_map() function doesn't return error pointers, it returns NULL. Fixes: be1cb55a07bf ("drm/i915/gt: Keep a no-frills swappable copy of the default context state") Signed-off-by: Dan Carpenter Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220708094104.GL2316@kadam (cherry picked from commit d50f5a109cf4ed50c5b575c1bb5fc3bd17b23308) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/selftest_lrc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); From aff1e0b09b54b64944b7fe32997229552737b9e9 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 11 Jul 2022 09:58:59 +0100 Subject: [PATCH 07/17] drm/i915/ttm: fix sg_table construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we encounter some monster sized local-memory page that exceeds the maximum sg length (UINT32_MAX), ensure that don't end up with some misaligned address in the entry that follows, leading to fireworks later. Also ensure we have some coverage of this in the selftests. v2(Chris): - Use round_down consistently to avoid udiv errors v3(Nirmoy): - Also update the max_segment in the selftest Fixes: f701b16d4cc5 ("drm/i915/ttm: add i915_sg_from_buddy_resource") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6379 Signed-off-by: Matthew Auld Cc: Thomas Hellström Cc: Nirmoy Das Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20220711085859.24198-1-matthew.auld@intel.com (cherry picked from commit bc99f1209f19fefa3ee11e77464ccfae541f4291) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 11 ++++++++-- drivers/gpu/drm/i915/i915_scatterlist.c | 19 +++++++++++++---- drivers/gpu/drm/i915/i915_scatterlist.h | 6 ++++-- drivers/gpu/drm/i915/intel_region_ttm.c | 10 ++++++--- drivers/gpu/drm/i915/intel_region_ttm.h | 3 ++- .../drm/i915/selftests/intel_memory_region.c | 21 +++++++++++++++++-- drivers/gpu/drm/i915/selftests/mock_region.c | 3 ++- 7 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..d30ebcaec8b9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -620,10 +620,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + u64 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); + page_alignment = bo->page_alignment << PAGE_SHIFT; + if (!page_alignment) + page_alignment = obj->mm.region->min_page_size; + /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. @@ -634,7 +639,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct i915_refct_sgt *rsgt; rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - res); + res, + page_alignment); if (IS_ERR(rsgt)) return rsgt; @@ -643,7 +649,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, + page_alignment); } static int i915_ttm_truncate(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 159571b9bd24..f63b50b71e10 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from a struct drm_mm_node, * taking a maximum segment length into account, splitting into segments @@ -77,15 +78,18 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start) + u64 region_start, + u64 page_alignment) { - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ + const u64 max_segment = round_down(UINT_MAX, page_alignment); u64 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; + GEM_BUG_ON(!max_segment); + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) return ERR_PTR(-ENOMEM); @@ -112,6 +116,8 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; @@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from struct i915_buddy_block list, * taking a maximum segment length into account, splitting into segments @@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start) + u64 region_start, + u64 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + const u64 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); + GEM_BUG_ON(!max_segment); rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) @@ -191,6 +200,8 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c6a1684081..b13e4cdea923 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start); + u64 region_start, + u64 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start); + u64 region_start, + u64 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..6873808a7015 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. So provide a way for the backends to translate the @@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) */ struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res) + struct ttm_resource *res, + u64 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + mem->region.start, + page_alignment); } else { - return i915_rsgt_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start, + page_alignment); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index cf9d86dcf409..98fba5155619 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res); + struct ttm_resource *res, + u64 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 73eb53edb8de..3b18e5905c86 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -451,7 +451,6 @@ static int igt_mock_splintered_region(void *arg) static int igt_mock_max_segment(void *arg) { - const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; @@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg) struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; + I915_RND_STATE(prng); LIST_HEAD(objects); + unsigned int max_segment; + unsigned int ps; u64 size; int err = 0; @@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); + ps = PAGE_SIZE; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + max_segment = round_down(UINT_MAX, ps); + + mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg) } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + dma_addr_t daddr = sg_dma_address(sg); + if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } + + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_close; + } } out_close: diff --git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 670557ce1024..bac21fe84ca5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) return PTR_ERR(obj->mm.res); obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - obj->mm.res); + obj->mm.res, + obj->mm.region->min_page_size); if (IS_ERR(obj->mm.rsgt)) { err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; From b24dcf1dc507f69ed3b5c66c2b6a0209ae80d4d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:32 +0100 Subject: [PATCH 08/17] drm/i915/gt: Serialize GRDOM access between multiple engine resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't allow two engines to be reset in parallel, as they would both try to select a reset bit (and send requests to common registers) and wait on that register, at the same time. Serialize control of the reset requests/acks using the uncore->lock, which will also ensure that no other GT state changes at the same time as the actual reset. Cc: stable@vger.kernel.org # v4.4 and upper Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7dbac3011729.1657639152.git.mchehab@kernel.org (cherry picked from commit 336561a914fc0c6f1218228718f633b31b7af1c3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. */ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } From a1c5a7bf79c1faa5633b918b5c0666545e84c4d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 12 Jul 2022 16:21:33 +0100 Subject: [PATCH 09/17] drm/i915/gt: Serialize TLB invalidates with GT resets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid trying to invalidate the TLB in the middle of performing an engine reset, as this may result in the reset timing out. Currently, the TLB invalidate is only serialised by its own mutex, forgoing the uncore lock, but we can take the uncore->lock as well to serialise the mmio access, thereby serialising with the GDRST. Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with i915 selftest/hangcheck. Cc: stable@vger.kernel.org # v4.4 and upper Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") Reported-by: Mauro Carvalho Chehab Tested-by: Mauro Carvalho Chehab Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Andi Shyti Acked-by: Thomas Hellström Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721ac6ea86c0677.1657639152.git.mchehab@kernel.org (cherry picked from commit 33da97894758737895e90c909f16786052680ef4) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/intel_gt.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 51a0fe60c050..531af6ad7007 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -1209,6 +1209,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -1223,7 +1237,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, From ad765fae792e16ce3c1d0b69ce939e3f7dba40ab Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 Jul 2022 16:20:11 +0200 Subject: [PATCH 10/17] drm/i915/gem: Look for waitboosting across the whole object prior to individual waits We employ a "waitboost" heuristic to detect when userspace is stalled waiting for results from earlier execution. Under latency sensitive work mixed between the gpu/cpu, the GPU is typically under-utilised and so RPS sees that low utilisation as a reason to downclock the frequency, causing longer stalls and lower throughput. The user left waiting for the results is not impressed. On applying commit 047a1b877ed4 ("dma-buf & drm/amdgpu: remove dma_resv workaround") it was observed that deinterlacing h264 on Haswell performance dropped by 2-5x. The reason being that the natural workload was not intense enough to trigger RPS (using HW evaluation intervals) to upclock, and so it was depending on waitboosting for the throughput. Commit 047a1b877ed4 ("dma-buf & drm/amdgpu: remove dma_resv workaround") changes the composition of dma-resv from keeping a single write fence + multiple read fences, to a single array of multiple write and read fences (a maximum of one pair of write/read fences per context). The iteration order was also changed implicitly from all-read fences then the single write fence, to a mix of write fences followed by read fences. It is that ordering change that belied the fragility of waitboosting. Currently, a waitboost is inspected at the point of waiting on an outstanding fence. If the GPU is backlogged such that we haven't yet stated the request we need to wait on, we force the GPU to upclock until the completion of that request. By changing the order in which we waited upon requests, we ended up waiting on those requests in sequence and as such we saw that each request was already started and so not a suitable candidate for waitboosting. Instead of asking whether to boost each fence in turn, we can look at whether boosting is required for the dma-resv ensemble prior to waiting on any fence, making the heuristic more robust to the order in which fences are stored in the dma-resv. Reported-by: Thomas Voegtle Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6284 Fixes: 047a1b877ed4 ("dma-buf & drm/amdgpu: remove dma_resv workaround") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Karolina Drobnik Tested-by: Thomas Voegtle Reviewed-by: Andi Shyti Acked-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/07e05518d9f6620d20cc1101ec1849203fe973f9.1657289332.git.karolina.drobnik@intel.com (cherry picked from commit 394e2b57a989113de494c52d4683444bcb02d4e1) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_wait.c | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 319936f91ac5..e6e01c2a74a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -9,6 +9,7 @@ #include #include "gt/intel_engine.h" +#include "gt/intel_rps.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); } +static void +i915_gem_object_boost(struct dma_resv *resv, unsigned int flags) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + /* + * Prescan all fences for potential boosting before we begin waiting. + * + * When we wait, we wait on outstanding fences serially. If the + * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced + * form 1:2, then as we look at each wait in turn we see that each + * request is currently executing and not worthy of boosting. But if + * we only happen to look at the final fence in the sequence (because + * of request coalescing or splitting between read/write arrays by + * the iterator), then we would boost. As such our decision to boost + * or not is delicately balanced on the order we wait on fences. + * + * So instead of looking for boosts sequentially, look for all boosts + * upfront and then wait on the outstanding fences. + */ + + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); + dma_resv_for_each_fence_unlocked(&cursor, fence) + if (dma_fence_is_i915(fence) && + !i915_request_started(to_request(fence))) + intel_rps_boost(to_request(fence)); + dma_resv_iter_end(&cursor); +} + static long i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, @@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; + i915_gem_object_boost(resv, flags); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { From 333991c4e66b3d4b5613315f18016da80344f659 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 24 Jun 2022 13:35:28 +0200 Subject: [PATCH 11/17] drm/i915/selftests: fix subtraction overflow bug On some machines hole_end can be small enough to cause subtraction overflow. On the other side (addr + 2 * min_alignment) can overflow in case of mock tests. This patch should handle both cases. Fixes: e1c5f754067b59 ("drm/i915: Avoid overflow in computing pot_hole loop termination") Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3674 Signed-off-by: Andrzej Hajda Reviewed-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20220624113528.2159210-1-andrzej.hajda@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit ab3edc679c552a466e4bf0b11af3666008bd65a2) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8633bec18fa7..ab9f17fc85bc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm, u64 addr; for (addr = round_up(hole_start + min_alignment, step) - min_alignment; - addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; + hole_end > addr && hole_end - addr >= 2 * min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { From acea108fa067d140bd155161a79b1fcd967f4137 Mon Sep 17 00:00:00 2001 From: Fangzhi Zuo Date: Wed, 6 Jul 2022 15:52:46 -0400 Subject: [PATCH 12/17] drm/amd/display: Ignore First MST Sideband Message Return Error [why] First MST sideband message returns AUX_RET_ERROR_HPD_DISCON on certain intel platform. Aux transaction considered failure if HPD unexpected pulled low. The actual aux transaction success in such case, hence do not return error. [how] Not returning error when AUX_RET_ERROR_HPD_DISCON detected on the first sideband message. v2: squash in additional DMI entries v3: squash in static fix Signed-off-by: Fangzhi Zuo Acked-by: Solomon Chiu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 39 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 8 ++++ .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 17 ++++++++ 3 files changed, 64 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 1c2984bbda51..ff9820498cce 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -1382,6 +1383,41 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } +static const struct dmi_system_id hpd_disconnect_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), + }, + }, + {} +}; + +static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +{ + const struct dmi_system_id *dmi_id; + + dm->aux_hpd_discon_quirk = false; + + dmi_id = dmi_first_match(hpd_disconnect_quirk_table); + if (dmi_id) { + dm->aux_hpd_discon_quirk = true; + DRM_INFO("aux_hpd_discon_quirk attached\n"); + } +} + static int amdgpu_dm_init(struct amdgpu_device *adev) { struct dc_init_data init_data; @@ -1508,6 +1544,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } INIT_LIST_HEAD(&adev->dm.da_list); + + retrieve_dmi_info(&adev->dm); + /* Display Core create. */ adev->dm.dc = dc_create(&init_data); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index aa34c0068f41..e80ef93f6550 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,14 @@ struct amdgpu_display_manager { * last successfully applied backlight values. */ u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; + + /** + * @aux_hpd_discon_quirk: + * + * quirk for hpd discon while aux is on-going. + * occurred on certain intel platform + */ + bool aux_hpd_discon_quirk; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 9221b6690a4a..2b9b095e5f03 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -56,6 +56,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, ssize_t result = 0; struct aux_payload payload; enum aux_return_code_type operation_result; + struct amdgpu_device *adev; + struct ddc_service *ddc; if (WARN_ON(msg->size > 16)) return -E2BIG; @@ -74,6 +76,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload, &operation_result); + /* + * w/a on certain intel platform where hpd is unexpected to pull low during + * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON + * aux transaction is succuess in such case, therefore bypass the error + */ + ddc = TO_DM_AUX(aux)->ddc_service; + adev = ddc->ctx->driver_context; + if (adev->dm.aux_hpd_discon_quirk) { + if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE && + operation_result == AUX_RET_ERROR_HPD_DISCON) { + result = 0; + operation_result = AUX_RET_SUCCESS; + } + } + if (payload.write && result >= 0) result = msg->size; From c0044865480a162146b9dfe7783e73a08e97b2b9 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 11 Jul 2022 16:03:08 +0800 Subject: [PATCH 13/17] drm/amdkfd: correct the MEC atomic support firmware checking for GC 10.3.7 On the GC 10.3.7 platform the initial MEC release version #3 can support atomic operation,so need correct and set its MEC atomic support version to #3. Signed-off-by: Prike Liang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 5.18.x --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index bf4200457772..a08769c5e94b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -184,6 +184,8 @@ static void kfd_device_info_init(struct kfd_dev *kfd, /* Navi2x+, Navi1x+ */ if (gc_version == IP_VERSION(10, 3, 6)) kfd->device_info.no_atomic_fw_version = 14; + else if (gc_version == IP_VERSION(10, 3, 7)) + kfd->device_info.no_atomic_fw_version = 3; else if (gc_version >= IP_VERSION(10, 3, 0)) kfd->device_info.no_atomic_fw_version = 92; else if (gc_version >= IP_VERSION(10, 1, 1)) From add61d3c31de6a4b5e11a2ab96aaf4c873481568 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 11 Jul 2022 19:39:28 +0200 Subject: [PATCH 14/17] drm/amd/display: Only use depth 36 bpp linebuffers on DCN display engines. Various DCE versions had trouble with 36 bpp lb depth, requiring fixes, last time in commit 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") for DCE-8. So far >= DCE-11.2 was considered ok, but now I found out that on DCE-11.2 it causes dithering when there shouldn't be any, so identity pixel passthrough with identity gamma LUTs doesn't work when it should. This breaks various important neuroscience applications, as reported to me by scientific users of Polaris cards under Ubuntu 22.04 with Linux 5.15, and confirmed by testing it myself on DCE-11.2. Lets only use depth 36 for DCN engines, where my testing showed that it is both necessary for high color precision output, e.g., RGBA16 fb's, and not harmful, as far as more than one year in real-world use showed. DCE engines seem to work fine for high precision output at 30 bpp, so this ("famous last words") depth 30 should hopefully fix all known problems without introducing new ones. Successfully retested on DCE-11.2 Polaris and DCN-1.0 Raven Ridge on top of Linux 5.19.0-rc2 + drm-next. Fixes: 353ca0fa5630 ("drm/amd/display: Fix 10bit 4K display on CIK GPUs") Signed-off-by: Mario Kleiner Tested-by: Mario Kleiner Cc: stable@vger.kernel.org # 5.14.0 Cc: Alex Deucher Cc: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 6774dd8bb53e..3fe3fbac1e63 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1117,12 +1117,13 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * on certain displays, such as the Sharp 4k. 36bpp is needed * to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and * SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc - * precision on at least DCN display engines. However, at least - * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, - * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 - * did not show such problems, so this seems to be the exception. + * precision on DCN display engines, but apparently not for DCE, as + * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have + * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth, + * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel + * passthrough). Therefore only use 36 bpp on DCN where it is actually needed. */ - if (plane_state->ctx->dce_version > DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_MAX) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; From 0638c98c17aa12fe914459c82cd178247e21fb2b Mon Sep 17 00:00:00 2001 From: Yefim Barashkin Date: Mon, 11 Jul 2022 14:35:11 -0800 Subject: [PATCH 15/17] drm/amd/pm: Prevent divide by zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit divide error: 0000 [#1] SMP PTI CPU: 3 PID: 78925 Comm: tee Not tainted 5.15.50-1-lts #1 Hardware name: MSI MS-7A59/Z270 SLI PLUS (MS-7A59), BIOS 1.90 01/30/2018 RIP: 0010:smu_v11_0_set_fan_speed_rpm+0x11/0x110 [amdgpu] Speed is user-configurable through a file. I accidentally set it to zero, and the driver crashed. Reviewed-by: Evan Quan Reviewed-by: André Almeida Signed-off-by: Yefim Barashkin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5f8809f6990d..2fbd2926a531 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -1228,6 +1228,8 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t crystal_clock_freq = 2500; uint32_t tach_period; + if (speed == 0) + return -EINVAL; /* * To prevent from possible overheat, some ASICs may have requirement * for minimum fan speed: From 47053b1e7382628dd30415685ae257f766a311e4 Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Tue, 12 Jul 2022 10:32:39 -0100 Subject: [PATCH 16/17] drm/amd/display: correct check of coverage blend mode Check the value of per_pixel_alpha to decide whether the Coverage pixel blend mode is applicable or not. Fixes: 76818cdd11a2 ("drm/amd/display: add Coverage blend mode for overlay plane") Reported-by: kernel test robot Reported-by: Dan Carpenter Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ff9820498cce..d2e628358429 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5446,7 +5446,7 @@ fill_blending_from_plane_state(const struct drm_plane_state *plane_state, } } - if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) + if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE) *pre_multiplied_alpha = false; } From 3283c83eb6fcfbda8ea03d7149d8e42e71c5d45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 11 Jul 2022 16:51:31 +0200 Subject: [PATCH 17/17] drm/amd/display: Ensure valid event timestamp for cursor-only commits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Requires enabling the vblank machinery for them. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2030 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d2e628358429..93ac33a8de9a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -463,6 +463,26 @@ static void dm_pflip_high_irq(void *interrupt_params) vrr_active, (int) !e); } +static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc) +{ + struct drm_crtc *crtc = &acrtc->base; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + drm_crtc_handle_vblank(crtc); + + spin_lock_irqsave(&dev->event_lock, flags); + + /* Send completion event for cursor-only commits */ + if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { + drm_crtc_send_vblank_event(crtc, acrtc->event); + drm_crtc_vblank_put(crtc); + acrtc->event = NULL; + } + + spin_unlock_irqrestore(&dev->event_lock, flags); +} + static void dm_vupdate_high_irq(void *interrupt_params) { struct common_irq_params *irq_params = interrupt_params; @@ -501,7 +521,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) * if a pageflip happened inside front-porch. */ if (vrr_active) { - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /* BTR processing for pre-DCE12 ASICs */ if (acrtc->dm_irq_params.stream && @@ -553,7 +573,7 @@ static void dm_crtc_high_irq(void *interrupt_params) * to dm_vupdate_high_irq after end of front-porch. */ if (!vrr_active) - drm_crtc_handle_vblank(&acrtc->base); + dm_crtc_handle_vblank(acrtc); /** * Following stuff must happen at start of vblank, for crc @@ -9174,6 +9194,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct amdgpu_bo *abo; uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); + bool cursor_update = false; bool pflip_present = false; struct { struct dc_surface_update surface_updates[MAX_SURFACES]; @@ -9209,8 +9230,13 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); /* Cursor plane is handled after stream updates */ - if (plane->type == DRM_PLANE_TYPE_CURSOR) + if (plane->type == DRM_PLANE_TYPE_CURSOR) { + if ((fb && crtc == pcrtc) || + (old_plane_state->fb && old_plane_state->crtc == pcrtc)) + cursor_update = true; + continue; + } if (!fb || !crtc || pcrtc != crtc) continue; @@ -9373,6 +9399,17 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->stream_update.vrr_infopacket = &acrtc_state->stream->vrr_infopacket; } + } else if (cursor_update && acrtc_state->active_planes > 0 && + !acrtc_state->force_dpms_off && + acrtc_attach->base.state->event) { + drm_crtc_vblank_get(pcrtc); + + spin_lock_irqsave(&pcrtc->dev->event_lock, flags); + + acrtc_attach->event = acrtc_attach->base.state->event; + acrtc_attach->base.state->event = NULL; + + spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags); } /* Update the planes if changed or disable if we don't have any. */