Merge tag 'drm-xe-next-2026-03-25' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-next

Hi Dave and Sima,

Here goes our third, and perhaps final, drm-xe-next PR towards 7.1.

In the big things we have:
- THP support in drm_pagemap
- xe_vm_get_property_ioctl

Thanks,
Matt

UAPI Changes:
- Implement xe_vm_get_property_ioctl (Jonathan)

Cross-subsystem Changes:
- Enable THP support in drm_pagemap (Francois, Brost)

Core Changes:
- Improve VF FLR synchronization for Xe VFIO (Piotr)

Driver Changes:
- Fix confusion with locals on context creation (Tomasz, Fixes)
- Add new SVM copy GT stats per size (Francois)
- Always keep track of remap prev/next (Auld, Fixes)
- AuxCCS handling and render compression modifiers (Tvrtko)
- Implement recent spec updates to Wa_16025250150 (Roper)
- xe3p_lpg: L2 flush optimization (Tejas)
- vf: Improve getting clean NULL context (Wajdeczko)
- pf: Fix use-after-free in migration restore (Winiarski, Fixes)
- Fix format specifier for printing pointer differences (Nathan Chancellor, Fixes)
- Extend Wa_14026781792 for xe3lpg (Niton)
- xe3p_lpg: Add Wa_16029437861 (Varun)
- Fix spelling mistakes and comment style in ttm_resource.c (Varun)
- Merge drm/drm-next into drm-xe-next (Thomas)
- Fix missing runtime PM reference in ccs_mode_store (Sanjay, Fixes)
- Fix uninitialized new_ts when capturing context timestamp (Umesh)
- Allow reading after disabling OA stream (Ashutosh)
- Page Reclamation Fixes (Brian Nguyen, Fixes)
- Include running dword offset in default_lrc dumps (Roper)
- Assert/Deassert I2C IRQ (Raag)
- Fixup reset, wedge, unload corner cases (Zhanjun, Brost)
- Fail immediately on GuC load error (Daniele)
- Fix kernel-doc for DRM_XE_VM_BIND_FLAG_DECOMPRESS (Niton, Fixes)
- Drop redundant entries for Wa_16021867713 & Wa_14019449301 (Roper, Fixes)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Matthew Brost <matthew.brost@intel.com>
Link: https://patch.msgid.link/acS5xmWC3ivPTmyV@gsse-cloud1.jf.intel.com
Committed by Dave Airlie on 2026-03-27 11:01:44 +10:00 as commit 72b585da55.
56 changed files with 1340 additions and 293 deletions


@ -1488,12 +1488,15 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
order = drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
if (is_device_private_page(page) ||
is_device_coherent_page(page)) {
struct drm_pagemap_zdd *__zdd =
drm_pagemap_page_zone_device_data(page);
if (!ctx->allow_mixed &&
zdd != page->zone_device_data && i > 0) {
zdd != __zdd && i > 0) {
err = -EOPNOTSUPP;
goto err_unmap;
}
zdd = page->zone_device_data;
zdd = __zdd;
if (pagemap != page_pgmap(page)) {
if (pagemap) {
err = -EOPNOTSUPP;


@ -154,15 +154,15 @@ static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd)
}
/**
* drm_pagemap_migration_unlock_put_page() - Put a migration page
* @page: Pointer to the page to put
* drm_pagemap_migration_unlock_put_folio() - Put a migration folio
* @folio: Pointer to the folio to put
*
* This function unlocks and puts a page.
* This function unlocks and puts a folio.
*/
static void drm_pagemap_migration_unlock_put_page(struct page *page)
static void drm_pagemap_migration_unlock_put_folio(struct folio *folio)
{
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
}
/**
@ -177,31 +177,42 @@ static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
{
unsigned long i;
for (i = 0; i < npages; ++i) {
for (i = 0; i < npages;) {
struct page *page;
struct folio *folio;
unsigned int order = 0;
if (!migrate_pfn[i])
continue;
goto next;
page = migrate_pfn_to_page(migrate_pfn[i]);
drm_pagemap_migration_unlock_put_page(page);
folio = page_folio(page);
order = folio_order(folio);
drm_pagemap_migration_unlock_put_folio(folio);
migrate_pfn[i] = 0;
next:
i += NR_PAGES(order);
}
}
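Several loops in this file are converted from per-page to per-folio stepping using the pattern above. A standalone model of the stride, assuming NR_PAGES(order) expands to 1UL << (order) as its use here implies:

#include <stdio.h>

#define NR_PAGES(order) (1UL << (order)) /* assumed definition */

int main(void)
{
	/* orders[] stands in for folio_order() of each lead page */
	unsigned int orders[] = { 0, 9, 0 };   /* 4K, 2M (PMD), 4K */
	unsigned long npages = 1 + NR_PAGES(9) + 1;
	unsigned long i, idx = 0;

	for (i = 0; i < npages;) {
		unsigned int order = orders[idx++];

		printf("entry %lu covers %lu pages\n", i, NR_PAGES(order));
		i += NR_PAGES(order); /* skip the folio's tail pages */
	}
	return 0;
}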
/**
* drm_pagemap_get_devmem_page() - Get a reference to a device memory page
* @page: Pointer to the page
* @order: Order
* @zdd: Pointer to the GPU SVM zone device data
*
* This function associates the given page with the specified GPU SVM zone
* device data and initializes it for zone device usage.
*/
static void drm_pagemap_get_devmem_page(struct page *page,
unsigned int order,
struct drm_pagemap_zdd *zdd)
{
page->zone_device_data = drm_pagemap_zdd_get(zdd);
zone_device_page_init(page, page_pgmap(page), 0);
zone_device_folio_init((struct folio *)page, zdd->dpagemap->pagemap,
order);
folio_set_zone_device_data(page_folio(page), drm_pagemap_zdd_get(zdd));
}
/**
@ -244,7 +255,7 @@ static int drm_pagemap_migrate_map_pages(struct device *dev,
order = folio_order(folio);
if (is_device_private_page(page)) {
struct drm_pagemap_zdd *zdd = page->zone_device_data;
struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
struct drm_pagemap *dpagemap = zdd->dpagemap;
struct drm_pagemap_addr addr;
@ -315,7 +326,7 @@ static void drm_pagemap_migrate_unmap_pages(struct device *dev,
goto next;
if (is_zone_device_page(page)) {
struct drm_pagemap_zdd *zdd = page->zone_device_data;
struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
struct drm_pagemap *dpagemap = zdd->dpagemap;
dpagemap->ops->device_unmap(dpagemap, dev, &pagemap_addr[i]);
@ -444,6 +455,41 @@ static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
return ret;
}
/**
* drm_pagemap_cpages() - Count collected pages
* @migrate_pfn: Array of migrate_pfn entries to account
* @npages: Number of entries in @migrate_pfn
*
* Compute the total number of minimum-sized pages represented by the
* collected entries in @migrate_pfn. The total is derived from the
* order encoded in each entry.
*
* Return: Total number of minimum-sized pages.
*/
static int drm_pagemap_cpages(unsigned long *migrate_pfn, unsigned long npages)
{
unsigned long i, cpages = 0;
for (i = 0; i < npages;) {
struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
struct folio *folio;
unsigned int order = 0;
if (page) {
folio = page_folio(page);
order = folio_order(folio);
cpages += NR_PAGES(order);
} else if (migrate_pfn[i] & MIGRATE_PFN_COMPOUND) {
order = HPAGE_PMD_ORDER;
cpages += NR_PAGES(order);
}
i += NR_PAGES(order);
}
return cpages;
}
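For example, with 4K base pages and HPAGE_PMD_ORDER == 9, one collected PMD-sized entry contributes NR_PAGES(9) == 512 to cpages and advances i by 512 in a single step, so a range of one 2M folio plus two 4K pages totals cpages == 514.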
/**
* drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
* @devmem_allocation: The device memory allocation to migrate to.
@ -481,7 +527,7 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
.end = end,
.pgmap_owner = pagemap->owner,
.flags = MIGRATE_VMA_SELECT_SYSTEM | MIGRATE_VMA_SELECT_DEVICE_COHERENT |
MIGRATE_VMA_SELECT_DEVICE_PRIVATE,
MIGRATE_VMA_SELECT_DEVICE_PRIVATE | MIGRATE_VMA_SELECT_COMPOUND,
};
unsigned long i, npages = npages_in_range(start, end);
unsigned long own_pages = 0, migrated_pages = 0;
@ -546,7 +592,8 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
goto err_free;
}
if (migrate.cpages != npages) {
if (migrate.cpages != npages &&
drm_pagemap_cpages(migrate.src, npages) != npages) {
/*
* Some pages to migrate. But we want to migrate all or
* nothing. Raced or unknown device pages.
@ -586,20 +633,23 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
own_pages = 0;
for (i = 0; i < npages; ++i) {
for (i = 0; i < npages;) {
unsigned long j;
struct page *page = pfn_to_page(migrate.dst[i]);
struct page *src_page = migrate_pfn_to_page(migrate.src[i]);
cur.start = i;
unsigned int order = 0;
cur.start = i;
pages[i] = NULL;
if (src_page && is_device_private_page(src_page)) {
struct drm_pagemap_zdd *src_zdd = src_page->zone_device_data;
struct drm_pagemap_zdd *src_zdd =
drm_pagemap_page_zone_device_data(src_page);
if (page_pgmap(src_page) == pagemap &&
!mdetails->can_migrate_same_pagemap) {
migrate.dst[i] = 0;
own_pages++;
continue;
goto next;
}
if (mdetails->source_peer_migrates) {
cur.dpagemap = src_zdd->dpagemap;
@ -615,7 +665,20 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
pages[i] = page;
}
migrate.dst[i] = migrate_pfn(migrate.dst[i]);
drm_pagemap_get_devmem_page(page, zdd);
if (migrate.src[i] & MIGRATE_PFN_COMPOUND) {
drm_WARN_ONCE(dpagemap->drm, src_page &&
folio_order(page_folio(src_page)) != HPAGE_PMD_ORDER,
"Unexpected folio order\n");
order = HPAGE_PMD_ORDER;
migrate.dst[i] |= MIGRATE_PFN_COMPOUND;
for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
migrate.dst[i + j] = 0;
}
drm_pagemap_get_devmem_page(page, order, zdd);
/* If we switched the migrating drm_pagemap, migrate previous pages now */
err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
@ -625,7 +688,11 @@ int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
npages = i + 1;
goto err_finalize;
}
next:
i += NR_PAGES(order);
}
cur.start = npages;
cur.ops = NULL; /* Force migration */
err = drm_pagemap_migrate_range(devmem_allocation, migrate.src, migrate.dst,
@ -715,8 +782,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
goto next;
if (fault_page) {
if (src_page->zone_device_data !=
fault_page->zone_device_data)
if (drm_pagemap_page_zone_device_data(src_page) !=
drm_pagemap_page_zone_device_data(fault_page))
goto next;
}
@ -734,6 +801,8 @@ static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
page = folio_page(folio, 0);
mpfn[i] = migrate_pfn(page_to_pfn(page));
if (order)
mpfn[i] |= MIGRATE_PFN_COMPOUND;
next:
if (page)
addr += page_size(page);
@ -989,8 +1058,15 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
if (err)
goto err_finalize;
for (i = 0; i < npages; ++i)
for (i = 0; i < npages;) {
unsigned int order = 0;
pages[i] = migrate_pfn_to_page(src[i]);
if (pages[i])
order = folio_order(page_folio(pages[i]));
i += NR_PAGES(order);
}
err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err)
@ -1043,7 +1119,8 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
.vma = vas,
.pgmap_owner = page_pgmap(page)->owner,
.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE |
MIGRATE_VMA_SELECT_DEVICE_COHERENT,
MIGRATE_VMA_SELECT_DEVICE_COHERENT |
MIGRATE_VMA_SELECT_COMPOUND,
.fault_page = page,
};
struct drm_pagemap_migrate_details mdetails = {};
@ -1057,7 +1134,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
zdd = page->zone_device_data;
zdd = drm_pagemap_page_zone_device_data(page);
if (time_before64(get_jiffies_64(), zdd->devmem_allocation->timeslice_expiration))
return 0;
@ -1109,8 +1186,15 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
if (err)
goto err_finalize;
for (i = 0; i < npages; ++i)
for (i = 0; i < npages;) {
unsigned int order = 0;
pages[i] = migrate_pfn_to_page(migrate.src[i]);
if (pages[i])
order = folio_order(page_folio(pages[i]));
i += NR_PAGES(order);
}
err = ops->copy_to_ram(pages, pagemap_addr, npages, NULL);
if (err)
@ -1140,7 +1224,9 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
*/
static void drm_pagemap_folio_free(struct folio *folio)
{
drm_pagemap_zdd_put(folio->page.zone_device_data);
struct page *page = folio_page(folio, 0);
drm_pagemap_zdd_put(drm_pagemap_page_zone_device_data(page));
}
/**
@ -1156,7 +1242,7 @@ static void drm_pagemap_folio_free(struct folio *folio)
*/
static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
{
struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data;
struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(vmf->page);
int err;
err = __drm_pagemap_migrate_to_ram(vmf->vma,
@ -1166,9 +1252,22 @@ static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf)
return err ? VM_FAULT_SIGBUS : 0;
}
static void drm_pagemap_folio_split(struct folio *orig_folio, struct folio *new_folio)
{
struct drm_pagemap_zdd *zdd;
if (!new_folio)
return;
new_folio->pgmap = orig_folio->pgmap;
zdd = folio_zone_device_data(orig_folio);
folio_set_zone_device_data(new_folio, drm_pagemap_zdd_get(zdd));
}
static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = {
.folio_free = drm_pagemap_folio_free,
.migrate_to_ram = drm_pagemap_migrate_to_ram,
.folio_split = drm_pagemap_folio_split,
};
/**
@ -1222,7 +1321,7 @@ EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init);
*/
struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page)
{
struct drm_pagemap_zdd *zdd = page->zone_device_data;
struct drm_pagemap_zdd *zdd = drm_pagemap_page_zone_device_data(page);
return zdd->devmem_allocation->dpagemap;
}
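This hunk is one of many that replace direct page->zone_device_data access with drm_pagemap_page_zone_device_data(). The helper itself is not shown in these hunks; a plausible folio-aware definition, stated as an assumption, would be:

static struct drm_pagemap_zdd *
drm_pagemap_page_zone_device_data(struct page *page)
{
	/* after the THP rework, zone-device data lives on the folio */
	return folio_zone_device_data(page_folio(page));
}

This would match the folio_set_zone_device_data()/folio_zone_device_data() pairing visible in drm_pagemap_get_devmem_page() and drm_pagemap_folio_split() above.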


@ -37,7 +37,7 @@
#include <drm/drm_print.h>
#include <drm/drm_util.h>
/* Detach the cursor from the bulk move list*/
/* Detach the cursor from the bulk move list */
static void
ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor)
{
@ -105,9 +105,9 @@ void ttm_resource_cursor_init(struct ttm_resource_cursor *cursor,
* ttm_resource_cursor_fini() - Finalize the LRU list cursor usage
* @cursor: The struct ttm_resource_cursor to finalize.
*
* The function pulls the LRU list cursor off any lists it was previusly
* The function pulls the LRU list cursor off any lists it was previously
* attached to. Needs to be called with the LRU lock held. The function
* can be called multiple times after eachother.
* can be called multiple times after each other.
*/
void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor)
{
@ -317,10 +317,10 @@ void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
}
/**
* ttm_resource_init - resource object constructure
* @bo: buffer object this resources is allocated for
* ttm_resource_init - resource object constructor
* @bo: buffer object this resource is allocated for
* @place: placement of the resource
* @res: the resource object to inistilize
* @res: the resource object to initialize
*
* Initialize a new resource object. Counterpart of ttm_resource_fini().
*/
@ -435,7 +435,7 @@ EXPORT_SYMBOL(ttm_resource_free);
* @size: How many bytes the new allocation needs.
*
* Test if @res intersects with @place and @size. Used for testing if evictions
* are valueable or not.
* are valuable or not.
*
* Returns true if the res placement intersects with @place and @size.
*/
@ -513,7 +513,7 @@ void ttm_resource_set_bo(struct ttm_resource *res,
* @bdev: ttm device this manager belongs to
* @size: size of managed resources in arbitrary units
*
* Initialise core parts of a manager object.
* Initialize core parts of a manager object.
*/
void ttm_resource_manager_init(struct ttm_resource_manager *man,
struct ttm_device *bdev,
@ -536,8 +536,8 @@ EXPORT_SYMBOL(ttm_resource_manager_init);
/*
* ttm_resource_manager_evict_all
*
* @bdev - device to use
* @man - manager to use
* @bdev: device to use
* @man: manager to use
*
* Evict all the objects out of a memory manager until it is empty.
* Part of memory manager cleanup sequence.
@ -882,7 +882,7 @@ ttm_kmap_iter_linear_io_init(struct ttm_kmap_iter_linear_io *iter_io,
/**
* ttm_kmap_iter_linear_io_fini - Clean up an iterator for linear io memory
* @iter_io: The iterator to initialize
* @iter_io: The iterator to finalize
* @bdev: The TTM device
* @mem: The ttm resource representing the iomap.
*
@ -921,15 +921,15 @@ DEFINE_SHOW_ATTRIBUTE(ttm_resource_manager);
/**
* ttm_resource_manager_create_debugfs - Create debugfs entry for specified
* resource manager.
* @man: The TTM resource manager for which the debugfs stats file be creates
* @man: The TTM resource manager for which the debugfs stats file to be created
* @parent: debugfs directory in which the file will reside
* @name: The filename to create.
*
* This function setups up a debugfs file that can be used to look
* This function sets up a debugfs file that can be used to look
* at debug statistics of the specified ttm_resource_manager.
*/
void ttm_resource_manager_create_debugfs(struct ttm_resource_manager *man,
struct dentry * parent,
struct dentry *parent,
const char *name)
{
#if defined(CONFIG_DEBUG_FS)


@ -56,9 +56,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size
if (intel_fbdev_fb_prefer_stolen(drm, size)) {
obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
ttm_bo_type_kernel,
XE_BO_FLAG_FORCE_WC |
XE_BO_FLAG_STOLEN |
XE_BO_FLAG_GGTT, false);
XE_BO_FLAG_GGTT,
false);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@ -69,9 +71,11 @@ struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size
if (IS_ERR(obj)) {
obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size,
ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
ttm_bo_type_kernel,
XE_BO_FLAG_FORCE_WC |
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_GGTT, false);
XE_BO_FLAG_GGTT,
false);
}
if (IS_ERR(obj)) {


@ -541,6 +541,13 @@ static const struct intel_display_irq_interface xe_display_irq_interface = {
.synchronize = irq_synchronize,
};
static bool has_auxccs(struct drm_device *drm)
{
struct xe_device *xe = to_xe_device(drm);
return xe->info.platform == XE_ALDERLAKE_P;
}
static const struct intel_display_parent_interface parent = {
.bo = &xe_display_bo_interface,
.dsb = &xe_display_dsb_interface,
@ -552,6 +559,7 @@ static const struct intel_display_parent_interface parent = {
.pcode = &xe_display_pcode_interface,
.rpm = &xe_display_rpm_interface,
.stolen = &xe_display_stolen_interface,
.has_auxccs = has_auxccs,
};
/**


@ -42,9 +42,9 @@ static int xe_display_bo_framebuffer_init(struct drm_gem_object *obj,
if (ret)
goto err;
if (!(bo->flags & XE_BO_FLAG_SCANOUT)) {
if (!(bo->flags & XE_BO_FLAG_FORCE_WC)) {
/*
* XE_BO_FLAG_SCANOUT should ideally be set at creation, or is
* XE_BO_FLAG_FORCE_WC should ideally be set at creation, or is
* automatically set when creating FB. We cannot change caching
* mode when the bo is VM_BINDed, so we can only set
* coherency with display when unbound.
@ -54,7 +54,7 @@ static int xe_display_bo_framebuffer_init(struct drm_gem_object *obj,
ret = -EINVAL;
goto err;
}
bo->flags |= XE_BO_FLAG_SCANOUT;
bo->flags |= XE_BO_FLAG_FORCE_WC;
}
ttm_bo_unreserve(&bo->ttm);
return 0;


@ -54,7 +54,9 @@ static struct intel_dsb_buffer *xe_dsb_buffer_create(struct drm_device *drm, siz
PAGE_ALIGN(size),
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false);
XE_BO_FLAG_FORCE_WC |
XE_BO_FLAG_GGTT,
false);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_pin_map;


@ -49,33 +49,94 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
*dpt_ofs = ALIGN(*dpt_ofs, 4096);
}
static void
write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
u32 bo_ofs, u32 width, u32 height, u32 src_stride,
u32 dst_stride)
static unsigned int
write_dpt_padding(struct iosys_map *map, unsigned int dest, unsigned int pad)
{
/* The DE ignores the PTEs for the padding tiles */
return dest + pad * sizeof(u64);
}
static unsigned int
write_dpt_remapped_linear(struct xe_bo *bo, struct iosys_map *map,
unsigned int dest,
const struct intel_remapped_plane_info *plane)
{
struct xe_device *xe = xe_bo_device(bo);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
u32 column, row;
u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]);
const u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo,
xe->pat.idx[XE_CACHE_NONE]);
unsigned int offset = plane->offset * XE_PAGE_SIZE;
unsigned int size = plane->size;
for (row = 0; row < height; row++) {
u32 src_idx = src_stride * row + bo_ofs;
while (size--) {
u64 addr = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
for (column = 0; column < width; column++) {
u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE);
iosys_map_wr(map, *dpt_ofs, u64, pte | addr);
*dpt_ofs += 8;
src_idx++;
}
/* The DE ignores the PTEs for the padding tiles */
*dpt_ofs += (dst_stride - width) * 8;
iosys_map_wr(map, dest, u64, addr | pte);
dest += sizeof(u64);
offset += XE_PAGE_SIZE;
}
/* Align to next page */
*dpt_ofs = ALIGN(*dpt_ofs, 4096);
return dest;
}
static unsigned int
write_dpt_remapped_tiled(struct xe_bo *bo, struct iosys_map *map,
unsigned int dest,
const struct intel_remapped_plane_info *plane)
{
struct xe_device *xe = xe_bo_device(bo);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
const u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo,
xe->pat.idx[XE_CACHE_NONE]);
unsigned int offset, column, row;
for (row = 0; row < plane->height; row++) {
offset = (plane->offset + plane->src_stride * row) *
XE_PAGE_SIZE;
for (column = 0; column < plane->width; column++) {
u64 addr = xe_bo_addr(bo, offset, XE_PAGE_SIZE);
iosys_map_wr(map, dest, u64, addr | pte);
dest += sizeof(u64);
offset += XE_PAGE_SIZE;
}
dest = write_dpt_padding(map, dest,
plane->dst_stride - plane->width);
}
return dest;
}
static void
write_dpt_remapped(struct xe_bo *bo,
const struct intel_remapped_info *remap_info,
struct iosys_map *map)
{
unsigned int i, dest = 0;
for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++) {
const struct intel_remapped_plane_info *plane =
&remap_info->plane[i];
if (!plane->linear && !plane->width && !plane->height)
continue;
if (dest && remap_info->plane_alignment) {
const unsigned int index = dest / sizeof(u64);
const unsigned int pad =
ALIGN(index, remap_info->plane_alignment) -
index;
dest = write_dpt_padding(map, dest, pad);
}
if (plane->linear)
dest = write_dpt_remapped_linear(bo, map, dest, plane);
else
dest = write_dpt_remapped_tiled(bo, map, dest, plane);
}
}
static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
@ -122,7 +183,8 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
ttm_bo_type_kernel,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_PAGETABLE,
XE_BO_FLAG_PAGETABLE |
XE_BO_FLAG_FORCE_WC,
alignment, false);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
@ -137,17 +199,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr);
}
} else if (view->type == I915_GTT_VIEW_REMAPPED) {
const struct intel_remapped_info *remap_info = &view->remapped;
u32 i, dpt_ofs = 0;
for (i = 0; i < ARRAY_SIZE(remap_info->plane); i++)
write_dpt_remapped(bo, &dpt->vmap, &dpt_ofs,
remap_info->plane[i].offset,
remap_info->plane[i].width,
remap_info->plane[i].height,
remap_info->plane[i].src_stride,
remap_info->plane[i].dst_stride);
write_dpt_remapped(bo, &view->remapped, &dpt->vmap);
} else {
const struct intel_rotation_info *rot_info = &view->rotated;
u32 i, dpt_ofs = 0;
@ -429,7 +481,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
return 0;
/* We reject creating !SCANOUT fb's, so this is weird.. */
drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_FORCE_WC));
vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment);


@ -48,7 +48,7 @@ initial_plane_bo(struct xe_device *xe,
if (plane_config->size == 0)
return NULL;
flags = XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT;
flags = XE_BO_FLAG_FORCE_WC | XE_BO_FLAG_GGTT;
base = round_down(plane_config->base, page_size);
if (IS_DGFX(xe)) {


@ -34,6 +34,19 @@
#define MI_FORCE_WAKEUP __MI_INSTR(0x1D)
#define MI_MATH(n) (__MI_INSTR(0x1A) | XE_INSTR_NUM_DW((n) + 1))
#define MI_SEMAPHORE_WAIT (__MI_INSTR(0x1c) | XE_INSTR_NUM_DW(5))
#define MI_SEMW_GGTT REG_BIT(22)
#define MI_SEMW_POLL REG_BIT(15)
#define MI_SEMW_COMPARE_OP_MASK REG_GENMASK(14, 12)
#define COMPARE_OP_SAD_GT_SDD 0
#define COMPARE_OP_SAD_GTE_SDD 1
#define COMPARE_OP_SAD_LT_SDD 2
#define COMPARE_OP_SAD_LTE_SDD 3
#define COMPARE_OP_SAD_EQ_SDD 4
#define COMPARE_OP_SAD_NEQ_SDD 5
#define MI_SEMW_COMPARE(OP) REG_FIELD_PREP(MI_SEMW_COMPARE_OP_MASK, COMPARE_OP_##OP)
#define MI_SEMW_TOKEN(token) REG_FIELD_PREP(REG_GENMASK(9, 2), (token))
#define MI_STORE_DATA_IMM __MI_INSTR(0x20)
#define MI_SDI_GGTT REG_BIT(22)
#define MI_SDI_LEN_DW GENMASK(9, 0)
@ -81,4 +94,10 @@
#define MI_SET_APPID_SESSION_ID_MASK REG_GENMASK(6, 0)
#define MI_SET_APPID_SESSION_ID(x) REG_FIELD_PREP(MI_SET_APPID_SESSION_ID_MASK, x)
#define MI_SEMAPHORE_WAIT_TOKEN (__MI_INSTR(0x1c) | XE_INSTR_NUM_DW(5)) /* XeLP+ */
#define MI_SEMAPHORE_REGISTER_POLL REG_BIT(16)
#define MI_SEMAPHORE_POLL REG_BIT(15)
#define MI_SEMAPHORE_CMP_OP_MASK REG_GENMASK(14, 12)
#define MI_SEMAPHORE_SAD_EQ_SDD REG_FIELD_PREP(MI_SEMAPHORE_CMP_OP_MASK, 4)
#endif
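Putting the new fields together, the first dword of a semaphore wait is built by OR-ing the opcode with its flag and field macros. A sketch using only the definitions added above (the remaining four instruction dwords carry the compare value and semaphore address):

u32 dw0 = MI_SEMAPHORE_WAIT |		 /* opcode, 5-dword instruction */
	  MI_SEMW_GGTT |		 /* semaphore address is in GGTT space */
	  MI_SEMW_POLL |		 /* poll rather than signal mode */
	  MI_SEMW_COMPARE(SAD_EQ_SDD);	 /* wait until *addr == data */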


@ -132,6 +132,14 @@
#define RING_BBADDR(base) XE_REG((base) + 0x140)
#define RING_BBADDR_UDW(base) XE_REG((base) + 0x168)
#define PR_CTR_CTRL(base) XE_REG((base) + 0x178)
#define CTR_COUNT_SELECT_FF REG_BIT(31)
#define CTR_LOGIC_OP_MASK REG_GENMASK(30, 0)
#define CTR_START 0
#define CTR_STOP 1
#define CTR_LOGIC_OP(OP) REG_FIELD_PREP(CTR_LOGIC_OP_MASK, CTR_##OP)
#define PR_CTR_THRSH(base) XE_REG((base) + 0x17c)
#define BCS_SWCTRL(base) XE_REG((base) + 0x200, XE_REG_OPTION_MASKED)
#define BCS_SWCTRL_DISABLE_256B REG_BIT(2)


@ -578,6 +578,7 @@
#define ENABLE_SMP_LD_RENDER_SURFACE_CONTROL REG_BIT(44 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
#define L3_128B_256B_WRT_DIS REG_BIT(40 - 32)
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
#define LSCFE_SAME_ADDRESS_ATOMICS_COALESCING_DISABLE REG_BIT(35 - 32)


@ -510,13 +510,11 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
/*
* Display scanout is always non-coherent with the CPU cache.
*
* For Xe_LPG and beyond up to NVL-P (excluding), PPGTT PTE
* lookups are also non-coherent and require a CPU:WC mapping.
*/
if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
(!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE))
if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_FORCE_WC) ||
(!xe->info.has_cached_pt && bo->flags & XE_BO_FLAG_PAGETABLE))
caching = ttm_write_combined;
}
@ -689,7 +687,12 @@ static int xe_bo_trigger_rebind(struct xe_device *xe, struct xe_bo *bo,
if (!xe_vm_in_fault_mode(vm)) {
drm_gpuvm_bo_evict(vm_bo, true);
continue;
/*
* L2 cache may not be flushed, so ensure that is done in
* xe_vm_invalidate_vma() below
*/
if (!xe_device_is_l2_flush_optimized(xe))
continue;
}
if (!idle) {
@ -3196,8 +3199,11 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & DRM_XE_GEM_CREATE_FLAG_DEFER_BACKING)
bo_flags |= XE_BO_FLAG_DEFER_BACKING;
/*
* Display scanout is always non-coherent with the CPU cache.
*/
if (args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT)
bo_flags |= XE_BO_FLAG_SCANOUT;
bo_flags |= XE_BO_FLAG_FORCE_WC;
if (args->flags & DRM_XE_GEM_CREATE_FLAG_NO_COMPRESSION) {
if (XE_IOCTL_DBG(xe, GRAPHICS_VER(xe) < 20))
@ -3209,7 +3215,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
/* CCS formats need physical placement at a 64K alignment in VRAM. */
if ((bo_flags & XE_BO_FLAG_VRAM_MASK) &&
(bo_flags & XE_BO_FLAG_SCANOUT) &&
(args->flags & DRM_XE_GEM_CREATE_FLAG_SCANOUT) &&
!(xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) &&
IS_ALIGNED(args->size, SZ_64K))
bo_flags |= XE_BO_FLAG_NEEDS_64K;
@ -3229,7 +3235,7 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
args->cpu_caching != DRM_XE_GEM_CPU_CACHING_WC))
return -EINVAL;
if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_SCANOUT &&
if (XE_IOCTL_DBG(xe, bo_flags & XE_BO_FLAG_FORCE_WC &&
args->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB))
return -EINVAL;
@ -3697,7 +3703,7 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
bo = xe_bo_create_user(xe, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT |
XE_BO_FLAG_FORCE_WC |
XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
if (IS_ERR(bo))
return PTR_ERR(bo);


@ -35,7 +35,7 @@
#define XE_BO_FLAG_PINNED BIT(7)
#define XE_BO_FLAG_NO_RESV_EVICT BIT(8)
#define XE_BO_FLAG_DEFER_BACKING BIT(9)
#define XE_BO_FLAG_SCANOUT BIT(10)
#define XE_BO_FLAG_FORCE_WC BIT(10)
#define XE_BO_FLAG_FIXED_PLACEMENT BIT(11)
#define XE_BO_FLAG_PAGETABLE BIT(12)
#define XE_BO_FLAG_NEEDS_CPU_ACCESS BIT(13)


@ -211,6 +211,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY, xe_exec_queue_set_property_ioctl,
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_VM_GET_PROPERTY, xe_vm_get_property_ioctl,
DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
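The new table entry above wires DRM_IOCTL_XE_VM_GET_PROPERTY into the driver. As a rough illustration of how userspace might exercise it — the struct name follows the usual DRM uAPI convention, but every field below is an assumption for illustration, not the merged layout:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "xe_drm.h" /* uAPI header; exact layout must be taken from it */

/* All field names here are hypothetical placeholders. */
static int query_vm_property(int fd, uint32_t vm_id, uint32_t property)
{
	struct drm_xe_vm_get_property args;

	memset(&args, 0, sizeof(args));
	args.vm_id = vm_id;       /* VM to query (assumed field) */
	args.property = property; /* property id (assumed field) */

	/* drmIoctl() restarts on EINTR/EAGAIN for us */
	return drmIoctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &args);
}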
@ -1094,6 +1096,29 @@ static void tdf_request_sync(struct xe_device *xe)
}
}
/**
* xe_device_is_l2_flush_optimized() - Check if the L2 flush is optimized by HW
* @xe: The device to check.
*
* Return: true if the HW optimizes the L2 flush, false otherwise.
*/
bool xe_device_is_l2_flush_optimized(struct xe_device *xe)
{
/*
* XA is *always* flushed, e.g. at the end of submission (and maybe
* other places). Internally, as an optimization, the HW doesn't need
* to make that a full flush (which would also include XA) when Media
* is off/powergated: it then doesn't need to worry about GT caches vs
* Media coherency, only CPU vs GPU coherency, so it can make it a
* targeted XA flush, since anything tagged XA is shared with the CPU.
* The main implication is that we now need to somehow flush non-XA
* lines before freeing system memory pages, otherwise dirty cachelines
* could be flushed after the free (e.g. if Media suddenly turns on and
* does a full flush).
*/
if (GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe))
return true;
return false;
}
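Note that guc_ctl_feature_flags() in xe_guc.c (later in this merge) gates GUC_CTL_ENABLE_L2FLUSH_OPT on the same GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe) condition, additionally restricted to the media GT, so firmware and driver agree on when the optimization is active.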
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
@ -1140,6 +1165,14 @@ void xe_device_td_flush(struct xe_device *xe)
{
struct xe_gt *root_gt;
/*
* From Xe3p onward the HW takes care of flushing TD entries along with
* XA entries at the usual sync points, e.g. at the end of submission,
* so no manual flush is needed here.
*/
if (GRAPHICS_VER(xe) >= 35)
return;
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;


@ -188,6 +188,7 @@ void xe_device_snapshot_print(struct xe_device *xe, struct drm_printer *p);
u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
bool xe_device_is_l2_flush_optimized(struct xe_device *xe);
void xe_device_td_flush(struct xe_device *xe);
void xe_device_l2_flush(struct xe_device *xe);


@ -66,6 +66,9 @@
* give us the correct placement for free.
*/
#define XE_GGTT_FLAGS_64K BIT(0)
#define XE_GGTT_FLAGS_ONLINE BIT(1)
/**
* struct xe_ggtt_node - A node in GGTT.
*
@ -117,6 +120,8 @@ struct xe_ggtt {
* @flags: Flags for this GGTT
* Acceptable flags:
* - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K.
* - %XE_GGTT_FLAGS_ONLINE - whether the GGTT is online; protected by
* ggtt->lock after init
*/
unsigned int flags;
/** @scratch: Internal object allocation used as a scratch page */
@ -367,6 +372,8 @@ static void dev_fini_ggtt(void *arg)
{
struct xe_ggtt *ggtt = arg;
scoped_guard(mutex, &ggtt->lock)
ggtt->flags &= ~XE_GGTT_FLAGS_ONLINE;
drain_workqueue(ggtt->wq);
}
@ -437,6 +444,7 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
if (err)
return err;
ggtt->flags |= XE_GGTT_FLAGS_ONLINE;
return devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
}
ALLOW_ERROR_INJECTION(xe_ggtt_init_early, ERRNO); /* See xe_pci_probe() */
@ -465,13 +473,10 @@ static void ggtt_node_fini(struct xe_ggtt_node *node)
static void ggtt_node_remove(struct xe_ggtt_node *node)
{
struct xe_ggtt *ggtt = node->ggtt;
struct xe_device *xe = tile_to_xe(ggtt->tile);
bool bound;
int idx;
bound = drm_dev_enter(&xe->drm, &idx);
mutex_lock(&ggtt->lock);
bound = ggtt->flags & XE_GGTT_FLAGS_ONLINE;
if (bound)
xe_ggtt_clear(ggtt, xe_ggtt_node_addr(node), xe_ggtt_node_size(node));
drm_mm_remove_node(&node->base);
@ -484,8 +489,6 @@ static void ggtt_node_remove(struct xe_ggtt_node *node)
if (node->invalidate_on_remove)
xe_ggtt_invalidate(ggtt);
drm_dev_exit(idx);
free_node:
ggtt_node_fini(node);
}


@ -171,7 +171,7 @@ static void xe_gt_enable_comp_1wcoh(struct xe_gt *gt)
static void gt_reset_worker(struct work_struct *w);
static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
long timeout_jiffies)
long timeout_jiffies, bool force_reset)
{
struct xe_sched_job *job;
struct dma_fence *fence;
@ -181,6 +181,8 @@ static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
if (IS_ERR(job))
return PTR_ERR(job);
job->ring_ops_force_reset = force_reset;
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
@ -204,7 +206,7 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
if (IS_ERR(bb))
return PTR_ERR(bb);
ret = emit_job_sync(q, bb, HZ);
ret = emit_job_sync(q, bb, HZ, false);
xe_bb_free(bb, NULL);
return ret;
@ -369,7 +371,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
bb->len = cs - bb->cs;
ret = emit_job_sync(q, bb, HZ);
/* only VFs need to trigger reset to get a clean NULL context */
ret = emit_job_sync(q, bb, HZ, IS_SRIOV_VF(gt_to_xe(gt)));
xe_bb_free(bb, NULL);


@ -12,6 +12,7 @@
#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
@ -163,6 +164,7 @@ ccs_mode_store(struct device *kdev, struct device_attribute *attr,
xe_gt_info(gt, "Setting compute mode to %d\n", num_engines);
gt->ccs_mode = num_engines;
xe_gt_record_user_engines(gt);
guard(xe_pm_runtime)(xe);
xe_gt_reset(gt);
/* We may end PF lockdown once CCS mode is default again */


@ -171,6 +171,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
case XE_GT_SRIOV_STATE_##_X: return #_X
CASE2STR(WIP);
CASE2STR(FLR_WIP);
CASE2STR(FLR_PREPARE);
CASE2STR(FLR_SEND_START);
CASE2STR(FLR_WAIT_GUC);
CASE2STR(FLR_GUC_DONE);
@ -1486,11 +1487,15 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
* The VF FLR state machine looks like::
*
* (READY,PAUSED,STOPPED)<------------<--------------o
* | \
* flr \
* | \
* ....V..........................FLR_WIP........... \
* : \ : \
* | | \
* flr prepare \
* | | \
* ....V.............V............FLR_WIP........... \
* : | | : \
* : | FLR_PREPARE : |
* : | / : |
* : \ flr : |
* : \ / : |
* : \ o----<----busy : |
* : \ / / : |
* : FLR_SEND_START---failed----->-----------o--->(FLR_FAILED)<---o
@ -1539,20 +1544,28 @@ static void pf_enter_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
pf_queue_vf(gt, vfid);
}
static void pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
static bool pf_exit_vf_flr_prepare(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
return;
}
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE))
return false;
pf_enter_vf_flr_send_start(gt, vfid);
return true;
}
static bool pf_enter_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
return false;
pf_enter_vf_wip(gt, vfid);
pf_enter_vf_flr_send_start(gt, vfid);
return true;
}
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
{
if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP)) {
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_FINISH);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_MMIO);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA);
@ -1759,6 +1772,41 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
pf_queue_vf(gt, vfid);
}
/**
* xe_gt_sriov_pf_control_prepare_flr() - Notify PF that VF FLR request was issued.
* @gt: the &xe_gt
* @vfid: the VF identifier
*
* This is an optional early notification path used to mark a pending FLR
* before the GuC notifies the PF with a FLR event.
*
* This function is for PF only.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_gt_sriov_pf_control_prepare_flr(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_enter_vf_flr_wip(gt, vfid))
return -EALREADY;
pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_PREPARE);
return 0;
}
static int pf_begin_vf_flr(struct xe_gt *gt, unsigned int vfid)
{
if (pf_enter_vf_flr_wip(gt, vfid)) {
pf_enter_vf_flr_send_start(gt, vfid);
return 0;
}
if (pf_exit_vf_flr_prepare(gt, vfid))
return 0;
xe_gt_sriov_dbg(gt, "VF%u FLR is already in progress\n", vfid);
return -EALREADY;
}
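A sketch of the expected calling order from the FLR originator (e.g. the Xe VFIO side) — an illustration of the contract, not the exact call site:

int err;

/* Mark the FLR as pending before the GuC delivers its FLR event.
 * -EALREADY just means an FLR for this VF is already in flight. */
err = xe_gt_sriov_pf_control_prepare_flr(gt, vfid);
if (err && err != -EALREADY)
	return err;

/* ... PCI-level FLR is issued; the GuC FLR event or a later trigger
 * then moves the state machine out of FLR_PREPARE ... */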
/**
* xe_gt_sriov_pf_control_trigger_flr - Start a VF FLR sequence.
* @gt: the &xe_gt
@ -1770,9 +1818,7 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
*/
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
{
pf_enter_vf_flr_wip(gt, vfid);
return 0;
return pf_begin_vf_flr(gt, vfid);
}
/**
@ -1879,9 +1925,9 @@ static void pf_handle_vf_flr(struct xe_gt *gt, u32 vfid)
if (needs_dispatch_flr(xe)) {
for_each_gt(gtit, xe, gtid)
pf_enter_vf_flr_wip(gtit, vfid);
pf_begin_vf_flr(gtit, vfid);
} else {
pf_enter_vf_flr_wip(gt, vfid);
pf_begin_vf_flr(gt, vfid);
}
}


@ -27,6 +27,7 @@ int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int v
int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_prepare_flr(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid);


@ -15,6 +15,7 @@
*
* @XE_GT_SRIOV_STATE_WIP: indicates that some operations are in progress.
* @XE_GT_SRIOV_STATE_FLR_WIP: indicates that a VF FLR is in progress.
* @XE_GT_SRIOV_STATE_FLR_PREPARE: indicates that the PF received early VF FLR prepare notification.
* @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command.
* @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC.
* @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC.
@ -56,6 +57,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_WIP = 1,
XE_GT_SRIOV_STATE_FLR_WIP,
XE_GT_SRIOV_STATE_FLR_PREPARE,
XE_GT_SRIOV_STATE_FLR_SEND_START,
XE_GT_SRIOV_STATE_FLR_WAIT_GUC,
XE_GT_SRIOV_STATE_FLR_GUC_DONE,


@ -85,7 +85,13 @@ static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"),
DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"),
DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"),
DEF_STAT_STR(SVM_4K_DEVICE_COPY_KB, "svm_4K_device_copy_kb"),
DEF_STAT_STR(SVM_64K_DEVICE_COPY_KB, "svm_64K_device_copy_kb"),
DEF_STAT_STR(SVM_2M_DEVICE_COPY_KB, "svm_2M_device_copy_kb"),
DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"),
DEF_STAT_STR(SVM_4K_CPU_COPY_KB, "svm_4K_cpu_copy_kb"),
DEF_STAT_STR(SVM_64K_CPU_COPY_KB, "svm_64K_cpu_copy_kb"),
DEF_STAT_STR(SVM_2M_CPU_COPY_KB, "svm_2M_cpu_copy_kb"),
DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"),
DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"),
DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"),


@ -40,7 +40,13 @@ enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
XE_GT_STATS_ID_SVM_DEVICE_COPY_KB,
XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB,
XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB,
XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB,
XE_GT_STATS_ID_SVM_CPU_COPY_KB,
XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB,
XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB,
XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB,
XE_GT_STATS_ID_SVM_4K_GET_PAGES_US,
XE_GT_STATS_ID_SVM_64K_GET_PAGES_US,
XE_GT_STATS_ID_SVM_2M_GET_PAGES_US,


@ -98,6 +98,9 @@ static u32 guc_ctl_feature_flags(struct xe_guc *guc)
if (xe_guc_using_main_gamctrl_queues(guc))
flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES;
if (GRAPHICS_VER(xe) >= 35 && !IS_DGFX(xe) && xe_gt_is_media_type(guc_to_gt(guc)))
flags |= GUC_CTL_ENABLE_L2FLUSH_OPT;
return flags;
}
@ -1176,14 +1179,14 @@ static int guc_wait_ucode(struct xe_guc *guc)
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
u32 before_freq, act_freq, cur_freq;
u32 status = 0, tries = 0;
int load_result, ret;
ktime_t before;
u64 delta_ms;
int ret;
before_freq = xe_guc_pc_get_act_freq(guc_pc);
before = ktime_get();
ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret,
ret = poll_timeout_us(load_result = guc_load_done(gt, &status, &tries), load_result,
10 * USEC_PER_MSEC,
GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false);
@ -1191,7 +1194,7 @@ static int guc_wait_ucode(struct xe_guc *guc)
act_freq = xe_guc_pc_get_act_freq(guc_pc);
cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc);
if (ret) {
if (ret || load_result <= 0) {
xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n",
status, delta_ms, xe_guc_pc_get_act_freq(guc_pc),
xe_guc_pc_get_cur_freq_fw(guc_pc));
@ -1399,15 +1402,37 @@ int xe_guc_enable_communication(struct xe_guc *guc)
return 0;
}
int xe_guc_suspend(struct xe_guc *guc)
/**
* xe_guc_softreset() - Soft reset GuC
* @guc: The GuC object
*
* Send a soft reset command to the GuC via an MMIO send.
*
* Return: 0 on success, otherwise an error code
*/
int xe_guc_softreset(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
u32 action[] = {
XE_GUC_ACTION_CLIENT_SOFT_RESET,
};
int ret;
if (!xe_uc_fw_is_running(&guc->fw))
return 0;
ret = xe_guc_mmio_send(guc, action, ARRAY_SIZE(action));
if (ret)
return ret;
return 0;
}
int xe_guc_suspend(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
int ret;
ret = xe_guc_softreset(guc);
if (ret) {
xe_gt_err(gt, "GuC suspend failed: %pe\n", ERR_PTR(ret));
return ret;


@ -44,6 +44,7 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc);
void xe_guc_runtime_suspend(struct xe_guc *guc);
void xe_guc_runtime_resume(struct xe_guc *guc);
int xe_guc_suspend(struct xe_guc *guc);
int xe_guc_softreset(struct xe_guc *guc);
void xe_guc_notify(struct xe_guc *guc);
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
int xe_guc_mmio_send(struct xe_guc *guc, const u32 *request, u32 len);


@ -31,6 +31,7 @@
#include "xe_guc_submit.h"
#include "xe_guc_tlb_inval.h"
#include "xe_map.h"
#include "xe_page_reclaim.h"
#include "xe_pm.h"
#include "xe_sleep.h"
#include "xe_sriov_vf.h"
@ -352,6 +353,7 @@ static void guc_action_disable_ct(void *arg)
{
struct xe_guc_ct *ct = arg;
xe_guc_ct_stop(ct);
guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
}
@ -1629,17 +1631,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
/*
* Page reclamation is an extension of TLB invalidation. Both
* operations share the same seqno and fence. When either
* action completes, we need to signal the corresponding
* fence. Since the handling logic (lookup fence by seqno,
* fence signalling) is identical, we use the same handler
* for both G2H events.
*/
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
ret = xe_guc_page_reclaim_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
break;
@ -1847,15 +1843,13 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
/*
* Seqno and fence handling of page reclamation and TLB
* invalidation is identical, so we can use the same handler
* for both actions.
*/
__g2h_release_space(ct, len);
ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_PAGE_RECLAMATION_DONE:
__g2h_release_space(ct, len);
ret = xe_guc_page_reclaim_done_handler(guc, payload, adj_len);
break;
default:
xe_gt_warn(gt, "NOT_POSSIBLE\n");
}


@ -67,6 +67,7 @@ struct guc_update_exec_queue_policy {
#define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7)
#define GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_ENABLE_L2FLUSH_OPT BIT(15)
#define GUC_CTL_DEBUG 3
#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0)


@ -47,6 +47,8 @@
#define XE_GUC_EXEC_QUEUE_CGP_CONTEXT_ERROR_LEN 6
static int guc_submit_reset_prepare(struct xe_guc *guc);
static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
@ -238,7 +240,7 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
EXEC_QUEUE_STATE_BANNED));
}
static void guc_submit_fini(struct drm_device *drm, void *arg)
static void guc_submit_sw_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
struct xe_device *xe = guc_to_xe(guc);
@ -256,6 +258,19 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
xa_destroy(&guc->submission_state.exec_queue_lookup);
}
static void guc_submit_fini(void *arg)
{
struct xe_guc *guc = arg;
/* Forcefully kill any remaining exec queues */
xe_guc_ct_stop(&guc->ct);
guc_submit_reset_prepare(guc);
xe_guc_softreset(guc);
xe_guc_submit_stop(guc);
xe_uc_fw_sanitize(&guc->fw);
xe_guc_submit_pause_abort(guc);
}
static void guc_submit_wedged_fini(void *arg)
{
struct xe_guc *guc = arg;
@ -325,7 +340,11 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
guc->submission_state.initialized = true;
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
err = drmm_add_action_or_reset(&xe->drm, guc_submit_sw_fini, guc);
if (err)
return err;
return devm_add_action_or_reset(xe->drm.dev, guc_submit_fini, guc);
}
/*
@ -1300,6 +1319,7 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
*/
void xe_guc_submit_wedge(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
unsigned long index;
@ -1314,20 +1334,29 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
if (!guc->submission_state.initialized)
return;
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
xe_gt_err(gt, "Failed to register clean-up in wedged.mode=%s; "
"Although device is wedged.\n",
xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
return;
}
if (xe->wedged.mode == XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET) {
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
xe_gt_err(gt, "Failed to register clean-up on wedged.mode=%s; "
"Although device is wedged.\n",
xe_wedged_mode_to_string(XE_WEDGED_MODE_UPON_ANY_HANG_NO_RESET));
return;
}
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (xe_exec_queue_get_unless_zero(q))
set_exec_queue_wedged(q);
mutex_unlock(&guc->submission_state.lock);
mutex_lock(&guc->submission_state.lock);
xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
if (xe_exec_queue_get_unless_zero(q))
set_exec_queue_wedged(q);
mutex_unlock(&guc->submission_state.lock);
} else {
/* Forcefully kill any remaining exec queues, signal fences */
guc_submit_reset_prepare(guc);
xe_guc_submit_stop(guc);
xe_guc_softreset(guc);
xe_uc_fw_sanitize(&guc->fw);
xe_guc_submit_pause_abort(guc);
}
}
static bool guc_submit_hint_wedged(struct xe_guc *guc)
@ -2298,6 +2327,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
bool do_destroy = false;
/* Stop scheduling + flush any DRM scheduler operations */
xe_sched_submission_stop(sched);
@ -2305,7 +2335,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
/* Clean up lost G2H + reset engine state */
if (exec_queue_registered(q)) {
if (exec_queue_destroyed(q))
__guc_exec_queue_destroy(guc, q);
do_destroy = true;
}
if (q->guc->suspend_pending) {
set_exec_queue_suspended(q);
@ -2341,18 +2371,15 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
xe_guc_exec_queue_trigger_cleanup(q);
}
}
if (do_destroy)
__guc_exec_queue_destroy(guc, q);
}
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
static int guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
return 0;
if (!guc->submission_state.initialized)
return 0;
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
@ -2367,6 +2394,17 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
return ret;
}
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
return 0;
if (!guc->submission_state.initialized)
return 0;
return guc_submit_reset_prepare(guc);
}
void xe_guc_submit_reset_wait(struct xe_guc *guc)
{
wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) ||
@ -2763,8 +2801,7 @@ void xe_guc_submit_pause_abort(struct xe_guc *guc)
continue;
xe_sched_submission_start(sched);
if (exec_queue_killed_or_banned_or_wedged(q))
xe_guc_exec_queue_trigger_cleanup(q);
guc_exec_queue_kill(q);
}
mutex_unlock(&guc->submission_state.lock);
}


@ -176,11 +176,18 @@ static bool xe_i2c_irq_present(struct xe_device *xe)
*/
void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl)
{
if (!xe_i2c_irq_present(xe))
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
if (!(master_ctl & I2C_IRQ) || !xe_i2c_irq_present(xe))
return;
if (master_ctl & I2C_IRQ)
generic_handle_irq_safe(xe->i2c->adapter_irq);
/* Forward interrupt to I2C adapter */
generic_handle_irq_safe(xe->i2c->adapter_irq);
/* Deassert after I2C adapter clears the interrupt */
xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_INTX_DISABLE);
/* Reassert to allow subsequent interrupt generation */
xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, PCI_COMMAND_INTX_DISABLE, 0);
}
void xe_i2c_irq_reset(struct xe_device *xe)
@ -190,6 +197,7 @@ void xe_i2c_irq_reset(struct xe_device *xe)
if (!xe_i2c_irq_present(xe))
return;
xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_INTX_DISABLE);
xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, ACPI_INTR_EN, 0);
}
@ -201,6 +209,7 @@ void xe_i2c_irq_postinstall(struct xe_device *xe)
return;
xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, 0, ACPI_INTR_EN);
xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, PCI_COMMAND_INTX_DISABLE, 0);
}
static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq,


@ -28,6 +28,7 @@
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_ring_ops.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@ -94,6 +95,9 @@ gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
class, NULL))
return true;
if (gt->ring_ops[class]->emit_aux_table_inv)
return true;
return false;
}
@ -1217,6 +1221,23 @@ static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
return cmd - batch;
}
static ssize_t setup_invalidate_auxccs_wa(struct xe_lrc *lrc,
struct xe_hw_engine *hwe,
u32 *batch, size_t max_len)
{
struct xe_gt *gt = lrc->gt;
u32 *(*emit)(struct xe_gt *gt, u32 *cmd) =
gt->ring_ops[hwe->class]->emit_aux_table_inv;
if (!emit)
return 0;
if (xe_gt_WARN_ON(gt, max_len < 8))
return -ENOSPC;
return emit(gt, batch) - batch;
}
struct bo_setup {
ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
u32 *batch, size_t max_size);
@ -1349,9 +1370,11 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
static const struct bo_setup rcs_funcs[] = {
{ .setup = setup_timestamp_wa },
{ .setup = setup_invalidate_auxccs_wa },
{ .setup = setup_configfs_mid_ctx_restore_bb },
};
static const struct bo_setup xcs_funcs[] = {
{ .setup = setup_invalidate_auxccs_wa },
{ .setup = setup_configfs_mid_ctx_restore_bb },
};
struct bo_setup_state state = {
@ -1607,8 +1630,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_v
bo = xe_bo_create_pin_map_novm(xe, tile, bo_size,
ttm_bo_type_kernel,
bo_flags, false);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
if (IS_ERR(bo))
return PTR_ERR(bo);
lrc->bo = bo;
@ -1902,6 +1925,7 @@ static int instr_dw(u32 cmd_header)
static int dump_mi_command(struct drm_printer *p,
struct xe_gt *gt,
u32 *start,
u32 *dw,
int remaining_dw)
{
@ -1917,15 +1941,18 @@ static int dump_mi_command(struct drm_printer *p,
while (num_noop < remaining_dw &&
(*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
num_noop++;
drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_NOOP (%d dwords)\n",
dw - num_noop - start, inst_header, num_noop);
return num_noop;
case MI_TOPOLOGY_FILTER:
drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_TOPOLOGY_FILTER\n",
dw - start, inst_header);
return 1;
case MI_BATCH_BUFFER_END:
drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_BATCH_BUFFER_END\n",
dw - start, inst_header);
/* Return 'remaining_dw' to consume the rest of the LRC */
return remaining_dw;
}
@ -1939,39 +1966,43 @@ static int dump_mi_command(struct drm_printer *p,
switch (inst_header & MI_OPCODE) {
case MI_LOAD_REGISTER_IMM:
drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
inst_header, (numdw - 1) / 2);
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
dw - start, inst_header, (numdw - 1) / 2);
for (int i = 1; i < numdw; i += 2)
drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
drm_printf(p, "LRC[%#5tx] = - %#6x = %#010x\n",
&dw[i] - start, dw[i], dw[i + 1]);
return numdw;
case MI_LOAD_REGISTER_MEM & MI_OPCODE:
drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
inst_header,
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
dw - start, inst_header,
dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
if (numdw == 4)
drm_printf(p, " - %#6x = %#010llx\n",
drm_printf(p, "LRC[%#5tx] = - %#6x = %#010llx\n",
dw - start,
dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
else
drm_printf(p, " - %*ph (%s)\n",
(int)sizeof(u32) * (numdw - 1), dw + 1,
numdw < 4 ? "truncated" : "malformed");
drm_printf(p, "LRC[%#5tx] = - %*ph (%s)\n",
dw - start, (int)sizeof(u32) * (numdw - 1),
dw + 1, numdw < 4 ? "truncated" : "malformed");
return numdw;
case MI_FORCE_WAKEUP:
drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
drm_printf(p, "LRC[%#5tx] = [%#010x] MI_FORCE_WAKEUP\n",
dw - start, inst_header);
return numdw;
default:
drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
inst_header, opcode, numdw);
drm_printf(p, "LRC[%#5tx] = [%#010x] unknown MI opcode %#x, likely %d dwords\n",
dw - start, inst_header, opcode, numdw);
return numdw;
}
}
static int dump_gfxpipe_command(struct drm_printer *p,
struct xe_gt *gt,
u32 *start,
u32 *dw,
int remaining_dw)
{
@ -1990,11 +2021,13 @@ static int dump_gfxpipe_command(struct drm_printer *p,
switch (*dw & GFXPIPE_MATCH_MASK) {
#define MATCH(cmd) \
case cmd: \
drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \
dw - start, *dw, numdw); \
return numdw
#define MATCH3D(cmd) \
case CMD_##cmd: \
drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
drm_printf(p, "LRC[%#5tx] = [%#010x] " #cmd " (%d dwords)\n", \
dw - start, *dw, numdw); \
return numdw
MATCH(STATE_BASE_ADDRESS);
@ -2126,14 +2159,15 @@ static int dump_gfxpipe_command(struct drm_printer *p,
MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTER_2);
default:
drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
*dw, pipeline, opcode, subopcode, numdw);
drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
dw - start, *dw, pipeline, opcode, subopcode, numdw);
return numdw;
}
}
static int dump_gfx_state_command(struct drm_printer *p,
struct xe_gt *gt,
u32 *start,
u32 *dw,
int remaining_dw)
{
@ -2151,8 +2185,8 @@ static int dump_gfx_state_command(struct drm_printer *p,
MATCH(STATE_WRITE_INLINE);
default:
drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
*dw, opcode, numdw);
drm_printf(p, "LRC[%#5tx] = [%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
dw - start, *dw, opcode, numdw);
return numdw;
}
}
@ -2161,7 +2195,7 @@ void xe_lrc_dump_default(struct drm_printer *p,
struct xe_gt *gt,
enum xe_engine_class hwe_class)
{
u32 *dw;
u32 *dw, *start;
int remaining_dw, num_dw;
if (!gt->default_lrc[hwe_class]) {
@ -2174,18 +2208,20 @@ void xe_lrc_dump_default(struct drm_printer *p,
* hardware status page.
*/
dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
start = dw;
remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;
while (remaining_dw > 0) {
if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_MI) {
num_dw = dump_mi_command(p, gt, dw, remaining_dw);
num_dw = dump_mi_command(p, gt, start, dw, remaining_dw);
} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE) {
num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
num_dw = dump_gfxpipe_command(p, gt, start, dw, remaining_dw);
} else if ((*dw & XE_INSTR_CMD_TYPE) == XE_INSTR_GFX_STATE) {
num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
num_dw = dump_gfx_state_command(p, gt, start, dw, remaining_dw);
} else {
num_dw = min(instr_dw(*dw), remaining_dw);
drm_printf(p, "[%#10x] Unknown instruction of type %#x, likely %d dwords\n",
drm_printf(p, "LRC[%#5tx] = [%#10x] Unknown instruction of type %#x, likely %d dwords\n",
dw - start,
*dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
num_dw);
}
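
The new LRC[%#5tx] prefix prints each instruction's dword offset as a pointer difference. Since dw - start has type ptrdiff_t, the t length modifier is what keeps the format string correct on both 32-bit and 64-bit builds. A minimal standalone sketch (not part of the patch) of the same idiom:

#include <stdio.h>

int main(void)
{
	unsigned int ring[8] = { 0x18800000 };
	unsigned int *start = ring, *dw = &ring[5];

	/* dw - start is a ptrdiff_t; the 't' modifier matches it exactly */
	printf("LRC[%#5tx] = [%#010x]\n", dw - start, *dw);
	return 0;
}
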
@ -2563,14 +2599,14 @@ static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
* @lrc: Pointer to the lrc.
*
* Return latest ctx timestamp. With support for active contexts, the
* calculation may bb slightly racy, so follow a read-again logic to ensure that
* calculation may be slightly racy, so follow a read-again logic to ensure that
* the context is still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
u64 xe_lrc_timestamp(struct xe_lrc *lrc)
{
u64 lrc_ts, reg_ts, new_ts;
u64 lrc_ts, reg_ts, new_ts = lrc->ctx_timestamp;
u32 engine_id;
lrc_ts = xe_lrc_ctx_timestamp(lrc);

View File

@ -543,8 +543,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf,
size_t offset = 0;
int ret;
/* Can't read from disabled streams */
if (!stream->enabled || !stream->sample)
if (!stream->sample)
return -EINVAL;
if (!(file->f_flags & O_NONBLOCK)) {
@ -1460,6 +1459,10 @@ static void xe_oa_stream_disable(struct xe_oa_stream *stream)
if (stream->sample)
hrtimer_cancel(&stream->poll_check_timer);
/* Update stream->oa_buffer.tail to allow any final reports to be read */
if (xe_oa_buffer_check_unlocked(stream))
wake_up(&stream->poll_wq);
}
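
Dropping the !stream->enabled check means userspace can disable the stream and still drain reports that landed just before the disable took effect. A hedged userspace sketch of that pattern, assuming an open OA stream fd and the stream-level disable ioctl from uapi/drm/xe_drm.h:

#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/xe_drm.h>

/* Sketch: stop sampling, then drain whatever the HW already wrote. */
static void oa_disable_and_drain(int stream_fd)
{
	char buf[4096];
	ssize_t n;

	ioctl(stream_fd, DRM_XE_OBSERVATION_IOCTL_DISABLE, 0);
	while ((n = read(stream_fd, buf, sizeof(buf))) > 0)
		;	/* parse OA reports in buf[0..n) */
}
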
static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream)

View File

@ -11,6 +11,7 @@
#include "xe_page_reclaim.h"
#include "xe_gt_stats.h"
#include "xe_guc_tlb_inval.h"
#include "xe_macros.h"
#include "xe_pat.h"
#include "xe_sa.h"
@ -26,12 +27,18 @@
* flushes.
* - pat_index is transient display (1)
*
* For a NULL VMA there should be no corresponding PRL entry, so it
* is skipped.
*
* Return: true when page reclamation is unnecessary, false otherwise.
*/
bool xe_page_reclaim_skip(struct xe_tile *tile, struct xe_vma *vma)
{
u8 l3_policy;
if (xe_vma_is_null(vma))
return true;
l3_policy = xe_pat_index_get_l3_policy(tile->xe, vma->attr.pat_index);
/*
@ -130,3 +137,22 @@ int xe_page_reclaim_list_alloc_entries(struct xe_page_reclaim_list *prl)
return page ? 0 : -ENOMEM;
}
/**
* xe_guc_page_reclaim_done_handler() - Page reclaim done handler
* @guc: guc
* @msg: message indicating page reclamation done
* @len: length of message
*
* Page reclamation is an extension of TLB invalidation. Both
* operations share the same seqno and fence. When either
* action completes, we need to signal the corresponding
* fence. Since the handling logic is currently identical, this
* function delegates to the TLB invalidation handler.
*
* Return: 0 on success, -EPROTO for malformed messages.
*/
int xe_guc_page_reclaim_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
{
return xe_guc_tlb_inval_done_handler(guc, msg, len);
}
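
Given the shared seqno and fence noted above, the G2H dispatch can be pictured as routing both completion messages into the same path. A hypothetical sketch of that shape (the action names here are illustrative, not the real GuC ABI values):

/* Hypothetical dispatch sketch: both completions share a seqno/fence,
 * so they funnel into the same handler.
 */
static int g2h_completion_dispatch(struct xe_guc *guc, u32 action,
				   u32 *msg, u32 len)
{
	switch (action) {
	case G2H_ACTION_TLB_INVALIDATION_DONE:	/* illustrative name */
	case G2H_ACTION_PAGE_RECLAIM_DONE:	/* illustrative name */
		return xe_guc_tlb_inval_done_handler(guc, msg, len);
	default:
		return -EPROTO;
	}
}
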

View File

@ -20,6 +20,7 @@ struct xe_tlb_inval;
struct xe_tlb_inval_fence;
struct xe_tile;
struct xe_gt;
struct xe_guc;
struct xe_vma;
struct xe_guc_page_reclaim_entry {
@ -122,4 +123,6 @@ static inline void xe_page_reclaim_entries_put(struct xe_guc_page_reclaim_entry
put_page(virt_to_page(entries));
}
int xe_guc_page_reclaim_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
#endif /* _XE_PAGE_RECLAIM_H_ */

View File

@ -187,6 +187,12 @@ static int xe_pagefault_service(struct xe_pagefault *pf)
goto unlock_vm;
}
if (xe_vma_read_only(vma) &&
pf->consumer.access_type != XE_PAGEFAULT_ACCESS_TYPE_READ) {
err = -EPERM;
goto unlock_vm;
}
atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type);
if (xe_vma_is_cpu_addr_mirror(vma))
@ -244,6 +250,31 @@ static void xe_pagefault_print(struct xe_pagefault *pf)
pf->consumer.engine_instance);
}
static void xe_pagefault_save_to_vm(struct xe_device *xe, struct xe_pagefault *pf)
{
struct xe_vm *vm;
/*
* A pagefault may be associated with a VM that is not in fault mode.
* Perform the usual asid_to_vm lookup, but return the VM even if it
* is not in fault mode.
*/
down_read(&xe->usm.lock);
vm = xa_load(&xe->usm.asid_to_vm, pf->consumer.asid);
if (vm)
xe_vm_get(vm);
else
vm = ERR_PTR(-EINVAL);
up_read(&xe->usm.lock);
if (IS_ERR(vm))
return;
xe_vm_add_fault_entry_pf(vm, pf);
xe_vm_put(vm);
}
static void xe_pagefault_queue_work(struct work_struct *w)
{
struct xe_pagefault_queue *pf_queue =
@ -262,6 +293,7 @@ static void xe_pagefault_queue_work(struct work_struct *w)
err = xe_pagefault_service(&pf);
if (err) {
xe_pagefault_save_to_vm(gt_to_xe(pf.gt), &pf);
if (!(pf.consumer.access_type & XE_PAGEFAULT_ACCESS_PREFETCH)) {
xe_pagefault_print(&pf);
xe_gt_info(pf.gt, "Fault response: Unsuccessful %pe\n",

View File

@ -92,7 +92,7 @@ struct xe_pat_ops {
};
static const struct xe_pat_table_entry xelp_pat_table[] = {
[0] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
[0] = { XELP_PAT_WB, XE_COH_1WAY },
[1] = { XELP_PAT_WC, XE_COH_NONE },
[2] = { XELP_PAT_WT, XE_COH_NONE },
[3] = { XELP_PAT_UC, XE_COH_NONE },
@ -102,19 +102,19 @@ static const struct xe_pat_table_entry xehpc_pat_table[] = {
[0] = { XELP_PAT_UC, XE_COH_NONE },
[1] = { XELP_PAT_WC, XE_COH_NONE },
[2] = { XELP_PAT_WT, XE_COH_NONE },
[3] = { XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
[3] = { XELP_PAT_WB, XE_COH_1WAY },
[4] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WT, XE_COH_NONE },
[5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
[5] = { XEHPC_PAT_CLOS(1) | XELP_PAT_WB, XE_COH_1WAY },
[6] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WT, XE_COH_NONE },
[7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_AT_LEAST_1WAY },
[7] = { XEHPC_PAT_CLOS(2) | XELP_PAT_WB, XE_COH_1WAY },
};
static const struct xe_pat_table_entry xelpg_pat_table[] = {
[0] = { XELPG_PAT_0_WB, XE_COH_NONE },
[1] = { XELPG_PAT_1_WT, XE_COH_NONE },
[2] = { XELPG_PAT_3_UC, XE_COH_NONE },
[3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_AT_LEAST_1WAY },
[4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_AT_LEAST_1WAY },
[3] = { XELPG_PAT_0_WB | XELPG_2_COH_1W, XE_COH_1WAY },
[4] = { XELPG_PAT_0_WB | XELPG_3_COH_2W, XE_COH_2WAY },
};
/*
@ -147,7 +147,7 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L3_POLICY, l3_policy) | \
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
.coh_mode = __coh_mode ? XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \
.coh_mode = __coh_mode ? __coh_mode : XE_COH_NONE, \
.valid = 1 \
}

View File

@ -28,8 +28,9 @@ struct xe_pat_table_entry {
/**
* @coh_mode: The GPU coherency mode that @value maps to.
*/
#define XE_COH_NONE 1
#define XE_COH_AT_LEAST_1WAY 2
#define XE_COH_NONE 1
#define XE_COH_1WAY 2
#define XE_COH_2WAY 3
u16 coh_mode;
/**

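With the old XE_COH_AT_LEAST_1WAY split into distinct 1-way and 2-way values, callers can branch on the exact coherency level, as the bind and madvise checks above do with XE_COH_2WAY. A hypothetical helper, not part of the patch, showing that the old "at least 1-way" semantics remain recoverable from the new defines:

/* Hypothetical sketch: recover the former "at least 1-way" test. */
static bool pat_coh_is_at_least_1way(u16 coh_mode)
{
	return coh_mode == XE_COH_1WAY || coh_mode == XE_COH_2WAY;
}
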
View File

@ -1442,9 +1442,9 @@ static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
err = vma_check_userptr(vm, op->map.vma, pt_update);
break;
case DRM_GPUVA_OP_REMAP:
if (op->remap.prev)
if (op->remap.prev && !op->remap.skip_prev)
err = vma_check_userptr(vm, op->remap.prev, pt_update);
if (!err && op->remap.next)
if (!err && op->remap.next && !op->remap.skip_next)
err = vma_check_userptr(vm, op->remap.next, pt_update);
break;
case DRM_GPUVA_OP_UNMAP:
@ -1655,14 +1655,35 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
XE_WARN_ON(!level);
/* Check for leaf node */
if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
(!xe_child->base.children || !xe_child->base.children[first])) {
xe_child->level <= MAX_HUGEPTE_LEVEL) {
struct iosys_map *leaf_map = &xe_child->bo->vmap;
pgoff_t count = xe_pt_num_entries(addr, next, xe_child->level, walk);
for (pgoff_t i = 0; i < count; i++) {
u64 pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
u64 pte;
int ret;
/*
* If this is not a leaf PT, skip it unless the non-leaf PT is
* interleaved between leaf PTEs, which causes the page walk to skip
* over the child leaves
*/
if (xe_child->base.children && xe_child->base.children[first + i]) {
u64 pt_size = 1ULL << walk->shifts[xe_child->level];
bool edge_pt = (i == 0 && !IS_ALIGNED(addr, pt_size)) ||
(i == count - 1 && !IS_ALIGNED(next, pt_size));
if (!edge_pt) {
xe_page_reclaim_list_abort(xe_walk->tile->primary_gt,
xe_walk->prl,
"PT is skipped by walk at level=%u offset=%lu",
xe_child->level, first + i);
break;
}
continue;
}
pte = xe_map_rd(xe, leaf_map, (first + i) * sizeof(u64), u64);
/*
* In rare scenarios, pte may not be written yet due to racy conditions.
* In such cases, invalidate the PRL and fallback to full PPC invalidation.
@ -1674,9 +1695,8 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
}
/* Ensure it is a defined page */
xe_tile_assert(xe_walk->tile,
xe_child->level == 0 ||
(pte & (XE_PTE_PS64 | XE_PDE_PS_2M | XE_PDPE_PS_1G)));
xe_tile_assert(xe_walk->tile, xe_child->level == 0 ||
(pte & (XE_PDE_PS_2M | XE_PDPE_PS_1G)));
/* An entry should be added for 64KB, but contiguous 4K PTEs have XE_PTE_PS64 */
if (pte & XE_PTE_PS64)
@ -1701,11 +1721,11 @@ static int xe_pt_stage_unbind_entry(struct xe_ptw *parent, pgoff_t offset,
killed = xe_pt_check_kill(addr, next, level - 1, xe_child, action, walk);
/*
* Verify PRL is active and if entry is not a leaf pte (base.children conditions),
* there is a potential need to invalidate the PRL if any PTE (num_live) are dropped.
* Verify whether any PTEs are potentially dropped at non-leaf levels,
* either from being killed or because the page walk covers the region.
*/
if (xe_walk->prl && level > 1 && xe_child->num_live &&
xe_child->base.children && xe_child->base.children[first]) {
if (xe_walk->prl && xe_page_reclaim_list_valid(xe_walk->prl) &&
xe_child->level > MAX_HUGEPTE_LEVEL && xe_child->num_live) {
bool covered = xe_pt_covers(addr, next, xe_child->level, &xe_walk->base);
/*
@ -2178,12 +2198,12 @@ static int op_prepare(struct xe_vm *vm,
err = unbind_op_prepare(tile, pt_update_ops, old);
if (!err && op->remap.prev) {
if (!err && op->remap.prev && !op->remap.skip_prev) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.prev, false);
pt_update_ops->wait_vm_bookkeep = true;
}
if (!err && op->remap.next) {
if (!err && op->remap.next && !op->remap.skip_next) {
err = bind_op_prepare(vm, tile, pt_update_ops,
op->remap.next, false);
pt_update_ops->wait_vm_bookkeep = true;
@ -2408,10 +2428,10 @@ static void op_commit(struct xe_vm *vm,
unbind_op_commit(vm, tile, pt_update_ops, old, fence, fence2);
if (op->remap.prev)
if (op->remap.prev && !op->remap.skip_prev)
bind_op_commit(vm, tile, pt_update_ops, op->remap.prev,
fence, fence2, false);
if (op->remap.next)
if (op->remap.next && !op->remap.skip_next)
bind_op_commit(vm, tile, pt_update_ops, op->remap.next,
fence, fence2, false);
break;

View File

@ -48,15 +48,48 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | BIT(8) | state;
}
static int emit_aux_table_inv(struct xe_gt *gt, struct xe_reg reg,
u32 *dw, int i)
static u32 *
__emit_aux_table_inv(u32 *cmd, const struct xe_reg reg, u32 adj_offset)
{
dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_MMIO_REMAP_EN;
dw[i++] = reg.addr + gt->mmio.adj_offset;
dw[i++] = AUX_INV;
dw[i++] = MI_NOOP;
*cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) |
MI_LRI_MMIO_REMAP_EN;
*cmd++ = reg.addr + adj_offset;
*cmd++ = AUX_INV;
*cmd++ = MI_SEMAPHORE_WAIT_TOKEN | MI_SEMAPHORE_REGISTER_POLL |
MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD;
*cmd++ = 0;
*cmd++ = reg.addr + adj_offset;
*cmd++ = 0;
*cmd++ = 0;
return i;
return cmd;
}
static u32 *emit_aux_table_inv_render_compute(struct xe_gt *gt, u32 *cmd)
{
return __emit_aux_table_inv(cmd, CCS_AUX_INV, gt->mmio.adj_offset);
}
static u32 *emit_aux_table_inv_video_decode(struct xe_gt *gt, u32 *cmd)
{
return __emit_aux_table_inv(cmd, VD0_AUX_INV, gt->mmio.adj_offset);
}
static u32 *emit_aux_table_inv_video_enhance(struct xe_gt *gt, u32 *cmd)
{
return __emit_aux_table_inv(cmd, VE0_AUX_INV, gt->mmio.adj_offset);
}
static int emit_aux_table_inv(struct xe_hw_engine *hwe, u32 *dw, int i)
{
struct xe_gt *gt = hwe->gt;
u32 *(*emit)(struct xe_gt *gt, u32 *cmd) =
gt->ring_ops[hwe->class]->emit_aux_table_inv;
if (emit)
return emit(gt, dw + i) - dw;
else
return i;
}
static int emit_user_interrupt(u32 *dw, int i)
@ -256,6 +289,32 @@ static int emit_copy_timestamp(struct xe_device *xe, struct xe_lrc *lrc,
return i;
}
static int emit_fake_watchdog(struct xe_lrc *lrc, u32 *dw, int i)
{
/*
* Set up a watchdog with an impossible condition that always triggers
* a hardware interrupt, forcing the GuC to reset the engine.
*/
dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | MI_LRI_LRM_CS_MMIO;
dw[i++] = PR_CTR_THRSH(0).addr;
dw[i++] = 2; /* small threshold */
dw[i++] = PR_CTR_CTRL(0).addr;
dw[i++] = CTR_LOGIC_OP(START);
dw[i++] = MI_SEMAPHORE_WAIT | MI_SEMW_GGTT | MI_SEMW_POLL | MI_SEMW_COMPARE(SAD_EQ_SDD);
dw[i++] = 0xdead; /* this should never be seen */
dw[i++] = lower_32_bits(xe_lrc_ggtt_addr(lrc));
dw[i++] = upper_32_bits(xe_lrc_ggtt_addr(lrc));
dw[i++] = 0; /* unused token */
dw[i++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) | MI_LRI_LRM_CS_MMIO;
dw[i++] = PR_CTR_CTRL(0).addr;
dw[i++] = CTR_LOGIC_OP(STOP);
return i;
}
/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
u64 batch_addr, u32 *head, u32 seqno)
@ -266,6 +325,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
*head = lrc->ring.tail;
if (job->ring_ops_force_reset)
i = emit_fake_watchdog(lrc, dw, i);
i = emit_copy_timestamp(gt_to_xe(gt), lrc, dw, i);
if (job->ring_ops_flush_tlb) {
@ -305,9 +367,9 @@ static bool has_aux_ccs(struct xe_device *xe)
* PVC is a special case that has no compression of either type
* (FlatCCS or AuxCCS). Also, AuxCCS is no longer used from Xe2
* onward, so any future platforms with no FlatCCS will not have
* AuxCCS either.
* AuxCCS, and we explicitly do not want to support it on MTL.
*/
if (GRAPHICS_VER(xe) >= 20 || xe->info.platform == XE_PVC)
if (GRAPHICS_VERx100(xe) >= 1270 || xe->info.platform == XE_PVC)
return false;
return !xe->info.has_flat_ccs;
@ -320,21 +382,18 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
u32 ppgtt_flag = get_ppgtt_flag(job);
struct xe_gt *gt = job->q->gt;
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
*head = lrc->ring.tail;
if (job->ring_ops_force_reset)
i = emit_fake_watchdog(lrc, dw, i);
i = emit_copy_timestamp(xe, lrc, dw, i);
dw[i++] = preparser_disable(true);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe)) {
if (decode)
i = emit_aux_table_inv(gt, VD0_AUX_INV, dw, i);
else
i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
}
i = emit_aux_table_inv(job->q->hwe, dw, i);
if (job->ring_ops_flush_tlb)
i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
@ -381,8 +440,18 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
*head = lrc->ring.tail;
if (job->ring_ops_force_reset)
i = emit_fake_watchdog(lrc, dw, i);
i = emit_copy_timestamp(xe, lrc, dw, i);
/*
* On AuxCCS platforms the invalidation of the Aux table requires
* quiescing the memory traffic beforehand.
*/
if (has_aux_ccs(xe))
i = emit_render_cache_flush(job, dw, i);
dw[i++] = preparser_disable(true);
if (lacks_render)
mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
@ -393,8 +462,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
i = emit_pipe_invalidate(job->q, mask_flags, job->ring_ops_flush_tlb, dw, i);
/* hsdes: 1809175790 */
if (has_aux_ccs(xe))
i = emit_aux_table_inv(gt, CCS_AUX_INV, dw, i);
i = emit_aux_table_inv(job->q->hwe, dw, i);
dw[i++] = preparser_disable(false);
@ -433,6 +501,8 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
*head = lrc->ring.tail;
xe_gt_assert(gt, !job->ring_ops_force_reset);
i = emit_copy_timestamp(xe, lrc, dw, i);
i = emit_store_imm_ggtt(saddr, seqno, dw, i);
@ -519,7 +589,11 @@ static const struct xe_ring_ops ring_ops_gen12_copy = {
.emit_job = emit_job_gen12_copy,
};
static const struct xe_ring_ops ring_ops_gen12_video = {
static const struct xe_ring_ops ring_ops_gen12_video_decode = {
.emit_job = emit_job_gen12_video,
};
static const struct xe_ring_ops ring_ops_gen12_video_enhance = {
.emit_job = emit_job_gen12_video,
};
@ -527,20 +601,47 @@ static const struct xe_ring_ops ring_ops_gen12_render_compute = {
.emit_job = emit_job_gen12_render_compute,
};
static const struct xe_ring_ops auxccs_ring_ops_gen12_video_decode = {
.emit_job = emit_job_gen12_video,
.emit_aux_table_inv = emit_aux_table_inv_video_decode,
};
static const struct xe_ring_ops auxccs_ring_ops_gen12_video_enhance = {
.emit_job = emit_job_gen12_video,
.emit_aux_table_inv = emit_aux_table_inv_video_enhance,
};
static const struct xe_ring_ops auxccs_ring_ops_gen12_render_compute = {
.emit_job = emit_job_gen12_render_compute,
.emit_aux_table_inv = emit_aux_table_inv_render_compute,
};
const struct xe_ring_ops *
xe_ring_ops_get(struct xe_gt *gt, enum xe_engine_class class)
{
struct xe_device *xe = gt_to_xe(gt);
switch (class) {
case XE_ENGINE_CLASS_OTHER:
return &ring_ops_gen12_gsc;
case XE_ENGINE_CLASS_COPY:
return &ring_ops_gen12_copy;
case XE_ENGINE_CLASS_VIDEO_DECODE:
if (has_aux_ccs(xe))
return &auxccs_ring_ops_gen12_video_decode;
else
return &ring_ops_gen12_video_decode;
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
return &ring_ops_gen12_video;
if (has_aux_ccs(xe))
return &auxccs_ring_ops_gen12_video_enhance;
else
return &ring_ops_gen12_video_enhance;
case XE_ENGINE_CLASS_RENDER:
case XE_ENGINE_CLASS_COMPUTE:
return &ring_ops_gen12_render_compute;
if (has_aux_ccs(xe))
return &auxccs_ring_ops_gen12_render_compute;
else
return &ring_ops_gen12_render_compute;
default:
return NULL;
}

View File

@ -6,9 +6,12 @@
#ifndef _XE_RING_OPS_TYPES_H_
#define _XE_RING_OPS_TYPES_H_
#include <linux/types.h>
struct xe_gt;
struct xe_sched_job;
#define MAX_JOB_SIZE_DW 58
#define MAX_JOB_SIZE_DW 74
#define MAX_JOB_SIZE_BYTES (MAX_JOB_SIZE_DW * 4)
/**
@ -17,6 +20,9 @@ struct xe_sched_job;
struct xe_ring_ops {
/** @emit_job: Write job to ring */
void (*emit_job)(struct xe_sched_job *job);
/** @emit_aux_table_inv: Emit aux table invalidation to the ring */
u32 *(*emit_aux_table_inv)(struct xe_gt *gt, u32 *cmd);
};
#endif

View File

@ -63,6 +63,8 @@ struct xe_sched_job {
u64 sample_timestamp;
/** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
bool ring_ops_flush_tlb;
/** @ring_ops_force_reset: The ring ops need to trigger a reset before payload. */
bool ring_ops_force_reset;
/** @ggtt: mapped in ggtt. */
bool ggtt;
/** @restore_replay: job being replayed for restore */

View File

@ -341,6 +341,8 @@ ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data);
if (ret) {
xe_sriov_packet_free(*data);
*data = NULL;
return ret;
}

View File

@ -123,6 +123,30 @@ int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid)
return result;
}
/**
* xe_sriov_pf_control_prepare_flr() - Notify PF that VF FLR prepare has started.
* @xe: the &xe_device
* @vfid: the VF identifier
*
* This function is for PF only.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_sriov_pf_control_prepare_flr(struct xe_device *xe, unsigned int vfid)
{
struct xe_gt *gt;
unsigned int id;
int result = 0;
int err;
for_each_gt(gt, xe, id) {
err = xe_gt_sriov_pf_control_prepare_flr(gt, vfid);
result = result ? -EUCLEAN : err;
}
return result;
}
/**
* xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete.
* @xe: the &xe_device

View File

@ -12,6 +12,7 @@ int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_prepare_flr(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid);
int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid);

View File

@ -42,6 +42,7 @@ _type xe_sriov_vfio_##_func(struct xe_device *xe, unsigned int vfid) \
EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_##_func, "xe-vfio-pci")
DEFINE_XE_SRIOV_VFIO_FUNCTION(int, wait_flr_done, control_wait_flr);
DEFINE_XE_SRIOV_VFIO_FUNCTION(int, flr_prepare, control_prepare_flr);
DEFINE_XE_SRIOV_VFIO_FUNCTION(int, suspend_device, control_pause_vf);
DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_device, control_resume_vf);
DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_enter, control_trigger_save_vf);

View File

@ -485,10 +485,33 @@ static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
const enum xe_svm_copy_dir dir,
int kb)
{
if (dir == XE_SVM_COPY_TO_VRAM)
if (dir == XE_SVM_COPY_TO_VRAM) {
switch (kb) {
case 4:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_KB, kb);
break;
case 64:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_KB, kb);
break;
case 2048:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_KB, kb);
break;
}
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
else
} else {
switch (kb) {
case 4:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_KB, kb);
break;
case 64:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_KB, kb);
break;
case 2048:
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_KB, kb);
break;
}
xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
}
}
static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,

View File

@ -157,23 +157,19 @@ static int vf_uc_load_hw(struct xe_uc *uc)
err = xe_gt_sriov_vf_connect(uc_to_gt(uc));
if (err)
goto err_out;
return err;
uc->guc.submission_state.enabled = true;
err = xe_guc_opt_in_features_enable(&uc->guc);
if (err)
goto err_out;
return err;
err = xe_gt_record_default_lrcs(uc_to_gt(uc));
if (err)
goto err_out;
return err;
return 0;
err_out:
xe_guc_sanitize(&uc->guc);
return err;
}
/*
@ -205,19 +201,19 @@ int xe_uc_load_hw(struct xe_uc *uc)
ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
if (ret)
goto err_out;
return ret;
ret = xe_guc_post_load_init(&uc->guc);
if (ret)
goto err_out;
return ret;
ret = xe_guc_pc_start(&uc->guc.pc);
if (ret)
goto err_out;
return ret;
ret = xe_guc_rc_enable(&uc->guc);
if (ret)
goto err_out;
return ret;
xe_guc_engine_activity_enable_stats(&uc->guc);
@ -232,10 +228,6 @@ int xe_uc_load_hw(struct xe_uc *uc)
xe_gsc_load_start(&uc->gsc);
return 0;
err_out:
xe_guc_sanitize(&uc->guc);
return ret;
}
int xe_uc_reset_prepare(struct xe_uc *uc)

View File

@ -27,6 +27,7 @@
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
@ -577,6 +578,74 @@ static void preempt_rebind_work_func(struct work_struct *w)
trace_xe_vm_rebind_worker_exit(vm);
}
/**
* xe_vm_add_fault_entry_pf() - Add pagefault to vm fault list
* @vm: The VM.
* @pf: The pagefault.
*
* This function takes the data from the pagefault @pf and saves it to @vm->faults.list.
*
* The function exits silently if the list is full, and reports a warning if the pagefault
* could not be saved to the list.
*/
void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf)
{
struct xe_vm_fault_entry *e;
struct xe_hw_engine *hwe;
/* Do not report faults on reserved engines */
hwe = xe_gt_hw_engine(pf->gt, pf->consumer.engine_class,
pf->consumer.engine_instance, false);
if (!hwe || xe_hw_engine_is_reserved(hwe))
return;
e = kzalloc_obj(*e);
if (!e) {
drm_warn(&vm->xe->drm,
"Could not allocate memory for fault!\n");
return;
}
guard(spinlock)(&vm->faults.lock);
/*
* Limit the number of faults in the fault list to prevent
* memory overuse.
*/
if (vm->faults.len >= MAX_FAULTS_SAVED_PER_VM) {
kfree(e);
return;
}
e->address = pf->consumer.page_addr;
/*
* TODO:
* Address precision is currently always SZ_4K, but this may change
* in the future.
*/
e->address_precision = SZ_4K;
e->access_type = pf->consumer.access_type;
e->fault_type = FIELD_GET(XE_PAGEFAULT_TYPE_MASK,
pf->consumer.fault_type_level);
e->fault_level = FIELD_GET(XE_PAGEFAULT_LEVEL_MASK,
pf->consumer.fault_type_level);
list_add_tail(&e->list, &vm->faults.list);
vm->faults.len++;
}
static void xe_vm_clear_fault_entries(struct xe_vm *vm)
{
struct xe_vm_fault_entry *e, *tmp;
guard(spinlock)(&vm->faults.lock);
list_for_each_entry_safe(e, tmp, &vm->faults.list, list) {
list_del(&e->list);
kfree(e);
}
vm->faults.len = 0;
}
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
int i;
@ -1538,6 +1607,9 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
INIT_LIST_HEAD(&vm->userptr.invalidated);
spin_lock_init(&vm->userptr.invalidated_lock);
INIT_LIST_HEAD(&vm->faults.list);
spin_lock_init(&vm->faults.lock);
ttm_lru_bulk_move_init(&vm->lru_bulk_move);
INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
@ -1854,6 +1926,8 @@ void xe_vm_close_and_put(struct xe_vm *vm)
}
up_write(&xe->usm.lock);
xe_vm_clear_fault_entries(vm);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
@ -2584,7 +2658,6 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
if (!err && op->remap.skip_prev) {
op->remap.prev->tile_present =
tile_present;
op->remap.prev = NULL;
}
}
if (op->remap.next) {
@ -2594,11 +2667,13 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
if (!err && op->remap.skip_next) {
op->remap.next->tile_present =
tile_present;
op->remap.next = NULL;
}
}
/* Adjust for partial unbind after removing VMA from VM */
/*
* Adjust for partial unbind after removing VMA from VM. In case
* of unwind we might need to undo this later.
*/
if (!err) {
op->base.remap.unmap->va->va.addr = op->remap.start;
op->base.remap.unmap->va->va.range = op->remap.range;
@ -2717,6 +2792,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
op->remap.start = xe_vma_start(old);
op->remap.range = xe_vma_size(old);
op->remap.old_start = op->remap.start;
op->remap.old_range = op->remap.range;
flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
if (op->base.remap.prev) {
@ -2865,8 +2942,19 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
xe_svm_notifier_unlock(vm);
if (post_commit)
if (post_commit) {
/*
* Restore the old va range, in case of the
* prev/next skip optimisation. Otherwise what
* we re-insert here could be smaller than the
* original range.
*/
op->base.remap.unmap->va->va.addr =
op->remap.old_start;
op->base.remap.unmap->va->va.range =
op->remap.old_range;
xe_vm_insert_vma(vm, vma);
}
}
break;
}
@ -3465,7 +3553,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
goto free_bind_ops;
}
if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) {
if (XE_WARN_ON(coh_mode > XE_COH_2WAY)) {
err = -EINVAL;
goto free_bind_ops;
}
@ -3492,6 +3580,10 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, xe_device_is_l2_flush_optimized(xe) &&
(op == DRM_XE_VM_BIND_OP_MAP_USERPTR ||
is_cpu_addr_mirror) &&
(pat_index != 19 && coh_mode != XE_COH_2WAY)) ||
XE_IOCTL_DBG(xe, comp_en &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
@ -3633,6 +3725,10 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && comp_en))
return -EINVAL;
if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach && xe_device_is_l2_flush_optimized(xe) &&
(pat_index != 19 && coh_mode != XE_COH_2WAY)))
return -EINVAL;
/* If a BO is protected it can only be mapped if the key is still valid */
if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) &&
op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL)
@ -3878,6 +3974,123 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
return err;
}
/*
* Map access type, fault type, and fault level from current bspec
* specification to user spec abstraction. The current mapping is
* approximately 1-to-1, with access type being the only notable
* exception as it carries additional data with respect to prefetch
* status that needs to be masked out.
*/
static u8 xe_to_user_access_type(u8 access_type)
{
return access_type & XE_PAGEFAULT_ACCESS_TYPE_MASK;
}
static u8 xe_to_user_fault_type(u8 fault_type)
{
return fault_type;
}
static u8 xe_to_user_fault_level(u8 fault_level)
{
return fault_level;
}
static int fill_faults(struct xe_vm *vm,
struct drm_xe_vm_get_property *args)
{
struct xe_vm_fault __user *usr_ptr = u64_to_user_ptr(args->data);
struct xe_vm_fault *fault_list, fault_entry = { 0 };
struct xe_vm_fault_entry *entry;
int ret = 0, i = 0, count, entry_size;
entry_size = sizeof(struct xe_vm_fault);
count = args->size / entry_size;
fault_list = kcalloc(count, sizeof(struct xe_vm_fault), GFP_KERNEL);
if (!fault_list)
return -ENOMEM;
spin_lock(&vm->faults.lock);
list_for_each_entry(entry, &vm->faults.list, list) {
if (i == count)
break;
fault_entry.address = xe_device_canonicalize_addr(vm->xe, entry->address);
fault_entry.address_precision = entry->address_precision;
fault_entry.access_type = xe_to_user_access_type(entry->access_type);
fault_entry.fault_type = xe_to_user_fault_type(entry->fault_type);
fault_entry.fault_level = xe_to_user_fault_level(entry->fault_level);
memcpy(&fault_list[i], &fault_entry, entry_size);
i++;
}
spin_unlock(&vm->faults.lock);
ret = copy_to_user(usr_ptr, fault_list, args->size);
kfree(fault_list);
return ret ? -EFAULT : 0;
}
static int xe_vm_get_property_helper(struct xe_vm *vm,
struct drm_xe_vm_get_property *args)
{
size_t size;
switch (args->property) {
case DRM_XE_VM_GET_PROPERTY_FAULTS:
spin_lock(&vm->faults.lock);
size = size_mul(sizeof(struct xe_vm_fault), vm->faults.len);
spin_unlock(&vm->faults.lock);
if (!args->size) {
args->size = size;
return 0;
}
/*
* The number of faults may increase between calls to
* xe_vm_get_property_ioctl, so report only as many faults as the
* user requested, provided that count does not exceed the number
* currently in the VM fault list.
*
* Also require args->size to be a multiple of the xe_vm_fault
* struct size.
*/
if (args->size > size || args->size % sizeof(struct xe_vm_fault))
return -EINVAL;
return fill_faults(vm, args);
}
return -EINVAL;
}
int xe_vm_get_property_ioctl(struct drm_device *drm, void *data,
struct drm_file *file)
{
struct xe_device *xe = to_xe_device(drm);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_vm_get_property *args = data;
struct xe_vm *vm;
int ret = 0;
if (XE_IOCTL_DBG(xe, (args->reserved[0] || args->reserved[1] ||
args->reserved[2])))
return -EINVAL;
vm = xe_vm_lookup(xef, args->vm_id);
if (XE_IOCTL_DBG(xe, !vm))
return -ENOENT;
ret = xe_vm_get_property_helper(vm, args);
xe_vm_put(vm);
return ret;
}
/**
* xe_vm_bind_kernel_bo - bind a kernel BO to a VM
* @vm: VM to bind the BO to

View File

@ -12,6 +12,12 @@
#include "xe_map.h"
#include "xe_vm_types.h"
/**
* MAX_FAULTS_SAVED_PER_VM - Maximum number of faults each vm can store before future
* faults are discarded to prevent memory overuse
*/
#define MAX_FAULTS_SAVED_PER_VM 50
struct drm_device;
struct drm_printer;
struct drm_file;
@ -22,6 +28,7 @@ struct dma_fence;
struct xe_exec_queue;
struct xe_file;
struct xe_pagefault;
struct xe_sync_entry;
struct xe_svm_range;
struct drm_exec;
@ -203,6 +210,9 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int xe_vm_get_property_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
@ -318,6 +328,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
void xe_vm_add_fault_entry_pf(struct xe_vm *vm, struct xe_pagefault *pf);
/**
* xe_vm_set_validating() - Register this task as currently making bos resident
* @allow_res_evict: Allow eviction of buffer objects bound to @vm when

View File

@ -309,7 +309,7 @@ static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madv
if (XE_IOCTL_DBG(xe, !coh_mode))
return false;
if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
if (XE_WARN_ON(coh_mode > XE_COH_2WAY))
return false;
if (XE_IOCTL_DBG(xe, args->pat_index.pad))
@ -419,6 +419,7 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
.range = args->range, };
struct xe_madvise_details details;
u16 pat_index, coh_mode;
struct xe_vm *vm;
struct drm_exec exec;
int err, attr_type;
@ -455,6 +456,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
if (err || !madvise_range.num_vmas)
goto madv_fini;
if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
pat_index = array_index_nospec(args->pat_index.val, xe->pat.n_entries);
coh_mode = xe_pat_index_get_coh_mode(xe, pat_index);
if (XE_IOCTL_DBG(xe, madvise_range.has_svm_userptr_vmas &&
xe_device_is_l2_flush_optimized(xe) &&
(pat_index != 19 && coh_mode != XE_COH_2WAY))) {
err = -EINVAL;
goto madv_fini;
}
}
if (madvise_range.has_bo_vmas) {
if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
if (!check_bo_args_are_sane(vm, madvise_range.vmas,
@ -472,6 +484,17 @@ int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *fil
if (!bo)
continue;
if (args->type == DRM_XE_MEM_RANGE_ATTR_PAT) {
if (XE_IOCTL_DBG(xe, bo->ttm.base.import_attach &&
xe_device_is_l2_flush_optimized(xe) &&
(pat_index != 19 &&
coh_mode != XE_COH_2WAY))) {
err = -EINVAL;
goto err_fini;
}
}
err = drm_exec_lock_obj(&exec, &bo->ttm.base);
drm_exec_retry_on_contention(&exec);
if (err)

View File

@ -24,6 +24,7 @@
struct drm_pagemap;
struct xe_bo;
struct xe_pagefault;
struct xe_svm_range;
struct xe_sync_entry;
struct xe_user_fence;
@ -176,6 +177,24 @@ struct xe_userptr_vma {
struct xe_device;
/**
* struct xe_vm_fault_entry - Elements of vm->faults.list
* @list: link into @xe_vm.faults.list
* @address: address of the fault
* @address_precision: precision of faulted address
* @access_type: type of address access that resulted in fault
* @fault_type: type of fault reported
* @fault_level: fault level of the fault
*/
struct xe_vm_fault_entry {
struct list_head list;
u64 address;
u32 address_precision;
u8 access_type;
u8 fault_type;
u8 fault_level;
};
struct xe_vm {
/** @gpuvm: base GPUVM used to track VMAs */
struct drm_gpuvm gpuvm;
@ -333,6 +352,16 @@ struct xe_vm {
bool capture_once;
} error_capture;
/** @faults: List of all faults associated with this VM */
struct {
/** @faults.lock: lock protecting @faults.list */
spinlock_t lock;
/** @faults.list: list of xe_vm_fault_entry entries */
struct list_head list;
/** @faults.len: length of @faults.list */
unsigned int len;
} faults;
/**
* @validation: Validation data only valid with the vm resv held.
* Note: This is really task state of the task holding the vm resv,
@ -393,6 +422,10 @@ struct xe_vma_op_remap {
u64 start;
/** @range: range of the VMA unmap */
u64 range;
/** @old_start: Original start of the VMA we unmap */
u64 old_start;
/** @old_range: Original range of the VMA we unmap */
u64 old_range;
/** @skip_prev: skip prev rebind */
bool skip_prev;
/** @skip_next: skip next rebind */

View File

@ -260,21 +260,8 @@ static const struct xe_rtp_entry_sr gt_was[] = {
LSN_DIM_Z_WGT_MASK,
LSN_LNI_WGT(1) | LSN_LNE_WGT(1) |
LSN_DIM_X_WGT(1) | LSN_DIM_Y_WGT(1) |
LSN_DIM_Z_WGT(1)))
},
/* Xe2_HPM */
{ XE_RTP_NAME("16021867713"),
XE_RTP_RULES(MEDIA_VERSION(1301),
ENGINE_CLASS(VIDEO_DECODE)),
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
{ XE_RTP_NAME("14019449301"),
XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)),
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
LSN_DIM_Z_WGT(1)),
SET(LSC_CHICKEN_BIT_0_UDW, L3_128B_256B_WRT_DIS))
},
/* Xe3_LPG */
@ -306,7 +293,7 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(MMIOATSREQLIMIT_GAM_WALK_3D,
DIS_ATS_WRONLY_PG))
},
{ XE_RTP_NAME("14026144927"),
{ XE_RTP_NAME("14026144927, 16029437861"),
XE_RTP_RULES(GRAPHICS_VERSION(3510), GRAPHICS_STEP(A0, B0)),
XE_RTP_ACTIONS(SET(L3SQCREG2, L3_SQ_DISABLE_COAMA_2WAY_COH |
L3_SQ_DISABLE_COAMA))
@ -670,6 +657,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
{ XE_RTP_NAME("14026781792"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3510), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL))
},
/* DG1 */
@ -798,10 +789,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
{ XE_RTP_NAME("14026781792"),
XE_RTP_RULES(GRAPHICS_VERSION(3510), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(FF_MODE, DIS_TE_PATCH_CTRL))
},
};
static __maybe_unused const struct xe_rtp_entry oob_was[] = {

View File

@ -85,6 +85,19 @@ static void xe_vfio_pci_state_mutex_unlock(struct xe_vfio_pci_core_device *xe_vd
spin_unlock(&xe_vdev->reset_lock);
}
static void xe_vfio_pci_reset_prepare(struct pci_dev *pdev)
{
struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
int ret;
if (!pdev->is_virtfn)
return;
ret = xe_sriov_vfio_flr_prepare(xe_vdev->xe, xe_vdev->vfid);
if (ret)
dev_err(&pdev->dev, "Failed to prepare FLR: %d\n", ret);
}
static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
{
struct xe_vfio_pci_core_device *xe_vdev = pci_get_drvdata(pdev);
@ -127,6 +140,7 @@ static void xe_vfio_pci_reset_done(struct pci_dev *pdev)
}
static const struct pci_error_handlers xe_vfio_pci_err_handlers = {
.reset_prepare = xe_vfio_pci_reset_prepare,
.reset_done = xe_vfio_pci_reset_done,
.error_detected = vfio_pci_core_aer_err_detected,
};

View File

@ -4,6 +4,7 @@
#include <linux/dma-direction.h>
#include <linux/hmm.h>
#include <linux/memremap.h>
#include <linux/types.h>
#define NR_PAGES(order) (1U << (order))
@ -367,6 +368,26 @@ void drm_pagemap_destroy(struct drm_pagemap *dpagemap, bool is_atomic_or_reclaim
int drm_pagemap_reinit(struct drm_pagemap *dpagemap);
/**
* drm_pagemap_page_zone_device_data() - Page to zone_device_data
* @page: Pointer to the page
*
* Return: Page's zone_device_data
*/
static inline struct drm_pagemap_zdd *drm_pagemap_page_zone_device_data(struct page *page)
{
struct folio *folio = page_folio(page);
return folio_zone_device_data(folio);
}
#else
static inline struct drm_pagemap_zdd *drm_pagemap_page_zone_device_data(struct page *page)
{
return NULL;
}
#endif /* IS_ENABLED(CONFIG_ZONE_DEVICE) */
#endif

View File

@ -27,6 +27,17 @@ struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev);
*/
bool xe_sriov_vfio_migration_supported(struct xe_device *xe);
/**
* xe_sriov_vfio_flr_prepare() - Notify PF that VF FLR prepare has started.
* @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()
* @vfid: the VF identifier (can't be 0)
*
* This function marks VF FLR as pending before PF receives GuC FLR event.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_sriov_vfio_flr_prepare(struct xe_device *xe, unsigned int vfid);
/**
* xe_sriov_vfio_wait_flr_done() - Wait for VF FLR completion.
* @xe: the PF &xe_device obtained by calling xe_sriov_vfio_get_pf()

View File

@ -83,6 +83,7 @@ extern "C" {
* - &DRM_IOCTL_XE_OBSERVATION
* - &DRM_IOCTL_XE_MADVISE
* - &DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS
* - &DRM_IOCTL_XE_VM_GET_PROPERTY
*/
/*
@ -107,6 +108,7 @@ extern "C" {
#define DRM_XE_MADVISE 0x0c
#define DRM_XE_VM_QUERY_MEM_RANGE_ATTRS 0x0d
#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0e
#define DRM_XE_VM_GET_PROPERTY 0x0f
/* Must be kept compact -- no holes */
@ -125,6 +127,7 @@ extern "C" {
#define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise)
#define DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_MEM_RANGE_ATTRS, struct drm_xe_vm_query_mem_range_attr)
#define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct drm_xe_exec_queue_set_property)
#define DRM_IOCTL_XE_VM_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_GET_PROPERTY, struct drm_xe_vm_get_property)
/**
* DOC: Xe IOCTL Extensions
@ -1057,7 +1060,7 @@ struct drm_xe_vm_destroy {
* not invoke autoreset. Neither will stack variables going out of scope.
* Therefore it's recommended to always explicitly reset the madvises when
* freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call.
* - DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP.
* - %DRM_XE_VM_BIND_FLAG_DECOMPRESS - Request on-device decompression for a MAP.
* When set on a MAP bind operation, request the driver schedule an on-device
* in-place decompression (via the migrate/resolve path) for the GPU mapping
* created by this bind. Only valid for DRM_XE_VM_BIND_OP_MAP; usage on
@ -1114,7 +1117,9 @@ struct drm_xe_vm_bind_op {
* incoherent GT access is possible.
*
* Note: For userptr and externally imported dma-buf the kernel expects
* either 1WAY or 2WAY for the @pat_index.
* either 1WAY or 2WAY for the @pat_index. Starting from NVL-P, for
* userptr, svm, madvise and externally imported dma-buf the kernel expects
* either a 2WAY @pat_index or the 1WAY XA @pat_index.
*
* For DRM_XE_VM_BIND_FLAG_NULL bindings there are no KMD restrictions
* on the @pat_index. For such mappings there is no actual memory being
@ -1261,6 +1266,89 @@ struct drm_xe_vm_bind {
__u64 reserved[2];
};
/** struct xe_vm_fault - Describes faults for %DRM_XE_VM_GET_PROPERTY_FAULTS */
struct xe_vm_fault {
/** @address: Canonical address of the fault */
__u64 address;
/** @address_precision: Precision of faulted address */
__u32 address_precision;
/** @access_type: Type of address access that resulted in fault */
#define FAULT_ACCESS_TYPE_READ 0
#define FAULT_ACCESS_TYPE_WRITE 1
#define FAULT_ACCESS_TYPE_ATOMIC 2
__u8 access_type;
/** @fault_type: Type of fault reported */
#define FAULT_TYPE_NOT_PRESENT 0
#define FAULT_TYPE_WRITE_ACCESS 1
#define FAULT_TYPE_ATOMIC_ACCESS 2
__u8 fault_type;
/** @fault_level: fault level of the fault */
#define FAULT_LEVEL_PTE 0
#define FAULT_LEVEL_PDE 1
#define FAULT_LEVEL_PDP 2
#define FAULT_LEVEL_PML4 3
#define FAULT_LEVEL_PML5 4
__u8 fault_level;
/** @pad: MBZ */
__u8 pad;
/** @reserved: MBZ */
__u64 reserved[4];
};
/**
* struct drm_xe_vm_get_property - Input of &DRM_IOCTL_XE_VM_GET_PROPERTY
*
* The user provides a VM and a property to query among DRM_XE_VM_GET_PROPERTY_*,
* and sets the values in the vm_id and property members, respectively. This
* determines both the VM to get the property of, as well as the property to
* report.
*
* If size is set to 0, the driver fills it with the required size for the
* requested property. The user is then expected to allocate memory for the
* property structure and to provide a pointer to the allocated memory via the
* data member. For some properties the required size may be zero, in which
* case the value of the property is saved to the value member and size
* remains zero on return.
*
* If size is not zero, then the IOCTL will attempt to copy the requested
* property into the data member.
*
* The IOCTL will return -ENOENT if the VM could not be identified from the
* provided VM ID, or -EINVAL if the IOCTL fails for any other reason, such as
* providing an invalid size for the given property or if the property data
* could not be copied to the memory allocated to the data member.
*
* The property member can be:
* - %DRM_XE_VM_GET_PROPERTY_FAULTS
*/
struct drm_xe_vm_get_property {
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
/** @vm_id: The ID of the VM to query the properties of */
__u32 vm_id;
#define DRM_XE_VM_GET_PROPERTY_FAULTS 0
/** @property: property to get */
__u32 property;
/** @size: Size to allocate for @data */
__u32 size;
/** @pad: MBZ */
__u32 pad;
union {
/** @data: Pointer to user-defined array of flexible size and type */
__u64 data;
/** @value: Return value for scalar queries */
__u64 value;
};
/** @reserved: MBZ */
__u64 reserved[3];
};
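
A hedged userspace sketch of the two-call pattern described above, assuming a xe_drm.h that carries the new definitions (error handling trimmed; fd is an open Xe DRM fd and vm_id a valid VM):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

static struct xe_vm_fault *query_vm_faults(int fd, uint32_t vm_id,
					   uint32_t *count)
{
	struct drm_xe_vm_get_property args = {
		.vm_id = vm_id,
		.property = DRM_XE_VM_GET_PROPERTY_FAULTS,
	};
	struct xe_vm_fault *faults;

	/* First call: size == 0, so the driver reports the required size. */
	if (ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &args) || !args.size)
		return NULL;

	faults = malloc(args.size);
	if (!faults)
		return NULL;

	/* Second call: fetch up to args.size bytes of fault entries. */
	args.data = (uint64_t)(uintptr_t)faults;
	if (ioctl(fd, DRM_IOCTL_XE_VM_GET_PROPERTY, &args)) {
		free(faults);
		return NULL;
	}

	*count = args.size / sizeof(*faults);
	return faults;
}
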
/**
* struct drm_xe_exec_queue_create - Input of &DRM_IOCTL_XE_EXEC_QUEUE_CREATE
*