From 9ed1fdee9ee324f3505ff066287ee53143caaaa2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Nov 2022 00:22:24 +0000 Subject: [PATCH 1/4] drm/i915/gvt: Get reference to KVM iff attachment to VM is successful Get a reference to KVM if and only if a vGPU is successfully attached to the VM to avoid leaking a reference if there's no available vGPU. On open_device() failure, vfio_device_open() doesn't invoke close_device(). Fixes: 421cfe6596f6 ("vfio: remove VFIO_GROUP_NOTIFY_SET_KVM") Cc: stable@vger.kernel.org Reviewed-by: Kevin Tian Signed-off-by: Sean Christopherson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221111002225.2418386-2-seanjc@google.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 7a45e5360caf..e67d5267fde0 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -664,8 +664,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) return -ESRCH; } - kvm_get_kvm(vgpu->vfio_device.kvm); - if (__kvmgt_vgpu_exist(vgpu)) return -EEXIST; @@ -676,6 +674,7 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_get_kvm(vgpu->vfio_device.kvm); kvm_page_track_register_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); From 3c9fd44b9330adc5006653566f3d386784b2080e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 11 Nov 2022 00:22:25 +0000 Subject: [PATCH 2/4] drm/i915/gvt: Unconditionally put reference to KVM when detaching vGPU Always put the KVM reference when closing a vGPU device, as intel_vgpu_open_device() succeeds if and only if the KVM pointer is valid and a reference to KVM is acquired. 
And if that doesn't hold true, the call to kvm_page_track_unregister_notifier() a few lines earlier is doomed. Reviewed-by: Kevin Tian Signed-off-by: Sean Christopherson Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221111002225.2418386-3-seanjc@google.com --- drivers/gpu/drm/i915/gvt/kvmgt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e67d5267fde0..714221f9a131 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -714,15 +714,14 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->vfio_device.kvm); + kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; - - if (vgpu->vfio_device.kvm) - kvm_put_kvm(vgpu->vfio_device.kvm); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) From 00a6c36cca760d0b659f894dee728555b193c5e1 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 15 Nov 2022 10:46:20 +0000 Subject: [PATCH 3/4] drm/i915/ttm: never purge busy objects In i915_gem_madvise_ioctl() we immediately purge the object if it is not currently used, like when the mm.pages are NULL. With shmem the pages might still be hanging around or are perhaps swapped out. Similarly with ttm we might still have the pages hanging around on the ttm resource, like with lmem or shmem, but here we need to be extra careful since async unbinds are possible as well as in-progress kernel moves. In i915_ttm_purge() we expect the pipeline-gutting to nuke the ttm resource for us, however if it's busy the memory is only moved to a ghost object, which then leads to broken behaviour when for example clearing the i915_tt->filp, since the actual ttm_tt is still alive and populated, even though it's been moved to the ghost object. 
When we later destroy the ghost object we hit the following, since the filp is now NULL: [ +0.006982] #PF: supervisor read access in kernel mode [ +0.005149] #PF: error_code(0x0000) - not-present page [ +0.005147] PGD 11631d067 P4D 11631d067 PUD 115972067 PMD 0 [ +0.005676] Oops: 0000 [#1] PREEMPT SMP NOPTI [ +0.012962] Workqueue: events ttm_device_delayed_workqueue [ttm] [ +0.006022] RIP: 0010:i915_ttm_tt_unpopulate+0x3a/0x70 [i915] [ +0.005879] Code: 89 fb 48 85 f6 74 11 8b 55 4c 48 8b 7d 30 45 31 c0 31 c9 e8 18 6a e5 e0 80 7d 60 00 74 20 48 8b 45 68 8b 55 08 4c 89 e7 5b 5d <48> 8b 40 20 83 e2 01 41 5c 89 d1 48 8b 70 30 e9 42 b2 ff ff 4c 89 [ +0.018782] RSP: 0000:ffffc9000bf6fd70 EFLAGS: 00010202 [ +0.005244] RAX: 0000000000000000 RBX: ffff8883e12ae380 RCX: 0000000000000000 [ +0.007150] RDX: 000000008000000e RSI: ffffffff823559b4 RDI: ffff8883e12ae3c0 [ +0.007142] RBP: ffff888103b65d48 R08: 0000000000000001 R09: 0000000000000001 [ +0.007144] R10: 0000000000000001 R11: ffff88829c2c8040 R12: ffff8883e12ae3c0 [ +0.007148] R13: 0000000000000001 R14: ffff888115184140 R15: ffff888115184248 [ +0.007154] FS: 0000000000000000(0000) GS:ffff88844db00000(0000) knlGS:0000000000000000 [ +0.008108] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ +0.005763] CR2: 0000000000000020 CR3: 000000013fdb4004 CR4: 00000000003706e0 [ +0.007152] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ +0.007145] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ +0.007154] Call Trace: [ +0.002459] [ +0.002126] ttm_tt_unpopulate.part.0+0x17/0x70 [ttm] [ +0.005068] ttm_bo_tt_destroy+0x1c/0x50 [ttm] [ +0.004464] ttm_bo_cleanup_memtype_use+0x25/0x40 [ttm] [ +0.005244] ttm_bo_cleanup_refs+0x90/0x2c0 [ttm] [ +0.004721] ttm_bo_delayed_delete+0x235/0x250 [ttm] [ +0.004981] ttm_device_delayed_workqueue+0x13/0x40 [ttm] [ +0.005422] process_one_work+0x248/0x560 [ +0.004028] worker_thread+0x4b/0x390 [ +0.003682] ? 
process_one_work+0x560/0x560 [ +0.004199] kthread+0xeb/0x120 [ +0.003163] ? kthread_complete_and_exit+0x20/0x20 [ +0.004815] ret_from_fork+0x1f/0x30 v2: - Just use ttm_bo_wait() directly (Niranjana) - Add testcase reference Testcase: igt@gem_madvise@dontneed-evict-race Fixes: 213d50927763 ("drm/i915/ttm: Introduce a TTM i915 gem object backend") Reported-by: Niranjana Vishwanathapura Signed-off-by: Matthew Auld Cc: Andrzej Hajda Cc: Nirmoy Das Cc: stable@vger.kernel.org # v5.15+ Reviewed-by: Niranjana Vishwanathapura Acked-by: Nirmoy Das Reviewed-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20221115104620.120432-1-matthew.auld@intel.com (cherry picked from commit 5524b5e52e08f675116a93296fe5bee60bc43c03) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 3d4305eea1aa..0d6d640225fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -612,6 +612,10 @@ static int i915_ttm_truncate(struct drm_i915_gem_object *obj) WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED); + err = ttm_bo_wait(bo, true, false); + if (err) + return err; + err = i915_ttm_move_notify(bo); if (err) return err; From ebbaa4392e36521fb893973d8a0fcb32f3b6d5eb Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 14 Nov 2022 14:22:43 +0200 Subject: [PATCH 4/4] drm/i915: Fix warn in intel_display_power_*_domain() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The intel_display_power_*_domain() functions should always warn if a default domain is returned as a fallback, fix this up. Spotted by Ville. 
Fixes: 979e1b32e0e2 ("drm/i915: Sanitize the port -> DDI/AUX power domain mapping for each platform") Cc: Ville Syrjälä Cc: Jouni Högander Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221114122251.21327-2-imre.deak@intel.com (cherry picked from commit 10b85f0e1d922210ae857afed6d012ec32c4b6cb) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/display/intel_display_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 1e608b9e5055..1a63da28f330 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -2434,7 +2434,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port) { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_IO_A; return domains->ddi_io + (int)(port - domains->port_start); @@ -2445,7 +2445,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po { const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port); - if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_PORT_DDI_LANES_A; return domains->ddi_lanes + (int)(port - domains->port_start); @@ -2471,7 +2471,7 @@ intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID) + if 
(drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_A; return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start); @@ -2482,7 +2482,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au { const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch); - if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID) + if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID)) return POWER_DOMAIN_AUX_TBT1; return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start);