drm/amdkfd: kfd driver supports hot unplug/replug amdgpu devices

This patch allows the kfd driver to function correctly when AMD GPU devices are
unplugged and replugged at run time.

When an AMD GPU device is unplugged, the kfd driver gracefully terminates the
existing kfd processes that use it: it stops all of their queues and then sends
SIGBUS to the user space processes. After that, user space can continue to use
the remaining AMD GPU devices. Once all AMD GPU devices have been removed from
the system, the kfd driver stops responding to new requests.

Unplugged AMD GPU devices can be replugged; the kfd driver will use the newly
added devices and function as usual.

The purpose of this patch is to make the kfd driver behave as expected during
and after AMD GPU device unplug/replug at run time.

Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Xiaogang Chen 2026-01-13 20:45:14 -06:00 committed by Alex Deucher
parent d81e52fc61
commit 6cca686dfc
8 changed files with 156 additions and 2 deletions

View File

@ -248,6 +248,11 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
/* Forward a device-teardown request from the amdgpu layer to kfd, which
 * terminates every kfd process still using @adev (see
 * kgd2kfd_teardown_processes()).
 */
void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev)
{
	kgd2kfd_teardown_processes(adev);
}
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
{
if (adev->kfd.dev) {

View File

@ -158,6 +158,7 @@ struct amdkfd_process_info {
int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
void amdgpu_amdkfd_teardown_processes(struct amdgpu_device *adev);
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc);
int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc);
@ -438,6 +439,8 @@ int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
bool retry_fault);
void kgd2kfd_lock_kfd(void);
void kgd2kfd_teardown_processes(struct amdgpu_device *adev);
#else
static inline int kgd2kfd_init(void)
@ -550,5 +553,13 @@ static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct
return false;
}
/* Stub for builds with KFD compiled out: locking the kfd driver is a no-op. */
static inline void kgd2kfd_lock_kfd(void)
{
}
/* Stub for builds with KFD compiled out: no kfd processes exist to tear down. */
static inline void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
{
}
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */

View File

@ -3510,6 +3510,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
amdgpu_amdkfd_suspend(adev, true);
amdgpu_amdkfd_teardown_processes(adev);
amdgpu_userq_suspend(adev);
/* Workaround for ASICs need to disable SMC first */

View File

@ -973,6 +973,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
}
kfree(kfd);
/* after remove a kfd device unlock kfd driver */
kgd2kfd_unlock_kfd(NULL);
}
int kgd2kfd_pre_reset(struct kfd_dev *kfd,
@ -1557,10 +1560,14 @@ int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
return r;
}
/* Unlock either one kfd device (@kfd non-NULL) or the whole kfd driver
 * (@kfd == NULL) by decrementing the matching lock counter under
 * kfd_processes_mutex.
 */
void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
{
	mutex_lock(&kfd_processes_mutex);
	if (!kfd)
		--kfd_locked;		/* driver-wide lock released */
	else
		--kfd->kfd_dev_lock;	/* per-device lock released */
	mutex_unlock(&kfd_processes_mutex);
}
@ -1729,6 +1736,73 @@ bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entr
return false;
}
/* kgd2kfd_check_device_idle - check whether any kfd process still uses @adev
 *
 * Returns true when no process in kfd_processes_table has a per-device
 * data entry whose node belongs to @adev.
 *
 * Note: hash_for_each() is a no-op on an empty table, so no separate
 * hash_empty() pre-check is needed; nothing is removed while iterating,
 * so the _safe iterator variant is not needed either.
 */
static bool kgd2kfd_check_device_idle(struct amdgpu_device *adev)
{
	struct kfd_process *p;
	unsigned int temp;
	bool idle = true;

	mutex_lock(&kfd_processes_mutex);
	hash_for_each(kfd_processes_table, temp, p, kfd_processes) {
		for (int i = 0; i < p->n_pdds; i++) {
			if (p->pdds[i]->dev->adev == adev) {
				idle = false;
				goto out_unlock;
			}
		}
	}
out_unlock:
	mutex_unlock(&kfd_processes_mutex);
	return idle;
}
/** kgd2kfd_teardown_processes - gracefully tear down existing
 * kfd processes that use adev
 *
 * @adev: amdgpu_device where kfd processes run on and will be
 * teardown
 *
 * Signals each affected process once (which sends SIGBUS to the user
 * space process), then waits until no process references @adev any more.
 */
void kgd2kfd_teardown_processes(struct amdgpu_device *adev)
{
	struct kfd_process *p;
	unsigned int temp;

	mutex_lock(&kfd_processes_mutex);
	/* hash_for_each() handles an empty table, and nothing is removed
	 * during iteration, so neither hash_empty() nor the _safe variant
	 * is needed.
	 */
	hash_for_each(kfd_processes_table, temp, p, kfd_processes) {
		for (int i = 0; i < p->n_pdds; i++) {
			if (p->pdds[i]->dev->adev == adev) {
				/* one signal per process is enough, even if
				 * several of its nodes belong to this adev
				 */
				kfd_signal_process_terminate_event(p);
				break;
			}
		}
	}
	mutex_unlock(&kfd_processes_mutex);

	/* wait until all kfd processes that used adev have terminated */
	while (!kgd2kfd_check_device_idle(adev))
		cond_resched();
}
#if defined(CONFIG_DEBUG_FS)
/* This function will send a package to HIQ to hang the HWS

View File

@ -1380,3 +1380,32 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
kfd_unref_process(p);
}
/* signal KFD_EVENT_TYPE_SIGNAL events from process p
* send signal SIGBUS to correspondent user space process
*/
void kfd_signal_process_terminate_event(struct kfd_process *p)
{
struct kfd_event *ev;
u32 id;
rcu_read_lock();
/* iterate from id 1 for KFD_EVENT_TYPE_SIGNAL events */
id = 1;
idr_for_each_entry_continue(&p->event_idr, ev, id)
if (ev->type == KFD_EVENT_TYPE_SIGNAL) {
spin_lock(&ev->lock);
set_event(ev);
spin_unlock(&ev->lock);
}
/* Send SIGBUS to p->lead_thread */
dev_notice(kfd_device,
"Sending SIGBUS to process %d",
p->lead_thread->pid);
send_sig(SIGBUS, p->lead_thread, 0);
rcu_read_unlock();
}

View File

@ -1192,6 +1192,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev,
}
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
uint32_t kfd_gpu_node_num(void);
/* Interrupts */
#define KFD_IRQ_FENCE_CLIENTID 0xff
@ -1547,6 +1548,7 @@ void kfd_signal_vm_fault_event(struct kfd_process_device *pdd,
void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
void kfd_signal_process_terminate_event(struct kfd_process *p);
static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
enum TLB_FLUSH_TYPE type)

View File

@ -949,6 +949,12 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
*/
mutex_lock(&kfd_processes_mutex);
if (kfd_gpu_node_num() <= 0) {
pr_warn("no gpu node! Cannot create KFD process");
process = ERR_PTR(-EINVAL);
goto out;
}
if (kfd_is_locked(NULL)) {
pr_debug("KFD is locked! Cannot create process");
process = ERR_PTR(-EINVAL);
@ -1235,7 +1241,6 @@ static void kfd_process_wq_release(struct work_struct *work)
else
ida_destroy(&p->id_table);
kfd_process_remove_sysfs(p);
kfd_debugfs_remove_process(p);
kfd_process_kunmap_signal_bo(p);
@ -1251,6 +1256,11 @@ static void kfd_process_wq_release(struct work_struct *work)
put_task_struct(p->lead_thread);
/* the last step is removing process entries under /sys
* to indicate the process has been terminated.
*/
kfd_process_remove_sysfs(p);
kfree(p);
}

View File

@ -2357,6 +2357,28 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
}
/* kfd_gpu_node_num - Return the number of kfd GPU nodes in the system
 * that the current process is allowed to access.
 */
uint32_t kfd_gpu_node_num(void)
{
	struct kfd_node *dev;
	uint32_t gpu_num = 0;
	u8 id;

	/* advance id once per iteration instead of duplicating the
	 * increment in both the skip and count paths
	 */
	for (id = 0; kfd_topology_enum_kfd_devices(id, &dev) == 0; id++) {
		/* Skip non-GPU entries and devices to which the current
		 * process has no cgroup access.
		 */
		if (!dev || kfd_devcgroup_check_permission(dev))
			continue;
		gpu_num++;
	}

	return gpu_num;
}
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)