mirror of
https://github.com/torvalds/linux.git
synced 2026-06-01 11:03:43 +02:00
drm/amdkfd: CRIU Implement KFD unpause operation
Introducing UNPAUSE op. After the CRIU amdgpu plugin performs a PROCESS_INFO op, the queues will stay in an evicted state. Once the plugin is done draining BO contents, it is safe to perform an UNPAUSE op for the queues to resume. Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> Signed-off-by: David Yat Sin <david.yatsin@amd.com> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
011bbb0302
commit
cd9f791030
|
|
@ -2049,6 +2049,14 @@ static int criu_checkpoint(struct file *filep,
|
|||
goto exit_unlock;
|
||||
}
|
||||
|
||||
/* Confirm all process queues are evicted */
|
||||
if (!p->queues_paused) {
|
||||
pr_err("Cannot dump process when queues are not in evicted state\n");
|
||||
/* CRIU plugin did not call op PROCESS_INFO before checkpointing */
|
||||
ret = -EINVAL;
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
criu_get_process_object_info(p, &num_bos, &priv_size);
|
||||
|
||||
if (num_bos != args->num_bos ||
|
||||
|
|
@ -2388,7 +2396,24 @@ static int criu_unpause(struct file *filep,
|
|||
struct kfd_process *p,
|
||||
struct kfd_ioctl_criu_args *args)
|
||||
{
|
||||
return 0;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&p->mutex);
|
||||
|
||||
if (!p->queues_paused) {
|
||||
mutex_unlock(&p->mutex);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = kfd_process_restore_queues(p);
|
||||
if (ret)
|
||||
pr_err("Failed to unpause queues ret:%d\n", ret);
|
||||
else
|
||||
p->queues_paused = false;
|
||||
|
||||
mutex_unlock(&p->mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int criu_resume(struct file *filep,
|
||||
|
|
@ -2440,6 +2465,12 @@ static int criu_process_info(struct file *filep,
|
|||
goto err_unlock;
|
||||
}
|
||||
|
||||
ret = kfd_process_evict_queues(p);
|
||||
if (ret)
|
||||
goto err_unlock;
|
||||
|
||||
p->queues_paused = true;
|
||||
|
||||
args->pid = task_pid_nr_ns(p->lead_thread,
|
||||
task_active_pid_ns(p->lead_thread));
|
||||
|
||||
|
|
@ -2447,6 +2478,10 @@ static int criu_process_info(struct file *filep,
|
|||
|
||||
dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos);
|
||||
err_unlock:
|
||||
if (ret) {
|
||||
kfd_process_restore_queues(p);
|
||||
p->queues_paused = false;
|
||||
}
|
||||
mutex_unlock(&p->mutex);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -877,6 +877,8 @@ struct kfd_process {
|
|||
bool xnack_enabled;
|
||||
|
||||
atomic_t poison;
|
||||
/* Queues are in paused state because we are in the process of doing a CRIU checkpoint */
|
||||
bool queues_paused;
|
||||
};
|
||||
|
||||
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
|
||||
|
|
|
|||
|
|
@ -1384,6 +1384,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
|
|||
process->mm = thread->mm;
|
||||
process->lead_thread = thread->group_leader;
|
||||
process->n_pdds = 0;
|
||||
process->queues_paused = false;
|
||||
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
|
||||
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
|
||||
process->last_restore_timestamp = get_jiffies_64();
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user