mirror of
https://github.com/torvalds/linux.git
synced 2026-05-25 23:52:08 +02:00
drm/amdgpu: Estimate RAS reservation when report capacity v2
Add estimate of how much vram we need to reserve for RAS when calculating the total available vram. v2: apply the change to MP0 v13_0_2 and v13_0_14 Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com> Reviewed-by: Tao Zhou <tao.zhou1@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
76bec2a031
commit
473af28d3e
|
|
@ -172,6 +172,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
|
||||
size_t system_mem_needed, ttm_mem_needed, vram_needed;
|
||||
int ret = 0;
|
||||
uint64_t vram_size = 0;
|
||||
|
|
@ -220,7 +222,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
|
||||
vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
|
||||
vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
|
|
@ -1673,6 +1675,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
|||
{
|
||||
uint64_t reserved_for_pt =
|
||||
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
|
||||
ssize_t available;
|
||||
uint64_t vram_available, system_mem_available, ttm_mem_available;
|
||||
|
||||
|
|
@ -1680,7 +1684,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
|||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id]
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt;
|
||||
- reserved_for_pt
|
||||
- reserved_for_ras;
|
||||
|
||||
if (adev->flags & AMD_IS_APU) {
|
||||
system_mem_available = no_system_mem_limit ?
|
||||
|
|
|
|||
|
|
@ -3298,6 +3298,24 @@ static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
|
|||
amdgpu_put_xgmi_hive(hive);
|
||||
}
|
||||
|
||||
static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
||||
if (!con || (adev->flags & AMD_IS_APU))
|
||||
return;
|
||||
|
||||
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
|
||||
case IP_VERSION(13, 0, 2):
|
||||
case IP_VERSION(13, 0, 6):
|
||||
case IP_VERSION(13, 0, 14):
|
||||
con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int amdgpu_ras_init(struct amdgpu_device *adev)
|
||||
{
|
||||
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
|
||||
|
|
@ -3403,6 +3421,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
|
|||
/* Get RAS schema for particular SOC */
|
||||
con->schema = amdgpu_get_ras_schema(adev);
|
||||
|
||||
amdgpu_ras_init_reserved_vram_size(adev);
|
||||
|
||||
if (amdgpu_ras_fs_init(adev)) {
|
||||
r = -EINVAL;
|
||||
goto release_con;
|
||||
|
|
|
|||
|
|
@ -64,6 +64,9 @@ struct amdgpu_iv_entry;
|
|||
#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
|
||||
#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
|
||||
|
||||
/* Reserve 8 physical DRAM rows for possible retirement.
|
||||
* In the worst case, this costs 8 * 2MB of memory in the VRAM domain */
|
||||
#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20)
|
||||
/* The high three bits indicates socketid */
|
||||
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
|
||||
|
||||
|
|
@ -541,6 +544,7 @@ struct amdgpu_ras {
|
|||
struct ras_event_manager __event_mgr;
|
||||
struct ras_event_manager *event_mgr;
|
||||
|
||||
uint64_t reserved_pages_in_bytes;
|
||||
};
|
||||
|
||||
struct ras_fs_data {
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user