From fc74881c28d314b10efac016ef49df4ff40b8b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 10 Dec 2021 09:39:27 +0100 Subject: [PATCH 01/76] drm/amdgpu: fix dropped backing store handling in amdgpu_dma_buf_move_notify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo->tbo.resource can now be NULL. Signed-off-by: Christian König Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1811 Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20211210083927.1754-1-christian.koenig@amd.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index ae6ab93c868b..7444484a12bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; - if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; r = ttm_bo_validate(&bo->tbo, &placement, &ctx); From bf67014d6bda16a72deea11dbbff2a97c705ca92 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 16 Dec 2021 13:35:27 -0500 Subject: [PATCH 02/76] drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The job embedded fence donesn't initialize the flags at dma_fence_init(). Then we will go a wrong way in amdgpu_fence_get_timeline_name callback and trigger a null pointer panic once we enabled the trace event here. So introduce new amdgpu_fence object to indicate the job embedded fence. [ 156.131790] BUG: kernel NULL pointer dereference, address: 00000000000002a0 [ 156.131804] #PF: supervisor read access in kernel mode [ 156.131811] #PF: error_code(0x0000) - not-present page [ 156.131817] PGD 0 P4D 0 [ 156.131824] Oops: 0000 [#1] PREEMPT SMP PTI [ 156.131832] CPU: 6 PID: 1404 Comm: sdma0 Tainted: G OE 5.16.0-rc1-custom #1 [ 156.131842] Hardware name: Gigabyte Technology Co., Ltd. Z170XP-SLI/Z170XP-SLI-CF, BIOS F20 11/04/2016 [ 156.131848] RIP: 0010:strlen+0x0/0x20 [ 156.131859] Code: 89 c0 c3 0f 1f 80 00 00 00 00 48 01 fe eb 0f 0f b6 07 38 d0 74 10 48 83 c7 01 84 c0 74 05 48 39 f7 75 ec 31 c0 c3 48 89 f8 c3 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 [ 156.131872] RSP: 0018:ffff9bd0018dbcf8 EFLAGS: 00010206 [ 156.131880] RAX: 00000000000002a0 RBX: ffff8d0305ef01b0 RCX: 000000000000000b [ 156.131888] RDX: ffff8d03772ab924 RSI: ffff8d0305ef01b0 RDI: 00000000000002a0 [ 156.131895] RBP: ffff9bd0018dbd60 R08: ffff8d03002094d0 R09: 0000000000000000 [ 156.131901] R10: 000000000000005e R11: 0000000000000065 R12: ffff8d03002094d0 [ 156.131907] R13: 000000000000001f R14: 0000000000070018 R15: 0000000000000007 [ 156.131914] FS: 0000000000000000(0000) GS:ffff8d062ed80000(0000) knlGS:0000000000000000 [ 156.131923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 156.131929] CR2: 00000000000002a0 CR3: 000000001120a005 CR4: 00000000003706e0 [ 156.131937] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 156.131942] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 156.131949] Call Trace: [ 156.131953] [ 156.131957] ? trace_event_raw_event_dma_fence+0xcc/0x200 [ 156.131973] ? ring_buffer_unlock_commit+0x23/0x130 [ 156.131982] dma_fence_init+0x92/0xb0 [ 156.131993] amdgpu_fence_emit+0x10d/0x2b0 [amdgpu] [ 156.132302] amdgpu_ib_schedule+0x2f9/0x580 [amdgpu] [ 156.132586] amdgpu_job_run+0xed/0x220 [amdgpu] v2: fix mismatch warning between the prototype and function name (Ray, kernel test robot) Signed-off-by: Huang Rui Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 126 ++++++++++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +- 3 files changed, 90 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1e651b959141..9dc86c5a1cad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4461,7 +4461,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context) { - int i, j, r = 0; + int i, r = 0; struct amdgpu_job *job = NULL; bool need_full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); @@ -4483,15 +4483,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, /*clear job fence from fence drv to avoid force_completion *leave NULL and vm flush fence in fence drv */ - for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) { - struct dma_fence *old, **ptr; + amdgpu_fence_driver_clear_job_fences(ring); - ptr = &ring->fence_drv.fences[j]; - old = rcu_dereference_protected(*ptr, 1); - if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) { - RCU_INIT_POINTER(*ptr, NULL); - } - } /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3b7e86ea7167..9afd11ca2709 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void) * Cast helper */ static const struct dma_fence_ops amdgpu_fence_ops; +static const struct dma_fence_ops amdgpu_job_fence_ops; static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f) { struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base); - if (__f->base.ops == &amdgpu_fence_ops) + if (__f->base.ops == &amdgpu_fence_ops || + __f->base.ops == &amdgpu_job_fence_ops) return __f; return NULL; @@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd } seq = ++ring->fence_drv.sync_seq; - if (job != NULL && job->job_run_counter) { + if (job && job->job_run_counter) { /* reinit seq for resubmitted jobs */ fence->seqno = seq; } else { - dma_fence_init(fence, &amdgpu_fence_ops, - &ring->fence_drv.lock, - adev->fence_context + ring->idx, - seq); - } - - if (job != NULL) { - /* mark this fence has a parent job */ - set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags); + if (job) + dma_fence_init(fence, &amdgpu_job_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); + else + dma_fence_init(fence, &amdgpu_fence_ops, + &ring->fence_drv.lock, + adev->fence_context + ring->idx, seq); } amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, @@ -620,6 +621,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev) } } +/** + * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring + * + * @ring: fence of the ring to be cleared + * + */ +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring) +{ + int i; + struct dma_fence *old, **ptr; + + for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) { + ptr = &ring->fence_drv.fences[i]; + old = rcu_dereference_protected(*ptr, 1); + if (old && old->ops == &amdgpu_job_fence_ops) + RCU_INIT_POINTER(*ptr, NULL); + } +} + /** * amdgpu_fence_driver_force_completion - force signal latest fence of ring * @@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence) static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) { - struct amdgpu_ring *ring; + return (const char *)to_amdgpu_fence(f)->ring->name; +} - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); +static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } - return (const char *)ring->name; + return (const char *)to_amdgpu_ring(job->base.sched)->name; } /** @@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) */ static bool amdgpu_fence_enable_signaling(struct dma_fence *f) { - struct amdgpu_ring *ring; + if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); + return true; +} - ring = to_amdgpu_ring(job->base.sched); - } else { - ring = to_amdgpu_fence(f)->ring; - } +/** + * amdgpu_job_fence_enable_signaling - enable signalling on job fence + * @f: fence + * + * This is the simliar function with amdgpu_fence_enable_signaling above, it + * only handles the job embedded fence. + */ +static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence); - if (!timer_pending(&ring->fence_drv.fallback_timer)) - amdgpu_fence_schedule_fallback(ring); + if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer)) + amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched)); return true; } @@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu) { struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); - if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) { - /* free job if fence has a parent job */ - struct amdgpu_job *job; - - job = container_of(f, struct amdgpu_job, hw_fence); - kfree(job); - } else { /* free fence_slab if it's separated fence*/ - struct amdgpu_fence *fence; + kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f)); +} - fence = to_amdgpu_fence(f); - kmem_cache_free(amdgpu_fence_slab, fence); - } +/** + * amdgpu_job_fence_free - free up the job with embedded fence + * + * @rcu: RCU callback head + * + * Free up the job with embedded fence after the RCU grace period. + */ +static void amdgpu_job_fence_free(struct rcu_head *rcu) +{ + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + + /* free job if fence has a parent job */ + kfree(container_of(f, struct amdgpu_job, hw_fence)); } /** @@ -720,6 +749,19 @@ static void amdgpu_fence_release(struct dma_fence *f) call_rcu(&f->rcu, amdgpu_fence_free); } +/** + * amdgpu_job_fence_release - callback that job embedded fence can be freed + * + * @f: fence + * + * This is the simliar function with amdgpu_fence_release above, it + * only handles the job embedded fence. + */ +static void amdgpu_job_fence_release(struct dma_fence *f) +{ + call_rcu(&f->rcu, amdgpu_job_fence_free); +} + static const struct dma_fence_ops amdgpu_fence_ops = { .get_driver_name = amdgpu_fence_get_driver_name, .get_timeline_name = amdgpu_fence_get_timeline_name, @@ -727,6 +769,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = { .release = amdgpu_fence_release, }; +static const struct dma_fence_ops amdgpu_job_fence_ops = { + .get_driver_name = amdgpu_fence_get_driver_name, + .get_timeline_name = amdgpu_job_fence_get_timeline_name, + .enable_signaling = amdgpu_job_fence_enable_signaling, + .release = amdgpu_job_fence_release, +}; /* * Fence debugfs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 4d380e79752c..fae7d185ad0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -53,9 +53,6 @@ enum amdgpu_ring_priority_level { #define AMDGPU_FENCE_FLAG_INT (1 << 1) #define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2) -/* fence flag bit to indicate the face is embedded in job*/ -#define AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT (DMA_FENCE_FLAG_USER_BITS + 1) - #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) #define AMDGPU_IB_POOL_SIZE (1024 * 1024) @@ -114,6 +111,7 @@ struct amdgpu_fence_driver { struct dma_fence **fences; }; +void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, From 19e66d512e4182a0461530fa3159638e0f55d97e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 15 Dec 2021 23:37:03 +0800 Subject: [PATCH 03/76] drm/amd/pm: Fix xgmi link control on aldebaran Fix the message argument. 0: Allow power down 1: Disallow power down Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 59a7d276541d..7d50827cf0a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -1621,7 +1621,7 @@ static int aldebaran_allow_xgmi_power_down(struct smu_context *smu, bool en) { return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GmiPwrDnControl, - en ? 1 : 0, + en ? 0 : 1, NULL); } From b7865173cf6ae59942e2c69326a06e1c1df5ecf6 Mon Sep 17 00:00:00 2001 From: chen gong Date: Thu, 9 Dec 2021 19:47:10 +0800 Subject: [PATCH 04/76] drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled Play a video on the raven (or PCO, raven2) platform, and then do the S3 test. When resume, the following error will be reported: amdgpu 0000:02:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec test failed (-110) [drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block failed -110 amdgpu 0000:02:00.0: amdgpu: amdgpu_device_ip_resume failed (-110). PM: dpm_run_callback(): pci_pm_resume+0x0/0x90 returns -110 [why] When playing the video: The power state flag of the vcn block is set to POWER_STATE_ON. When doing suspend: There is no change to the power state flag of the vcn block, it is still POWER_STATE_ON. When doing resume: Need to open the power gate of the vcn block and set the power state flag of the VCN block to POWER_STATE_ON. But at this time, the power state flag of the vcn block is already POWER_STATE_ON. The power status flag check in the "8f2cdef drm/amd/pm: avoid duplicate powergate/ungate setting" patch will return the amdgpu_dpm_set_powergating_by_smu function directly. As a result, the gate of the power was not opened, causing the subsequent ring test to fail. [how] In the suspend function of the vcn block, explicitly change the power state flag of the vcn block to POWER_STATE_OFF. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1828 Signed-off-by: chen gong Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index d54d720b3cf6..3799226defc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -246,6 +246,13 @@ static int vcn_v1_0_suspend(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool idle_work_unexecuted; + + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + if (idle_work_unexecuted) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + } r = vcn_v1_0_hw_fini(adev); if (r) From 5e713c6afa34c0fd6f113bf7bb1c2847172d7b20 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 15 Dec 2021 22:13:56 -0500 Subject: [PATCH 05/76] drm/amdgpu: add support for IP discovery gc_info table v2 Used on gfx9 based systems. Fixes incorrect CU counts reported in the kernel log. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1833 Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 76 +++++++++++++------ drivers/gpu/drm/amd/include/discovery.h | 49 ++++++++++++ 2 files changed, 103 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ea00090b3fb3..bcc9343353b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -526,10 +526,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) } } +union gc_info { + struct gc_info_v1_0 v1; + struct gc_info_v2_0 v2; +}; + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; - struct gc_info_v1_0 *gc_info; + union gc_info *gc_info; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -537,28 +542,55 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + + gc_info = (union gc_info *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); - - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / - le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); - + switch (gc_info->v1.header.version_major) { + case 1: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + break; + case 2: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + break; + default: + dev_err(adev->dev, + "Unhandled GC info table %d.%d\n", + gc_info->v1.header.version_major, + gc_info->v1.header.version_minor); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f2..a486769b66c6 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -143,6 +143,55 @@ struct gc_info_v1_0 { uint32_t gc_num_gl2a; }; +struct gc_info_v1_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; +}; + +struct gc_info_v2_0 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ From b1e0887379422975f237d43d8839b751a6bcf154 Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Sat, 18 Dec 2021 02:18:40 +0000 Subject: [PATCH 06/76] usb: gadget: f_fs: Clear ffs_eventfd in ffs_data_clear. ffs_data_clear is indirectly called from both ffs_fs_kill_sb and ffs_ep0_release, so it ends up being called twice when userland closes ep0 and then unmounts f_fs. If userland provided an eventfd along with function's USB descriptors, it ends up calling eventfd_ctx_put as many times, causing a refcount underflow. NULL-ify ffs_eventfd to prevent these extraneous eventfd_ctx_put calls. Also, set epfiles to NULL right after de-allocating it, for readability. For completeness, ffs_data_clear actually ends up being called thrice, the last call being before the whole ffs structure gets freed, so when this specific sequence happens there is a second underflow happening (but not being reported): /sys/kernel/debug/tracing# modprobe usb_f_fs /sys/kernel/debug/tracing# echo ffs_data_clear > set_ftrace_filter /sys/kernel/debug/tracing# echo function > current_tracer /sys/kernel/debug/tracing# echo 1 > tracing_on (setup gadget, run and kill function userland process, teardown gadget) /sys/kernel/debug/tracing# echo 0 > tracing_on /sys/kernel/debug/tracing# cat trace smartcard-openp-436 [000] ..... 1946.208786: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] ..... 1946.279147: ffs_data_clear <-ffs_data_closed smartcard-openp-431 [000] .n... 1946.905512: ffs_data_clear <-ffs_data_put Warning output corresponding to above trace: [ 1946.284139] WARNING: CPU: 0 PID: 431 at lib/refcount.c:28 refcount_warn_saturate+0x110/0x15c [ 1946.293094] refcount_t: underflow; use-after-free. [ 1946.298164] Modules linked in: usb_f_ncm(E) u_ether(E) usb_f_fs(E) hci_uart(E) btqca(E) btrtl(E) btbcm(E) btintel(E) bluetooth(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) bcm2835_v4l2(CE) bcm2835_mmal_vchiq(CE) videobuf2_vmalloc(E) videobuf2_memops(E) sha512_generic(E) videobuf2_v4l2(E) sha512_arm(E) videobuf2_common(E) videodev(E) cpufreq_dt(E) snd_bcm2835(CE) brcmfmac(E) mc(E) vc4(E) ctr(E) brcmutil(E) snd_soc_core(E) snd_pcm_dmaengine(E) drbg(E) snd_pcm(E) snd_timer(E) snd(E) soundcore(E) drm_kms_helper(E) cec(E) ansi_cprng(E) rc_core(E) syscopyarea(E) raspberrypi_cpufreq(E) sysfillrect(E) sysimgblt(E) cfg80211(E) max17040_battery(OE) raspberrypi_hwmon(E) fb_sys_fops(E) regmap_i2c(E) ecdh_generic(E) rfkill(E) ecc(E) bcm2835_rng(E) rng_core(E) vchiq(CE) leds_gpio(E) libcomposite(E) fuse(E) configfs(E) ip_tables(E) x_tables(E) autofs4(E) ext4(E) crc16(E) mbcache(E) jbd2(E) crc32c_generic(E) sdhci_iproc(E) sdhci_pltfm(E) sdhci(E) [ 1946.399633] CPU: 0 PID: 431 Comm: smartcard-openp Tainted: G C OE 5.15.0-1-rpi #1 Debian 5.15.3-1 [ 1946.417950] Hardware name: BCM2835 [ 1946.425442] Backtrace: [ 1946.432048] [] (dump_backtrace) from [] (show_stack+0x20/0x24) [ 1946.448226] r7:00000009 r6:0000001c r5:c04a948c r4:c0a64e2c [ 1946.458412] [] (show_stack) from [] (dump_stack+0x28/0x30) [ 1946.470380] [] (dump_stack) from [] (__warn+0xe8/0x154) [ 1946.482067] r5:c04a948c r4:c0a71dc8 [ 1946.490184] [] (__warn) from [] (warn_slowpath_fmt+0xa0/0xe4) [ 1946.506758] r7:00000009 r6:0000001c r5:c0a71dc8 r4:c0a71e04 [ 1946.517070] [] (warn_slowpath_fmt) from [] (refcount_warn_saturate+0x110/0x15c) [ 1946.535309] r8:c0100224 r7:c0dfcb84 r6:ffffffff r5:c3b84c00 r4:c24a17c0 [ 1946.546708] [] (refcount_warn_saturate) from [] (eventfd_ctx_put+0x48/0x74) [ 1946.564476] [] (eventfd_ctx_put) from [] (ffs_data_clear+0xd0/0x118 [usb_f_fs]) [ 1946.582664] r5:c3b84c00 r4:c2695b00 [ 1946.590668] [] (ffs_data_clear [usb_f_fs]) from [] (ffs_data_closed+0x9c/0x150 [usb_f_fs]) [ 1946.609608] r5:bf54d014 r4:c2695b00 [ 1946.617522] [] (ffs_data_closed [usb_f_fs]) from [] (ffs_fs_kill_sb+0x2c/0x30 [usb_f_fs]) [ 1946.636217] r7:c0dfcb84 r6:c3a12260 r5:bf54d014 r4:c229f000 [ 1946.646273] [] (ffs_fs_kill_sb [usb_f_fs]) from [] (deactivate_locked_super+0x54/0x9c) [ 1946.664893] r5:bf54d014 r4:c229f000 [ 1946.672921] [] (deactivate_locked_super) from [] (deactivate_super+0x60/0x64) [ 1946.690722] r5:c2a09000 r4:c229f000 [ 1946.698706] [] (deactivate_super) from [] (cleanup_mnt+0xe4/0x14c) [ 1946.715553] r5:c2a09000 r4:00000000 [ 1946.723528] [] (cleanup_mnt) from [] (__cleanup_mnt+0x1c/0x20) [ 1946.739922] r7:c0dfcb84 r6:c3a12260 r5:c3a126fc r4:00000000 [ 1946.750088] [] (__cleanup_mnt) from [] (task_work_run+0x84/0xb8) [ 1946.766602] [] (task_work_run) from [] (do_work_pending+0x470/0x56c) [ 1946.783540] r7:5ac3c35a r6:c0d0424c r5:c200bfb0 r4:c200a000 [ 1946.793614] [] (do_work_pending) from [] (slow_work_pending+0xc/0x20) [ 1946.810553] Exception stack(0xc200bfb0 to 0xc200bff8) [ 1946.820129] bfa0: 00000000 00000000 000000aa b5e21430 [ 1946.837104] bfc0: bef867a0 00000001 bef86840 00000034 bef86838 bef86790 bef86794 bef867a0 [ 1946.854125] bfe0: 00000000 bef86798 b67b7a1c b6d626a4 60000010 b5a23760 [ 1946.865335] r10:00000000 r9:c200a000 r8:c0100224 r7:00000034 r6:bef86840 r5:00000001 [ 1946.881914] r4:bef867a0 [ 1946.888793] ---[ end trace 7387f2a9725b28d0 ]--- Fixes: 5e33f6fdf735 ("usb: gadget: ffs: add eventfd notification about ffs events") Cc: stable Signed-off-by: Vincent Pelletier Link: https://lore.kernel.org/r/f79eeea29f3f98de6782a064ec0f7351ad2f598f.1639793920.git.plr.vincent@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index e20c19a0f106..a7e069b18544 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1773,11 +1773,15 @@ static void ffs_data_clear(struct ffs_data *ffs) BUG_ON(ffs->gadget); - if (ffs->epfiles) + if (ffs->epfiles) { ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + ffs->epfiles = NULL; + } - if (ffs->ffs_eventfd) + if (ffs->ffs_eventfd) { eventfd_ctx_put(ffs->ffs_eventfd); + ffs->ffs_eventfd = NULL; + } kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); @@ -1790,7 +1794,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); - ffs->epfiles = NULL; ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; From e3d4621c22f90c33321ae6a6baab60cdb8e5a77c Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:46 +0800 Subject: [PATCH 07/76] usb: mtu3: fix interval value for intr and isoc Use the Interval value from isoc/intr endpoint descriptor, no need minus one. The original code doesn't cause transfer error for normal cases, but it may have side effect with respond time of ERDY or tPingTimeout. Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-1-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index a9a65b4bbfed..c51be015345b 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -77,7 +77,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); if (usb_endpoint_xfer_isoc(desc) && comp_desc) mult = comp_desc->bmAttributes; } @@ -89,7 +89,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_isoc(desc) || usb_endpoint_xfer_int(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } break; From a7aae769ca626819a7f9f078ebdc69a8a1b00c81 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:47 +0800 Subject: [PATCH 08/76] usb: mtu3: add memory barrier before set GPD's HWO There is a seldom issue that the controller access invalid address and trigger devapc or emimpu violation. That is due to memory access is out of order and cause gpd data is not correct. Add mb() to prohibit compiler or cpu from reordering to make sure GPD is fully written before setting its HWO. Fixes: 48e0d3735aa5 ("usb: mtu3: supports new QMU format") Cc: stable@vger.kernel.org Reported-by: Eddie Hung Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-2-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_qmu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 3f414f91b589..2ea3157ddb6e 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP); } + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma)); ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma)); gpd->dw3_info = cpu_to_le32(ext_addr); + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY); - + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); /* by pass the current GDP */ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO); From 8c313e3bfd9adae8d5c4ba1cc696dcbc86fbf9bf Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:48 +0800 Subject: [PATCH 09/76] usb: mtu3: fix list_head check warning This is caused by uninitialization of list_head. BUG: KASAN: use-after-free in __list_del_entry_valid+0x34/0xe4 Call trace: dump_backtrace+0x0/0x298 show_stack+0x24/0x34 dump_stack+0x130/0x1a8 print_address_description+0x88/0x56c __kasan_report+0x1b8/0x2a0 kasan_report+0x14/0x20 __asan_load8+0x9c/0xa0 __list_del_entry_valid+0x34/0xe4 mtu3_req_complete+0x4c/0x300 [mtu3] mtu3_gadget_stop+0x168/0x448 [mtu3] usb_gadget_unregister_driver+0x204/0x3a0 unregister_gadget_item+0x44/0xa4 Fixes: 83374e035b62 ("usb: mtu3: add tracepoints to help debug") Cc: stable@vger.kernel.org Reported-by: Yuwen Ng Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-3-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index c51be015345b..b6c8a4a99c4d 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -235,6 +235,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) mreq->request.dma = DMA_ADDR_INVALID; mreq->epnum = mep->epnum; mreq->mep = mep; + INIT_LIST_HEAD(&mreq->list); trace_mtu3_alloc_request(mreq); return &mreq->request; From 43f3b8cbcf93da7c2755af4a543280c31f4adf16 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Sat, 18 Dec 2021 17:57:49 +0800 Subject: [PATCH 10/76] usb: mtu3: set interval of FS intr and isoc endpoint Add support to set interval also for FS intr and isoc endpoint. Fixes: 4d79e042ed8b ("usb: mtu3: add support for usb3.1 IP") Cc: stable@vger.kernel.org Signed-off-by: Chunfeng Yun Link: https://lore.kernel.org/r/20211218095749.6250-4-chunfeng.yun@mediatek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/mtu3_gadget.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index b6c8a4a99c4d..9977600616d7 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -92,6 +92,13 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } + break; + case USB_SPEED_FULL: + if (usb_endpoint_xfer_isoc(desc)) + interval = clamp_val(desc->bInterval, 1, 16); + else if (usb_endpoint_xfer_int(desc)) + interval = clamp_val(desc->bInterval, 1, 255); + break; default: break; /*others are ignored */ From 67f74302f45d5d862f22ced3297624e50ac352f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Dec 2021 10:10:15 +0100 Subject: [PATCH 11/76] drm/nouveau: wait for the exclusive fence after the shared ones v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always waiting for the exclusive fence resulted on some performance regressions. So try to wait for the shared fences first, then the exclusive fence should always be signaled already. v2: fix incorrectly placed "(", add some comment why we do this. Signed-off-by: Christian König Tested-by: Stefan Fritsch Tested-by: Dan Moulding Acked-by: Ben Skeggs Signed-off-by: Christian König Cc: Link: https://patchwork.freedesktop.org/patch/msgid/20211209102335.18321-1-christian.koenig@amd.com --- drivers/gpu/drm/nouveau/nouveau_fence.c | 54 +++++++++++++------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 05d0b3eb3690..0ae416aa76dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -353,34 +353,16 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e if (ret) return ret; + + fobj = NULL; + } else { + fobj = dma_resv_shared_list(resv); } - fobj = dma_resv_shared_list(resv); - fence = dma_resv_excl_fence(resv); - - if (fence) { - struct nouveau_channel *prev = NULL; - bool must_wait = true; - - f = nouveau_local_fence(fence, chan->drm); - if (f) { - rcu_read_lock(); - prev = rcu_dereference(f->channel); - if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) - must_wait = false; - rcu_read_unlock(); - } - - if (must_wait) - ret = dma_fence_wait(fence, intr); - - return ret; - } - - if (!exclusive || !fobj) - return ret; - - for (i = 0; i < fobj->shared_count && !ret; ++i) { + /* Waiting for the exclusive fence first causes performance regressions + * under some circumstances. So manually wait for the shared ones first. + */ + for (i = 0; i < (fobj ? fobj->shared_count : 0) && !ret; ++i) { struct nouveau_channel *prev = NULL; bool must_wait = true; @@ -400,6 +382,26 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e ret = dma_fence_wait(fence, intr); } + fence = dma_resv_excl_fence(resv); + if (fence) { + struct nouveau_channel *prev = NULL; + bool must_wait = true; + + f = nouveau_local_fence(fence, chan->drm); + if (f) { + rcu_read_lock(); + prev = rcu_dereference(f->channel); + if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0)) + must_wait = false; + rcu_read_unlock(); + } + + if (must_wait) + ret = dma_fence_wait(fence, intr); + + return ret; + } + return ret; } From cfd0d84ba28c18b531648c9d4a35ecca89ad9901 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Mon, 20 Dec 2021 11:01:50 -0800 Subject: [PATCH 12/76] binder: fix async_free_space accounting for empty parcels In 4.13, commit 74310e06be4d ("android: binder: Move buffer out of area shared with user space") fixed a kernel structure visibility issue. As part of that patch, sizeof(void *) was used as the buffer size for 0-length data payloads so the driver could detect abusive clients sending 0-length asynchronous transactions to a server by enforcing limits on async_free_size. Unfortunately, on the "free" side, the accounting of async_free_space did not add the sizeof(void *) back. The result was that up to 8-bytes of async_free_space were leaked on every async transaction of 8-bytes or less. These small transactions are uncommon, so this accounting issue has gone undetected for several years. The fix is to use "buffer_size" (the allocated buffer size) instead of "size" (the logical buffer size) when updating the async_free_space during the free operation. These are the same except for this corner case of asynchronous transactions with payloads < 8 bytes. Fixes: 74310e06be4d ("android: binder: Move buffer out of area shared with user space") Signed-off-by: Todd Kjos Cc: stable@vger.kernel.org # 4.14+ Link: https://lore.kernel.org/r/20211220190150.2107077-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 340515f54498..47bc74a8c7b6 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -671,7 +671,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); + alloc->free_async_space += buffer_size + sizeof(struct binder_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", From 3a0152b219523227c2a62a0a122cf99608287176 Mon Sep 17 00:00:00 2001 From: Andra Paraschiv Date: Mon, 20 Dec 2021 19:58:56 +0000 Subject: [PATCH 13/76] nitro_enclaves: Use get_user_pages_unlocked() call to handle mmap assert After commit 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()"), the call to get_user_pages() will trigger the mmap assert. static inline void mmap_assert_locked(struct mm_struct *mm) { lockdep_assert_held(&mm->mmap_lock); VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } [ 62.521410] kernel BUG at include/linux/mmap_lock.h:156! ........................................................... [ 62.538938] RIP: 0010:find_vma+0x32/0x80 ........................................................... [ 62.605889] Call Trace: [ 62.608502] [ 62.610956] ? lock_timer_base+0x61/0x80 [ 62.614106] find_extend_vma+0x19/0x80 [ 62.617195] __get_user_pages+0x9b/0x6a0 [ 62.620356] __gup_longterm_locked+0x42d/0x450 [ 62.623721] ? finish_wait+0x41/0x80 [ 62.626748] ? __kmalloc+0x178/0x2f0 [ 62.629768] ne_set_user_memory_region_ioctl.isra.0+0x225/0x6a0 [nitro_enclaves] [ 62.635776] ne_enclave_ioctl+0x1cf/0x6d7 [nitro_enclaves] [ 62.639541] __x64_sys_ioctl+0x82/0xb0 [ 62.642620] do_syscall_64+0x3b/0x90 [ 62.645642] entry_SYSCALL_64_after_hwframe+0x44/0xae Use get_user_pages_unlocked() when setting the enclave memory regions. That's a similar pattern as mmap_read_lock() used together with get_user_pages(). Fixes: 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()") Cc: stable@vger.kernel.org Signed-off-by: Andra Paraschiv Link: https://lore.kernel.org/r/20211220195856.6549-1-andraprs@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/virt/nitro_enclaves/ne_misc_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c index 8939612ee0e0..6894ccb868a6 100644 --- a/drivers/virt/nitro_enclaves/ne_misc_dev.c +++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c @@ -886,8 +886,9 @@ static int ne_set_user_memory_region_ioctl(struct ne_enclave *ne_enclave, goto put_pages; } - gup_rc = get_user_pages(mem_region.userspace_addr + memory_size, 1, FOLL_GET, - ne_mem_region->pages + i, NULL); + gup_rc = get_user_pages_unlocked(mem_region.userspace_addr + memory_size, 1, + ne_mem_region->pages + i, FOLL_GET); + if (gup_rc < 0) { rc = gup_rc; From e4844092581ceec22489b66c42edc88bc6079783 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 21 Dec 2021 13:28:25 +0200 Subject: [PATCH 14/76] xhci: Fresco FL1100 controller should not have BROKEN_MSI quirk set. The Fresco Logic FL1100 controller needs the TRUST_TX_LENGTH quirk like other Fresco controllers, but should not have the BROKEN_MSI quirks set. BROKEN_MSI quirk causes issues in detecting usb drives connected to docks with this FL1100 controller. The BROKEN_MSI flag was apparently accidentally set together with the TRUST_TX_LENGTH quirk Original patch went to stable so this should go there as well. Fixes: ea0f69d82119 ("xhci: Enable trust tx length quirk for Fresco FL11 USB controller") Cc: stable@vger.kernel.org cc: Nikolay Martynov Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20211221112825.54690-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 3af017883231..5c351970cdf1 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -123,7 +123,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) /* Look for vendor-specific quirks */ if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK || - pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 || pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1400)) { if (pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_PDK && pdev->revision == 0x0) { @@ -158,6 +157,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; From 3f345e907a8e7c56fdebf7231cd67afc85d02aaa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Dec 2021 17:03:52 +0300 Subject: [PATCH 15/76] usb: typec: ucsi: Only check the contract if there is a connection The driver must make sure there is an actual connection before checking details about the USB Power Delivery contract. Those details are not valid unless there is a connection. This fixes NULL pointer dereference that is caused by an attempt to register bogus partner alternate mode that the firmware on some platform may report before the actual connection. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215117 Fixes: 6cbe4b2d5a3f ("usb: typec: ucsi: Check the partner alt modes always if there is PD contract") Reported-by: Chris Hixon Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/eb34f98f-00ef-3238-2daa-80481116035d@leemhuis.info/ Link: https://lore.kernel.org/r/20211221140352.45501-1-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 6aa28384f77f..08561bf7c40c 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -1150,7 +1150,9 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) ret = 0; } - if (UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == UCSI_CONSTAT_PWR_OPMODE_PD) { + if (con->partner && + UCSI_CONSTAT_PWR_OPMODE(con->status.flags) == + UCSI_CONSTAT_PWR_OPMODE_PD) { ucsi_get_src_pdos(con); ucsi_check_altmodes(con); } From 4d625a97a7e96be016382e3bb0a3cead05fec153 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Dec 2021 09:54:40 -0500 Subject: [PATCH 16/76] drm/amdgpu: fix runpm documentation It's not only supported by HG/PX laptops. It's supported by all dGPUs which supports BOCO/BACO functionality (runtime D3). BOCO - Bus Off, Chip Off. The entire chip is powered off. This is controlled by ACPI. BACO - Bus Active, Chip Off. The chip still shows up on the PCI bus, but the device itself is powered down. v2: fix missed HG/PX reference Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad95de6399af..73ac02372827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); /** * DOC: runpm (int) - * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down - * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. + * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down + * the dGPUs when they are idle if supported. The default is -1 (auto enable). + * Setting the value to 0 disables this functionality. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** From 6b8b42585886c59a008015083282aae434349094 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 22 Dec 2021 06:54:53 +0000 Subject: [PATCH 17/76] net/mlx5: DR, Fix NULL vs IS_ERR checking in dr_domain_init_resources The mlx5_get_uars_page() function returns error pointers. Using IS_ERR() to check the return value to fix this. Fixes: 4ec9e7b02697 ("net/mlx5: DR, Expose steering domain functionality") Signed-off-by: Miaoqian Lin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 8cbd36c82b3b..f6e6d9209766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -72,9 +73,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } From 624bf42c2e3930acca9fcfc340b2fa38e712da84 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 12 Dec 2021 16:19:58 +0200 Subject: [PATCH 18/76] net/mlx5: DR, Fix querying eswitch manager vport for ECPF On BlueField the E-Switch manager is the ECPF (vport 0xFFFE), but when querying capabilities of ECPF eswitch manager, need to query vport 0 with other_vport = 0. Fixes: 9091b821aaa4 ("net/mlx5: DR, Handle eswitch manager and uplink vports separately") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index f6e6d9209766..c54cc45f63dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -164,9 +164,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn, static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn) { - return dr_domain_query_vport(dmn, - dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0, - false, + return dr_domain_query_vport(dmn, 0, false, &dmn->info.caps.vports.esw_manager_caps); } From 26a7993c93a74a3fee83a37b46e00e69e49e57c2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 26 Oct 2021 08:25:19 +0300 Subject: [PATCH 19/76] net/mlx5: Use first online CPU instead of hard coded CPU Hard coded CPU (0 in our case) might be offline. Hence, use the first online CPU instead. Fixes: f891b7cdbdcd ("net/mlx5: Enable single IRQ for PCI Function") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 830444f927d4..0e84c005d160 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -398,7 +398,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx, cpumask_copy(irq->mask, affinity); if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max && cpumask_empty(irq->mask)) - cpumask_set_cpu(0, irq->mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), irq->mask); irq_set_affinity_hint(irq->irqn, irq->mask); unlock: mutex_unlock(&pool->lock); From aa968f922039706f6d13e8870b49e424d0a8d9ad Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 24 Nov 2021 23:10:57 +0200 Subject: [PATCH 20/76] net/mlx5: Fix error print in case of IRQ request failed In case IRQ layer failed to find or to request irq, the driver is printing the first cpu of the provided affinity as part of the error print. Empty affinity is a valid input for the IRQ layer, and it is an error to call cpumask_first() on empty affinity. Remove the first cpu print from the error message. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 0e84c005d160..bcee30f5de0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -356,8 +356,8 @@ static struct mlx5_irq *irq_pool_request_affinity(struct mlx5_irq_pool *pool, new_irq = irq_pool_create_irq(pool, affinity); if (IS_ERR(new_irq)) { if (!least_loaded_irq) { - mlx5_core_err(pool->dev, "Didn't find IRQ for cpu = %u\n", - cpumask_first(affinity)); + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", + PTR_ERR(new_irq)); mutex_unlock(&pool->lock); return new_irq; } From 33de865f7bce3968676e43b0182af0a2dd359dae Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 23 Nov 2021 20:08:13 +0200 Subject: [PATCH 21/76] net/mlx5: Fix SF health recovery flow SF do not directly control the PCI device. During recovery flow SF should not be allowed to do pci disable or pci reset, its PF will do it. It fixes the following kernel trace: mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:387:(pid 40948): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab175 after 1 iterations mlx5_core.sf mlx5_core.sf.25: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.23: mlx5_health_try_recover:387:(pid 40946): starting health recovery flow mlx5_core 0000:03:00.0: mlx5_pci_slot_reset was called mlx5_core 0000:03:00.0: wait vital counter value 0xab193 after 1 iterations mlx5_core.sf mlx5_core.sf.23: firmware version: 24.32.532 mlx5_core.sf mlx5_core.sf.25: mlx5_cmd_check:813:(pid 40948): ENABLE_HCA(0x104) op_mod(0x0) failed, status bad resource state(0x9), syndrome (0x658908) mlx5_core.sf mlx5_core.sf.25: mlx5_function_setup:1292:(pid 40948): enable hca failed mlx5_core.sf mlx5_core.sf.25: mlx5_health_try_recover:389:(pid 40948): health recovery failed Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 7df9c7f8d9c8..65083496f913 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,12 +1809,13 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) int mlx5_recover_device(struct mlx5_core_dev *dev) { - int ret = -EIO; + if (!mlx5_core_is_sf(dev)) { + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) + return -EIO; + } - mlx5_pci_disable_device(dev); - if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) - ret = mlx5_load_one(dev); - return ret; + return mlx5_load_one(dev); } static struct pci_driver mlx5_core_driver = { From d671e109bd8548d067b27e39e183a484430bf102 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 14 Dec 2021 03:52:53 +0200 Subject: [PATCH 22/76] net/mlx5: Fix tc max supported prio for nic mode Only prio 1 is supported if firmware doesn't support ignore flow level for nic mode. The offending commit removed the check wrongly. Add it back. Fixes: 9a99c8f1253a ("net/mlx5e: E-Switch, Offload all chain 0 priorities when modify header and forward action is not supported") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 97e5845b4cfd..d5e47630e284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,6 +121,9 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { + if (!mlx5_chains_prios_supported(chains)) + return 1; + if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; From 918fc3855a6507a200e9cf22c20be852c0982687 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Tue, 30 Nov 2021 16:05:44 +0200 Subject: [PATCH 23/76] net/mlx5e: Wrap the tx reporter dump callback to extract the sq Function mlx5e_tx_reporter_dump_sq() casts its void * argument to struct mlx5e_txqsq *, but in TX-timeout-recovery flow the argument is actually of type struct mlx5e_tx_timeout_ctx *. mlx5_core 0000:08:00.1 enp8s0f1: TX timeout detected mlx5_core 0000:08:00.1 enp8s0f1: TX timeout on queue: 1, SQ: 0x11ec, CQ: 0x146d, SQ Cons: 0x0 SQ Prod: 0x1, usecs since last trans: 21565000 BUG: stack guard page was hit at 0000000093f1a2de (stack is 00000000b66ea0dc..000000004d932dae) kernel stack overflow (page fault): 0000 [#1] SMP NOPTI CPU: 5 PID: 95 Comm: kworker/u20:1 Tainted: G W OE 5.13.0_mlnx #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 [mlx5_core] Call Trace: mlx5e_tx_reporter_dump+0x43/0x1c0 [mlx5_core] devlink_health_do_dump.part.91+0x71/0xd0 devlink_health_report+0x157/0x1b0 mlx5e_reporter_tx_timeout+0xb9/0xf0 [mlx5_core] ? mlx5e_tx_reporter_err_cqe_recover+0x1d0/0x1d0 [mlx5_core] ? mlx5e_health_queue_dump+0xd0/0xd0 [mlx5_core] ? update_load_avg+0x19b/0x550 ? set_next_entity+0x72/0x80 ? pick_next_task_fair+0x227/0x340 ? finish_task_switch+0xa2/0x280 mlx5e_tx_timeout_work+0x83/0xb0 [mlx5_core] process_one_work+0x1de/0x3a0 worker_thread+0x2d/0x3c0 ? process_one_work+0x3a0/0x3a0 kthread+0x115/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 --[ end trace 51ccabea504edaff ]--- RIP: 0010:mlx5e_tx_reporter_dump_sq+0xd3/0x180 PKRU: 55555554 Kernel panic - not syncing: Fatal exception Kernel Offset: disabled end Kernel panic - not syncing: Fatal exception To fix this bug add a wrapper for mlx5e_tx_reporter_dump_sq() which extracts the sq from struct mlx5e_tx_timeout_ctx and set it as the TX-timeout-recovery flow dump callback. Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Aya Levin Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 4f4bc8726ec4..614cd9477600 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -466,6 +466,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -561,7 +569,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->ch_ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, From a0cb909644c36230a3c48904d14b91732de79fc0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 13 Dec 2021 11:05:11 +0200 Subject: [PATCH 24/76] net/mlx5e: Fix skb memory leak when TC classifier action offloads are disabled When TC classifier action offloads are disabled (CONFIG_MLX5_CLS_ACT in Kconfig), the mlx5e_rep_tc_receive() function which is responsible for passing the skb to the stack (or freeing it) is defined as a nop, and results in leaking the skb memory. Replace the nop with a call to napi_gro_receive() to resolve the leak. Fixes: 28e7606fa8f1 ("net/mlx5e: Refactor rx handler of represetor device") Signed-off-by: Gal Pressman Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h index d6c7c81690eb..7c9dd3a75f8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h @@ -66,7 +66,7 @@ mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, static inline void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, - struct sk_buff *skb) {} + struct sk_buff *skb) { napi_gro_receive(rq->cq.napi, skb); } #endif /* CONFIG_MLX5_CLS_ACT */ From 17958d7cd731b977ae7d4af38d891c3a1235b5f1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 12 Oct 2021 19:40:09 +0300 Subject: [PATCH 25/76] net/mlx5e: Fix interoperability between XSK and ICOSQ recovery flow Both regular RQ and XSKRQ use the same ICOSQ for UMRs. When doing recovery for the ICOSQ, don't forget to deactivate XSKRQ. XSK can be opened and closed while channels are active, so a new mutex prevents the ICOSQ recovery from running at the same time. The ICOSQ recovery deactivates and reactivates XSKRQ, so any parallel change in XSK state would break consistency. As the regular RQ is running, it's not enough to just flush the recovery work, because it can be rescheduled. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ .../ethernet/mellanox/mlx5/core/en/health.h | 2 ++ .../mellanox/mlx5/core/en/reporter_rx.c | 35 ++++++++++++++++++- .../mellanox/mlx5/core/en/xsk/setup.c | 16 ++++++++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++-- 5 files changed, 58 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f0ac6b0d9653..f42067adc79d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -783,6 +783,8 @@ struct mlx5e_channel { DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; + /* Sync between icosq recovery and XSK enable/disable. */ + struct mutex icosq_recovery_lock; }; struct mlx5e_ptp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d5b7110a4265..0107e4e73bb0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -30,6 +30,8 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c); +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 74086eb556ae..2684e9da9f41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -62,6 +62,7 @@ static void mlx5e_reset_icosq_cc_pc(struct mlx5e_icosq *icosq) static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) { + struct mlx5e_rq *xskrq = NULL; struct mlx5_core_dev *mdev; struct mlx5e_icosq *icosq; struct net_device *dev; @@ -70,7 +71,13 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) int err; icosq = ctx; + + mutex_lock(&icosq->channel->icosq_recovery_lock); + + /* mlx5e_close_rq cancels this work before RQ and ICOSQ are killed. */ rq = &icosq->channel->rq; + if (test_bit(MLX5E_RQ_STATE_ENABLED, &icosq->channel->xskrq.state)) + xskrq = &icosq->channel->xskrq; mdev = icosq->channel->mdev; dev = icosq->channel->netdev; err = mlx5_core_query_sq_state(mdev, icosq->sqn, &state); @@ -84,6 +91,9 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_deactivate_rq(rq); + if (xskrq) + mlx5e_deactivate_rq(xskrq); + err = mlx5e_wait_for_icosq_flush(icosq); if (err) goto out; @@ -97,15 +107,28 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); + mlx5e_free_rx_in_progress_descs(rq); + if (xskrq) + mlx5e_free_rx_in_progress_descs(xskrq); + clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); - mlx5e_activate_rq(rq); + mlx5e_activate_rq(rq); rq->stats->recover++; + + if (xskrq) { + mlx5e_activate_rq(xskrq); + xskrq->stats->recover++; + } + + mutex_unlock(&icosq->channel->icosq_recovery_lock); + return 0; out: clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); + mutex_unlock(&icosq->channel->icosq_recovery_lock); return err; } @@ -706,6 +729,16 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq) mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx); } +void mlx5e_reporter_icosq_suspend_recovery(struct mlx5e_channel *c) +{ + mutex_lock(&c->icosq_recovery_lock); +} + +void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) +{ + mutex_unlock(&c->icosq_recovery_lock); +} + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 538bc2419bd8..8526a5fbbf0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -4,6 +4,7 @@ #include "setup.h" #include "en/params.h" #include "en/txrx.h" +#include "en/health.h" /* It matches XDP_UMEM_MIN_CHUNK_SIZE, but as this constant is private and may * change unexpectedly, and mlx5e has a minimum valid stride size for striding @@ -170,7 +171,13 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) void mlx5e_activate_xsk(struct mlx5e_channel *c) { + /* ICOSQ recovery deactivates RQs. Suspend the recovery to avoid + * activating XSKRQ in the middle of recovery. + */ + mlx5e_reporter_icosq_suspend_recovery(c); set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + /* TX queue is created active. */ spin_lock_bh(&c->async_icosq_lock); @@ -180,6 +187,13 @@ void mlx5e_activate_xsk(struct mlx5e_channel *c) void mlx5e_deactivate_xsk(struct mlx5e_channel *c) { - mlx5e_deactivate_rq(&c->xskrq); + /* ICOSQ recovery may reactivate XSKRQ if clear_bit is called in the + * middle of recovery. Suspend the recovery to avoid it. + */ + mlx5e_reporter_icosq_suspend_recovery(c); + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); + mlx5e_reporter_icosq_resume_recovery(c); + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + /* TX queue is disabled on close. */ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 65571593ec5c..a572fc9690ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1087,8 +1087,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq) void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); - if (rq->icosq) - cancel_work_sync(&rq->icosq->recover_work); cancel_work_sync(&rq->recover_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); @@ -2088,6 +2086,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_xdpsq_cq; + mutex_init(&c->icosq_recovery_lock); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_close_async_icosq; @@ -2156,9 +2156,12 @@ static void mlx5e_close_queues(struct mlx5e_channel *c) mlx5e_close_xdpsq(&c->xdpsq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */ + cancel_work_sync(&c->icosq.recover_work); mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); + mutex_destroy(&c->icosq_recovery_lock); mlx5e_close_icosq(&c->async_icosq); if (c->xdp) mlx5e_close_cq(&c->rq_xdpsq.cq); From 19c4aba2d4e23997061fb11aed8a3e41334bfa14 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 22 Jul 2020 16:32:44 +0300 Subject: [PATCH 26/76] net/mlx5e: Fix ICOSQ recovery flow for XSK There are two ICOSQs per channel: one is needed for RX, and the other for async operations (XSK TX, kTLS offload). Currently, the recovery flow for both is the same, and async ICOSQ is mistakenly treated like the regular ICOSQ. This patch prevents running the regular ICOSQ recovery on async ICOSQ. The purpose of async ICOSQ is to handle XSK wakeup requests and post kTLS offload RX parameters, it has nothing to do with RQ and XSKRQ UMRs, so the regular recovery sequence is not applicable here. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 -- .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++++++----- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index f42067adc79d..b47a0d3ef22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1016,9 +1016,6 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param); void mlx5e_destroy_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a572fc9690ed..3b0f3a831216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1214,9 +1214,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1237,7 +1248,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1573,13 +1584,14 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1618,7 +1630,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -2082,13 +2094,15 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_close_xdpsq_cq; mutex_init(&c->icosq_recovery_lock); - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; From 2820110d945923ab2f4901753e4ccbb2a506fa8e Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 2 Dec 2021 11:18:02 +0800 Subject: [PATCH 27/76] net/mlx5e: Delete forward rule for ct or sample action When there is ct or sample action, the ct or sample rule will be deleted and return. But if there is an extra mirror action, the forward rule can't be deleted because of the return. Fix it by removing the return. Fixes: 69e2916ebce4 ("net/mlx5: CT: Add support for mirroring") Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d45f4ae80c0..f633448c3cc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1196,21 +1196,16 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) goto offload_rule_0; - if (flow_flag_test(flow, CT)) { - mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); - return; - } - - if (flow_flag_test(flow, SAMPLE)) { - mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); - return; - } - if (attr->esw_attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else if (flow_flag_test(flow, SAMPLE)) + mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr); + else offload_rule_0: - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); } struct mlx5_flow_handle * From 4390c6edc0fb390e699d0f886f45575dfeafeb4b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Nov 2021 18:08:11 +0100 Subject: [PATCH 28/76] net/mlx5: Fix some error handling paths in 'mlx5e_tc_add_fdb_flow()' All the error handling paths of 'mlx5e_tc_add_fdb_flow()' end to 'err_out' where 'flow_flag_set(flow, FAILED);' is called. All but the new error handling paths added by the commits given in the Fixes tag below. Fix these error handling paths and branch to 'err_out'. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Christophe JAILLET Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed (cherry picked from commit 31108d142f3632970f6f3e0224bd1c6781c9f87d) --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index f633448c3cc7..a60c7680fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1440,7 +1440,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG, metadata); if (err) - return err; + goto err_out; } } @@ -1456,13 +1456,15 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (attr->chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule is only supported on chain 0"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } if (attr->dest_chain) { NL_SET_ERR_MSG_MOD(extack, "Internal port rule offload doesn't support goto action"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_out; } int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), @@ -1470,8 +1472,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, flow_flag_test(flow, EGRESS) ? MLX5E_TC_INT_PORT_EGRESS : MLX5E_TC_INT_PORT_INGRESS); - if (IS_ERR(int_port)) - return PTR_ERR(int_port); + if (IS_ERR(int_port)) { + err = PTR_ERR(int_port); + goto err_out; + } esw_attr->int_port = int_port; } From 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:40 +0000 Subject: [PATCH 29/76] udp: using datalen to cap ipv6 udp max gso segments The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). skb->len contains network and transport header len here, we should use only data len instead. This is the ipv6 counterpart to the below referenced commit, which missed the ipv6 change Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a2caca6ccf11..8cde9efd7919 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1204,7 +1204,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } From 5471d5226c3b39b3d2f7011c082d5715795bd65c Mon Sep 17 00:00:00 2001 From: Coco Li Date: Thu, 23 Dec 2021 22:24:41 +0000 Subject: [PATCH 30/76] selftests: Calculate udpgso segment count without header adjustment The below referenced commit correctly updated the computation of number of segments (gso_size) by using only the gso payload size and removing the header lengths. With this change the regression test started failing. Update the tests to match this new behavior. Both IPv4 and IPv6 tests are updated, as a separate patch in this series will update udp_v6_send_skb to match this change in udp_send_skb. Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-2-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8..7badaf215de2 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, From b45396afa4177f2b1ddfeff7185da733fade1dc3 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 24 Dec 2021 02:14:59 +0000 Subject: [PATCH 31/76] net: phy: fixed_phy: Fix NULL vs IS_ERR() checking in __fixed_phy_register The fixed_phy_get_gpiod function() returns NULL, it doesn't return error pointers, using NULL checking to fix this.i Fixes: 5468e82f7034 ("net: phy: fixed-phy: Drop GPIO from fixed_phy_add()") Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20211224021500.10362-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/fixed_phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index c65fb5f5d2dc..a0c256bd5441 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -239,8 +239,8 @@ static struct phy_device *__fixed_phy_register(unsigned int irq, /* Check if we have a GPIO associated with this fixed phy */ if (!gpiod) { gpiod = fixed_phy_get_gpiod(np); - if (IS_ERR(gpiod)) - return ERR_CAST(gpiod); + if (!gpiod) + return ERR_PTR(-EINVAL); } /* Get the next available PHY address, up to PHY_MAX_ADDR */ From 5ec7d18d1813a5bead0b495045606c93873aecbb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 23 Dec 2021 13:04:30 -0500 Subject: [PATCH 32/76] sctp: use call_rcu to free endpoint This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- net/sctp/diag.c | 12 ++++++------ net/sctp/endpointola.c | 23 +++++++++++++++-------- net/sctp/socket.c | 23 +++++++++++++++-------- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 189fdb9db162..d314a180ab93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -105,6 +105,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -115,9 +116,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 899c29c326ba..8dabd8800006 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1355,6 +1355,7 @@ struct sctp_endpoint { reconf_enable:1; __u8 strreset_enable; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1370,7 +1371,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 760b367644c1..a7d623171501 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -290,9 +290,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -302,6 +301,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -344,9 +345,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -505,8 +505,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f..efffde7f2328 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33391254fa82..ad5028a07b18 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5338,11 +5338,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5351,26 +5352,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) From e6007b85dfa284c4726c249e3c2fc4181ca8e179 Mon Sep 17 00:00:00 2001 From: Ma Xinjian Date: Fri, 24 Dec 2021 17:59:28 +0800 Subject: [PATCH 33/76] selftests: mptcp: Remove the deprecated config NFT_COUNTER NFT_COUNTER was removed since 390ad4295aa ("netfilter: nf_tables: make counter support built-in") LKP/0Day will check if all configs listing under selftests are able to be enabled properly. For the missing configs, it will report something like: LKP WARN miss config CONFIG_NFT_COUNTER= of net/mptcp/config - it's not reasonable to keep the deprecated configs. - configs under kselftests are recommended by corresponding tests. So if some configs are missing, it will impact the testing results Reported-by: kernel test robot Signed-off-by: Ma Xinjian Signed-off-by: David S. Miller --- tools/testing/selftests/net/mptcp/config | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 0faaccd21447..2b82628decb1 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -9,7 +9,6 @@ CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m CONFIG_NETFILTER_XT_MATCH_BPF=m From 0f9d36af8f211d296ffd23bdce61a72cdfbb1a3c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Sat, 18 Dec 2021 16:19:09 -0800 Subject: [PATCH 34/76] drm/i915: Fix possible uninitialized variable in parallel extension 'prev_engine' was declared inside the output loop and checked in the inner after at least 1 pass of either loop. The variable should be declared outside both loops as it needs to be persistent across the entire loop structure. Fixes: e5e32171a2cf ("drm/i915/guc: Connect UAPI to GuC multi-lrc interface") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211219001909.24348-1-matthew.brost@intel.com (cherry picked from commit cbffbac9c14220b8716b0a9c29d72243f6b14ef3) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb33d0322960..c37c9f0d8167 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -564,6 +564,7 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, container_of_user(base, typeof(*ext), base); const struct set_proto_ctx_engines *set = data; struct drm_i915_private *i915 = set->i915; + struct i915_engine_class_instance prev_engine; u64 flags; int err = 0, n, i, j; u16 slot, width, num_siblings; @@ -629,7 +630,6 @@ set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base, /* Create contexts / engines */ for (i = 0; i < width; ++i) { intel_engine_mask_t current_mask = 0; - struct i915_engine_class_instance prev_engine; for (j = 0; j < num_siblings; ++j) { struct i915_engine_class_instance ci; From d46f329a3f6048e04736e86cb13c880645048792 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 14 Dec 2021 11:59:13 -0800 Subject: [PATCH 35/76] drm/i915: Increment composite fence seqno Increment composite fence seqno on each fence creation. Fixes: 544460c33821 ("drm/i915: Multi-BB execbuf") Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20211214195913.35735-1-matthew.brost@intel.com (cherry picked from commit 62eeb9ae1364cd96991ccc6e3c5c69d66b8c64df) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9b24d9b5ade1..cb0bf6ffd0e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -3017,7 +3017,7 @@ eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd) fence_array = dma_fence_array_create(eb->num_batches, fences, eb->context->parallel.fence_context, - eb->context->parallel.seqno, + eb->context->parallel.seqno++, false); if (!fence_array) { kfree(fences); From c1833c3964d5bd8c163bd4e01736a38bc473cb8a Mon Sep 17 00:00:00 2001 From: William Zhao Date: Thu, 23 Dec 2021 12:33:16 -0500 Subject: [PATCH 36/76] ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate The "__ip6_tnl_parm" struct was left uninitialized causing an invalid load of random data when the "__ip6_tnl_parm" struct was used elsewhere. As an example, in the function "ip6_tnl_xmit_ctl()", it tries to access the "collect_md" member. With "__ip6_tnl_parm" being uninitialized and containing random data, the UBSAN detected that "collect_md" held a non-boolean value. The UBSAN issue is as follows: =============================================================== UBSAN: invalid-load in net/ipv6/ip6_tunnel.c:1025:14 load of value 30 is not a valid value for type '_Bool' CPU: 1 PID: 228 Comm: kworker/1:3 Not tainted 5.16.0-rc4+ #8 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: dump_stack_lvl+0x44/0x57 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value+0x66/0x70 ? __cpuhp_setup_state+0x1d3/0x210 ip6_tnl_xmit_ctl.cold.52+0x2c/0x6f [ip6_tunnel] vti6_tnl_xmit+0x79c/0x1e96 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? vti6_rcv+0x100/0x100 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? rcu_read_lock_bh_held+0xc0/0xc0 ? lock_acquired+0x262/0xb10 dev_hard_start_xmit+0x1e6/0x820 __dev_queue_xmit+0x2079/0x3340 ? mark_lock.part.52+0xf7/0x1050 ? netdev_core_pick_tx+0x290/0x290 ? kvm_clock_read+0x14/0x30 ? kvm_sched_clock_read+0x5/0x10 ? sched_clock_cpu+0x15/0x200 ? find_held_lock+0x3a/0x1c0 ? lock_release+0x42f/0xc90 ? lock_downgrade+0x6b0/0x6b0 ? mark_held_locks+0xb7/0x120 ? neigh_connected_output+0x31f/0x470 ? lockdep_hardirqs_on+0x79/0x100 ? neigh_connected_output+0x31f/0x470 ? ip6_finish_output2+0x9b0/0x1d90 ? rcu_read_lock_bh_held+0x62/0xc0 ? ip6_finish_output2+0x9b0/0x1d90 ip6_finish_output2+0x9b0/0x1d90 ? ip6_append_data+0x330/0x330 ? ip6_mtu+0x166/0x370 ? __ip6_finish_output+0x1ad/0xfb0 ? nf_hook_slow+0xa6/0x170 ip6_output+0x1fb/0x710 ? nf_hook.constprop.32+0x317/0x430 ? ip6_finish_output+0x180/0x180 ? __ip6_finish_output+0xfb0/0xfb0 ? lock_is_held_type+0xd9/0x130 ndisc_send_skb+0xb33/0x1590 ? __sk_mem_raise_allocated+0x11cf/0x1560 ? dst_output+0x4a0/0x4a0 ? ndisc_send_rs+0x432/0x610 addrconf_dad_completed+0x30c/0xbb0 ? addrconf_rs_timer+0x650/0x650 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 ? addrconf_dad_completed+0xbb0/0xbb0 ? rcu_read_lock_sched_held+0xaf/0xe0 ? rcu_read_lock_bh_held+0xc0/0xc0 process_one_work+0x97b/0x1740 ? pwq_dec_nr_in_flight+0x270/0x270 worker_thread+0x87/0xbf0 ? process_one_work+0x1740/0x1740 kthread+0x3ac/0x490 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x22/0x30 =============================================================== The solution is to initialize "__ip6_tnl_parm" struct to zeros in the "vti6_siocdevprivate()" function. Signed-off-by: William Zhao Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 527e9ead7449..5e9474bc54fc 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -808,6 +808,8 @@ vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { From 6d7373dabfd3933ee30c40fc8c09d2a788f6ece1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 27 Dec 2021 14:35:30 +0100 Subject: [PATCH 37/76] net/smc: fix using of uninitialized completions In smc_wr_tx_send_wait() the completion on index specified by pend->idx is initialized and after smc_wr_tx_send() was called the wait for completion starts. pend->idx is used to get the correct index for the wait, but the pend structure could already be cleared in smc_wr_tx_process_cqe(). Introduce pnd_idx to hold and use a local copy of the correct index. Fixes: 09c61d24f96d ("net/smc: wait for departure of an IB message") Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_wr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 600ab5889227..79a7431f534e 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -358,18 +358,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) From 6c25449e1a32c594d743df8e8258e8ef870b6a77 Mon Sep 17 00:00:00 2001 From: yangxingwu Date: Mon, 27 Dec 2021 16:29:51 +0800 Subject: [PATCH 38/76] net: udp: fix alignment problem in udp4_seq_show() $ cat /pro/net/udp before: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 after: sl local_address rem_address st tx_queue rx_queue tr tm->when 26050: 0100007F:0035 00000000:0000 07 00000000:00000000 00:00000000 26320: 0100007F:0143 00000000:0000 07 00000000:00000000 00:00000000 27135: 00000000:8472 00000000:0000 07 00000000:00000000 00:00000000 Signed-off-by: yangxingwu Signed-off-by: David S. Miller --- net/ipv4/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 15c6b450b8db..0cd6b857e7ec 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3075,7 +3075,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { From 5f50153288452e10b6edd69ec9112c49442b054a Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sun, 26 Dec 2021 21:32:45 -0500 Subject: [PATCH 39/76] atlantic: Fix buff_ring OOB in aq_ring_rx_clean The function obtain the next buffer without boundary check. We should return with I/O error code. The bug is found by fuzzing and the crash report is attached. It is an OOB bug although reported as use-after-free. [ 4.804724] BUG: KASAN: use-after-free in aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.805661] Read of size 4 at addr ffff888034fe93a8 by task ksoftirqd/0/9 [ 4.806505] [ 4.806703] CPU: 0 PID: 9 Comm: ksoftirqd/0 Tainted: G W 5.6.0 #34 [ 4.809030] Call Trace: [ 4.809343] dump_stack+0x76/0xa0 [ 4.809755] print_address_description.constprop.0+0x16/0x200 [ 4.810455] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.811234] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.813183] __kasan_report.cold+0x37/0x7c [ 4.813715] ? aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.814393] kasan_report+0xe/0x20 [ 4.814837] aq_ring_rx_clean+0x1e88/0x2730 [atlantic] [ 4.815499] ? hw_atl_b0_hw_ring_rx_receive+0x9a5/0xb90 [atlantic] [ 4.816290] aq_vec_poll+0x179/0x5d0 [atlantic] [ 4.816870] ? _GLOBAL__sub_I_65535_1_aq_pci_func_init+0x20/0x20 [atlantic] [ 4.817746] ? __next_timer_interrupt+0xba/0xf0 [ 4.818322] net_rx_action+0x363/0xbd0 [ 4.818803] ? call_timer_fn+0x240/0x240 [ 4.819302] ? __switch_to_asm+0x40/0x70 [ 4.819809] ? napi_busy_loop+0x520/0x520 [ 4.820324] __do_softirq+0x18c/0x634 [ 4.820797] ? takeover_tasklets+0x5f0/0x5f0 [ 4.821343] run_ksoftirqd+0x15/0x20 [ 4.821804] smpboot_thread_fn+0x2f1/0x6b0 [ 4.822331] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.823041] ? __kthread_parkme+0x80/0x100 [ 4.823571] ? smpboot_unregister_percpu_thread+0x160/0x160 [ 4.824301] kthread+0x2b5/0x3b0 [ 4.824723] ? kthread_create_on_node+0xd0/0xd0 [ 4.825304] ret_from_fork+0x35/0x40 Signed-off-by: Zekun Shen Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 81b3756417ec..77e76c9efd32 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -366,6 +366,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -389,6 +393,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; From ca506fca461b260ab32952b610c3d4aadc6c11fd Mon Sep 17 00:00:00 2001 From: Matthias-Christian Ott Date: Sun, 26 Dec 2021 23:12:08 +0100 Subject: [PATCH 40/76] net: usb: pegasus: Do not drop long Ethernet frames The D-Link DSB-650TX (2001:4002) is unable to receive Ethernet frames that are longer than 1518 octets, for example, Ethernet frames that contain 802.1Q VLAN tags. The frames are sent to the pegasus driver via USB but the driver discards them because they have the Long_pkt field set to 1 in the received status report. The function read_bulk_callback of the pegasus driver treats such received "packets" (in the terminology of the hardware) as errors but the field simply does just indicate that the Ethernet frame (MAC destination to FCS) is longer than 1518 octets. It seems that in the 1990s there was a distinction between "giant" (> 1518) and "runt" (< 64) frames and the hardware includes flags to indicate this distinction. It seems that the purpose of the distinction "giant" frames was to not allow infinitely long frames due to transmission errors and to allow hardware to have an upper limit of the frame size. However, the hardware already has such limit with its 2048 octet receive buffer and, therefore, Long_pkt is merely a convention and should not be treated as a receive error. Actually, the hardware is even able to receive Ethernet frames with 2048 octets which exceeds the claimed limit frame size limit of the driver of 1536 octets (PEGASUS_MTU). Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Matthias-Christian Ott Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index c4cd40b090fd..feb247e355f7 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -493,11 +493,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; From 7175f02c4e5f5a9430113ab9ca0fd0ce98b28a51 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 26 Dec 2021 16:01:27 +0300 Subject: [PATCH 41/76] uapi: fix linux/nfc.h userspace compilation errors Replace sa_family_t with __kernel_sa_family_t to fix the following linux/nfc.h userspace compilation errors: /usr/include/linux/nfc.h:266:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; /usr/include/linux/nfc.h:274:2: error: unknown type name 'sa_family_t' sa_family_t sa_family; Fixes: 23b7869c0fd0 ("NFC: add the NFC socket raw protocol") Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Dmitry V. Levin Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c744..aadad43d943a 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,7 +271,7 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; From 79b69a83705e621b258ac6d8ae6d3bfdb4b930aa Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Dec 2021 13:03:47 +0100 Subject: [PATCH 42/76] nfc: uapi: use kernel size_t to fix user-space builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix user-space builds if it includes /usr/include/linux/nfc.h before some of other headers: /usr/include/linux/nfc.h:281:9: error: unknown type name ‘size_t’ 281 | size_t service_name_len; | ^~~~~~ Fixes: d646960f7986 ("NFC: Initial LLCP support") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- include/uapi/linux/nfc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index aadad43d943a..4fa4e979e948 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -278,7 +278,7 @@ struct sockaddr_nfc_llcp { __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ From 8c45096c60d6ce6341c374636100ed1b2c1c33a1 Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Mon, 13 Dec 2021 16:17:02 +0800 Subject: [PATCH 43/76] drm/amd/pm: skip setting gfx cgpg in the s0ix suspend-resume In the s0ix entry need retain gfx in the gfxoff state,so here need't set gfx cgpg in the S0ix suspend-resume process. Moreover move the S0ix check into SMU12 can simplify the code condition check. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1712 Signed-off-by: Prike Liang Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 7 ++----- drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a3244585d80..8a817932acdf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1568,9 +1568,7 @@ static int smu_suspend(void *handle) smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); - /* skip CGPG when in S0ix */ - if (smu->is_apu && !adev->in_s0ix) - smu_set_gfx_cgpg(&adev->smu, false); + smu_set_gfx_cgpg(&adev->smu, false); return 0; } @@ -1601,8 +1599,7 @@ static int smu_resume(void *handle) return ret; } - if (smu->is_apu) - smu_set_gfx_cgpg(&adev->smu, true); + smu_set_gfx_cgpg(&adev->smu, true); smu->disable_uclk_switch = 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 43028f2cd28b..9c91e79c955f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -120,7 +120,8 @@ int smu_v12_0_powergate_sdma(struct smu_context *smu, bool gate) int smu_v12_0_set_gfx_cgpg(struct smu_context *smu, bool enable) { - if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) + /* Until now the SMU12 only implemented for Renoir series so here neen't do APU check. */ + if (!(smu->adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) || smu->adev->in_s0ix) return 0; return smu_cmn_send_smc_msg_with_param(smu, From daf8de0874ab5b74b38a38726fdd3d07ef98a7ee Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 12 Nov 2021 11:25:30 -0500 Subject: [PATCH 44/76] drm/amdgpu: always reset the asic in suspend (v2) If the platform suspend happens to fail and the power rail is not turned off, the GPU will be in an unknown state on resume, so reset the asic so that it will be in a known good state on resume even if the platform suspend failed. v2: handle s0ix Acked-by: Luben Tuikov Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 73ac02372827..1a97b8b237d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2154,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev) adev->in_s3 = true; r = amdgpu_device_suspend(drm_dev, true); adev->in_s3 = false; - + if (r) + return r; + if (!adev->in_s0ix) + r = amdgpu_asic_reset(adev); return r; } From 7be3be2b027c12e84833b3dc9597d3bb7e4c5464 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 17 Dec 2021 19:05:06 +0800 Subject: [PATCH 45/76] drm/amdgpu: put SMU into proper state on runpm suspending for BOCO capable platform By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some proper cleanups and put itself into a state ready for PNP. That can workaround some random resuming failure observed on BOCO capable platforms. Signed-off-by: Evan Quan Acked-by: Alex Deucher Reviewed-by: Guchun Chen Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1a97b8b237d5..86ca80da9eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2238,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_px(drm_dev)) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + /* + * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some + * proper cleanups and put itself into a state ready for PNP. That + * can address some random resuming failure observed on BOCO capable + * platforms. + * TODO: this may be also needed for PX capable platform. + */ + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; return ret; } + if (amdgpu_device_supports_boco(drm_dev)) + adev->mp1_state = PP_MP1_STATE_NONE; + if (amdgpu_device_supports_px(drm_dev)) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. From 8b5fdfc57cc2471179d1c51081424ded833c16c8 Mon Sep 17 00:00:00 2001 From: wolfgang huang Date: Tue, 28 Dec 2021 16:01:20 +0800 Subject: [PATCH 46/76] mISDN: change function names to avoid conflicts As we build for mips, we meet following error. l1_init error with multiple definition. Some architecture devices usually marked with l1, l2, lxx as the start-up phase. so we change the mISDN function names, align with Isdnl2_xxx. mips-linux-gnu-ld: drivers/isdn/mISDN/layer1.o: in function `l1_init': (.text+0x890): multiple definition of `l1_init'; \ arch/mips/kernel/bmips_5xxx_init.o:(.text+0xf0): first defined here make[1]: *** [home/mips/kernel-build/linux/Makefile:1161: vmlinux] Error 1 Signed-off-by: wolfgang huang Reported-by: k2ci Signed-off-by: David S. Miller --- drivers/isdn/mISDN/core.c | 6 +++--- drivers/isdn/mISDN/core.h | 4 ++-- drivers/isdn/mISDN/layer1.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e420446..a41b4b264594 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d303327..42599f49c189 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c1700..7b31c25a550e 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } From 1cd5384c88af5b59bf9f3b6c1a151bc14b88c2cd Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 18:51:44 +0100 Subject: [PATCH 47/76] net: ag71xx: Fix a potential double free in error handling paths 'ndev' is a managed resource allocated with devm_alloc_etherdev(), so there is no need to call free_netdev() explicitly or there will be a double free(). Simplify all error handling paths accordingly. Fixes: d51b6ce441d3 ("net: ethernet: add ag71xx driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/ag71xx.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index 88d2ab748399..4579ddf9c427 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1913,15 +1913,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); - err = PTR_ERR(ag->mac_reset); - goto err_free; + return PTR_ERR(ag->mac_reset); } ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ag->mac_base) { - err = -ENOMEM; - goto err_free; - } + if (!ag->mac_base) + return -ENOMEM; ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, @@ -1929,7 +1926,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); - goto err_free; + return err; } ndev->netdev_ops = &ag71xx_netdev_ops; @@ -1957,10 +1954,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) { - err = -ENOMEM; - goto err_free; - } + if (!ag->stop_desc) + return -ENOMEM; ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; @@ -1975,7 +1970,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); - goto err_free; + return err; } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); @@ -1983,7 +1978,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - goto err_free; + return err; } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); @@ -2019,8 +2014,6 @@ static int ag71xx_probe(struct platform_device *pdev) ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); -err_free: - free_netdev(ndev); return err; } From 5be60a945329d82f06fc755a43eeefbfc5f77d72 Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Mon, 27 Dec 2021 17:22:03 +0100 Subject: [PATCH 48/76] net: lantiq_xrx200: fix statistics of received bytes Received frames have FCS truncated. There is no need to subtract FCS length from the statistics. Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver") Signed-off-by: Aleksander Jan Bajkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 96bd6f2b21ed..80bfaf2fec92 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -224,7 +224,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } From 1b9dadba502234eea7244879b8d5d126bfaf9f0c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 28 Dec 2021 12:48:11 +0000 Subject: [PATCH 49/76] NFC: st21nfca: Fix memory leak in device probe and remove 'phy->pending_skb' is alloced when device probe, but forgot to free in the error handling path and remove path, this cause memory leak as follows: unreferenced object 0xffff88800bc06800 (size 512): comm "8", pid 11775, jiffies 4295159829 (age 9.032s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d66c09ce>] __kmalloc_node_track_caller+0x1ed/0x450 [<00000000c93382b3>] kmalloc_reserve+0x37/0xd0 [<000000005fea522c>] __alloc_skb+0x124/0x380 [<0000000019f29f9a>] st21nfca_hci_i2c_probe+0x170/0x8f2 Fix it by freeing 'pending_skb' in error and remove. Fixes: 68957303f44a ("NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/nfc/st21nfca/i2c.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index f126ce96a7df..35b32fb90906 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -524,7 +524,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -535,7 +536,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -544,15 +545,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -563,6 +572,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } From 90cee52f2e780345d3629e278291aea5ac74f40f Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:24 +0800 Subject: [PATCH 50/76] net/smc: don't send CDC/LLC message if link not ready We found smc_llc_send_link_delete_all() sometimes wait for 2s timeout when testing with RDMA link up/down. It is possible when a smc_link is in ACTIVATING state, the underlaying QP is still in RESET or RTR state, which cannot send any messages out. smc_llc_send_link_delete_all() use smc_link_usable() to checks whether the link is usable, if the QP is still in RESET or RTR state, but the smc_link is in ACTIVATING, this LLC message will always fail without any CQE entering the CQ, and we will always wait 2s before timeout. Since we cannot send any messages through the QP before the QP enter RTS. I add a wrapper smc_link_sendable() which checks the state of QP along with the link state. And replace smc_link_usable() with smc_link_sendable() in all LLC & CDC message sending routine. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 +- net/smc/smc_core.h | 6 ++++++ net/smc/smc_llc.c | 2 +- net/smc/smc_wr.c | 4 ++-- net/smc/smc_wr.h | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 387d28b2f8dd..55ca175e8d57 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -647,7 +647,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 59cef3b830d8..d63b08274197 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -415,6 +415,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index b102680296b8..3e9fd8a3124c 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1630,7 +1630,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 79a7431f534e..df1dc225cbab 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -188,7 +188,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -231,7 +231,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f353311e6f84..48ed9b08ac7a 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -62,7 +62,7 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) static inline bool smc_wr_tx_link_hold(struct smc_link *link) { - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return false; atomic_inc(&link->wr_tx_refcnt); return true; From 349d43127dac00c15231e8ffbcaabd70f7b0e544 Mon Sep 17 00:00:00 2001 From: Dust Li Date: Tue, 28 Dec 2021 17:03:25 +0800 Subject: [PATCH 51/76] net/smc: fix kernel panic caused by race of smc_sock A crash occurs when smc_cdc_tx_handler() tries to access smc_sock but smc_release() has already freed it. [ 4570.695099] BUG: unable to handle page fault for address: 000000002eae9e88 [ 4570.696048] #PF: supervisor write access in kernel mode [ 4570.696728] #PF: error_code(0x0002) - not-present page [ 4570.697401] PGD 0 P4D 0 [ 4570.697716] Oops: 0002 [#1] PREEMPT SMP NOPTI [ 4570.698228] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.16.0-rc4+ #111 [ 4570.699013] Hardware name: Alibaba Cloud Alibaba Cloud ECS, BIOS 8c24b4c 04/0 [ 4570.699933] RIP: 0010:_raw_spin_lock+0x1a/0x30 <...> [ 4570.711446] Call Trace: [ 4570.711746] [ 4570.711992] smc_cdc_tx_handler+0x41/0xc0 [ 4570.712470] smc_wr_tx_tasklet_fn+0x213/0x560 [ 4570.712981] ? smc_cdc_tx_dismisser+0x10/0x10 [ 4570.713489] tasklet_action_common.isra.17+0x66/0x140 [ 4570.714083] __do_softirq+0x123/0x2f4 [ 4570.714521] irq_exit_rcu+0xc4/0xf0 [ 4570.714934] common_interrupt+0xba/0xe0 Though smc_cdc_tx_handler() checked the existence of smc connection, smc_release() may have already dismissed and released the smc socket before smc_cdc_tx_handler() further visits it. smc_cdc_tx_handler() |smc_release() if (!conn) | | |smc_cdc_tx_dismiss_slots() | smc_cdc_tx_dismisser() | |sock_put(&smc->sk) <- last sock_put, | smc_sock freed bh_lock_sock(&smc->sk) (panic) | To make sure we won't receive any CDC messages after we free the smc_sock, add a refcount on the smc_connection for inflight CDC message(posted to the QP but haven't received related CQE), and don't release the smc_connection until all the inflight CDC messages haven been done, for both success or failed ones. Using refcount on CDC messages brings another problem: when the link is going to be destroyed, smcr_link_clear() will reset the QP, which then remove all the pending CQEs related to the QP in the CQ. To make sure all the CQEs will always come back so the refcount on the smc_connection can always reach 0, smc_ib_modify_qp_reset() was replaced by smc_ib_modify_qp_error(). And remove the timeout in smc_wr_tx_wait_no_pending_sends() since we need to wait for all pending WQEs done, or we may encounter use-after- free when handling CQEs. For IB device removal routine, we need to wait for all the QPs on that device been destroyed before we can destroy CQs on the device, or the refcount on smc_connection won't reach 0 and smc_sock cannot be released. Fixes: 5f08318f617b ("smc: connection data control (CDC)") Reported-by: Wen Gu Signed-off-by: Dust Li Signed-off-by: David S. Miller --- net/smc/smc.h | 5 +++++ net/smc/smc_cdc.c | 52 +++++++++++++++++++++------------------------- net/smc/smc_cdc.h | 2 +- net/smc/smc_core.c | 25 +++++++++++++++++----- net/smc/smc_ib.c | 4 ++-- net/smc/smc_ib.h | 1 + net/smc/smc_wr.c | 41 +++--------------------------------- net/smc/smc_wr.h | 3 +-- 8 files changed, 57 insertions(+), 76 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index f4286ca1f228..1a4fc1c6c4ab 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -180,6 +180,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 99acd337ba90..84c8a4374fdd 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -193,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b..696cc11f2303 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 55ca175e8d57..a6849362f4dd 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1127,7 +1127,7 @@ void smc_conn_free(struct smc_connection *conn) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } @@ -1204,7 +1204,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); + smc_ib_modify_qp_error(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); @@ -1336,7 +1336,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1459,11 +1459,16 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) /* Called when an SMCR device is removed or the smc module is unloaded. * If smcibdev is given, all SMCR link groups using this device are terminated. * If smcibdev is NULL, all SMCR link groups are terminated. + * + * We must wait here for QPs been destroyed before we destroy the CQs, + * or we won't received any CQEs and cdc_pend_tx_wr cannot reach 0 thus + * smc_sock cannot be released. */ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_free_list); + LIST_HEAD(lgr_linkdown_list); int i; spin_lock_bh(&smc_lgr_list.lock); @@ -1475,7 +1480,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (lgr->lnk[i].smcibdev == smcibdev) - smcr_link_down_cond_sched(&lgr->lnk[i]); + list_move_tail(&lgr->list, &lgr_linkdown_list); } } } @@ -1487,6 +1492,16 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) __smc_lgr_terminate(lgr, false); } + list_for_each_entry_safe(lgr, lg, &lgr_linkdown_list, list) { + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].smcibdev == smcibdev) { + mutex_lock(&lgr->llc_conf_mutex); + smcr_link_down_cond(&lgr->lnk[i]); + mutex_unlock(&lgr->llc_conf_mutex); + } + } + } + if (smcibdev) { if (atomic_read(&smcibdev->lnk_cnt)) wait_event(smcibdev->lnks_deleted, @@ -1586,7 +1601,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1824,6 +1838,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index d93055ec17ae..fe5d5399c4e8 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -109,12 +109,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 07585937370e..bfa1c6bf6313 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -90,6 +90,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index df1dc225cbab..c6cfdea8b71b 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link *link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -128,14 +123,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) } if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } if (link->lgr->smc_version == SMC_V2) { memset(link->wr_tx_v2_pend, 0, sizeof(*link->wr_tx_v2_pend)); @@ -421,25 +408,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -675,10 +643,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 48ed9b08ac7a..47512ccce5ef 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -130,7 +129,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); From ebae8973884ee9ac703b3bfe34cabbb118b18538 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 23 Dec 2021 14:13:02 -0500 Subject: [PATCH 52/76] drm/amdgpu: no DC support for headless chips Chips with no display hardware should return false for DC support. v2: drop Arcturus and Aldebaran Fixes: f7f12b25823c0d ("drm/amdgpu: default to true in amdgpu_device_asic_has_dc_support") Reviewed-by: Evan Quan Reviewed-by: Guchun Chen Reported-by: Tareque Md.Hanif Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 9dc86c5a1cad..694c3726e0f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) { switch (asic_type) { +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_HAINAN: +#endif + case CHIP_TOPAZ: + /* chips with no display hardware */ + return false; #if defined(CONFIG_DRM_AMD_DC) case CHIP_TAHITI: case CHIP_PITCAIRN: From 1e81dcc1ab7de7a789e60042ce82d5a612632599 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 13 Dec 2021 16:39:49 -0800 Subject: [PATCH 53/76] igc: Do not enable crosstimestamping for i225-V models It was reported that when PCIe PTM is enabled, some lockups could be observed with some integrated i225-V models. While the issue is investigated, we can disable crosstimestamp for those models and see no loss of functionality, because those models don't have any support for time synchronization. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Link: https://lore.kernel.org/all/924175a188159f4e03bd69908a91e606b574139b.camel@gmx.de/ Reported-by: Stefan Dietrich Signed-off-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 30568e3544cd..4f9245aa79a1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -768,7 +768,20 @@ int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr) */ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) { - return IS_ENABLED(CONFIG_X86_TSC) ? pcie_ptm_enabled(adapter->pdev) : false; + if (!IS_ENABLED(CONFIG_X86_TSC)) + return false; + + /* FIXME: it was noticed that enabling support for PCIe PTM in + * some i225-V models could cause lockups when bringing the + * interface up/down. There should be no downsides to + * disabling crosstimestamping support for i225-V, as it + * doesn't have any PTP support. That way we gain some time + * while root causing the issue. + */ + if (adapter->pdev->device == IGC_DEV_ID_I225_V) + return false; + + return pcie_ptm_enabled(adapter->pdev); } static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) From f85846bbf43de38fb2c89fe7d2a085608c4eb25a Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Fri, 17 Dec 2021 16:49:33 -0700 Subject: [PATCH 54/76] igc: Fix TX timestamp support for non-MSI-X platforms Time synchronization was not properly enabled on non-MSI-X platforms. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: James McLaughlin Reviewed-by: Vinicius Costa Gomes Tested-by: Nechama Kraus Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 8e448288ee26..d28a80a00953 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5467,6 +5467,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5510,6 +5513,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; From 2eb82577a16d4c8eb31e4ed520649850bb95b223 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Sun, 5 Dec 2021 21:19:30 -0500 Subject: [PATCH 55/76] drm/amd/display: fix B0 TMDS deepcolor no dislay issue [why] B0 PHY C map to F, D map to G driver use logic instance, dmub does the remap. Driver still need use the right PHY instance to access right HW. [how] use phyical instance when program PHY register. [note] could move resync_control programming to dmub next. Tested-by: Daniel Wheeler Reviewed-by: Dmytro Laktyushkin Reviewed-by: Jun Lei Acked-by: Rodrigo Siqueira Signed-off-by: Charlene Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn31/dcn31_resource.c | 25 +++++++++++++-- .../drm/amd/display/dc/dcn31/dcn31_resource.h | 31 +++++++++++++++++++ 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 18896294ae12..2cc55ef97ec4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -355,6 +355,14 @@ static const struct dce110_clk_src_regs clk_src_regs[] = { clk_src_regs(3, D), clk_src_regs(4, E) }; +/*pll_id being rempped in dmub, in driver it is logical instance*/ +static const struct dce110_clk_src_regs clk_src_regs_b0[] = { + clk_src_regs(0, A), + clk_src_regs(1, B), + clk_src_regs(2, F), + clk_src_regs(3, G), + clk_src_regs(4, E) +}; static const struct dce110_clk_src_shift cs_shift = { CS_COMMON_MASK_SH_LIST_DCN2_0(__SHIFT) @@ -2276,14 +2284,27 @@ static bool dcn31_resource_construct( dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL1, &clk_src_regs[1], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + /*move phypllx_pixclk_resync to dmub next*/ + if (dc->ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL2, + &clk_src_regs_b0[2], false); + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + dcn30_clock_source_create(ctx, ctx->dc_bios, + CLOCK_SOURCE_COMBO_PHY_PLL3, + &clk_src_regs_b0[3], false); + } else { + pool->base.clock_sources[DCN31_CLK_SRC_PLL2] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL2, &clk_src_regs[2], false); - pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = + pool->base.clock_sources[DCN31_CLK_SRC_PLL3] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL3, &clk_src_regs[3], false); + } + pool->base.clock_sources[DCN31_CLK_SRC_PLL4] = dcn30_clock_source_create(ctx, ctx->dc_bios, CLOCK_SOURCE_COMBO_PHY_PLL4, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 416fe7a721d8..a513363b3326 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -49,4 +49,35 @@ struct resource_pool *dcn31_create_resource_pool( const struct dc_init_data *init_data, struct dc *dc); +/*temp: B0 specific before switch to dcn313 headers*/ +#ifndef regPHYPLLF_PIXCLK_RESYNC_CNTL +#define regPHYPLLF_PIXCLK_RESYNC_CNTL 0x007e +#define regPHYPLLF_PIXCLK_RESYNC_CNTL_BASE_IDX 1 +#define regPHYPLLG_PIXCLK_RESYNC_CNTL 0x005f +#define regPHYPLLG_PIXCLK_RESYNC_CNTL_BASE_IDX 1 + +//PHYPLLF_PIXCLK_RESYNC_CNTL +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLF_PIXCLK_RESYNC_CNTL__PHYPLLF_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L + +//PHYPLLG_PIXCLK_RESYNC_CNTL +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE__SHIFT 0x0 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS__SHIFT 0x1 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL__SHIFT 0x4 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE__SHIFT 0x8 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE__SHIFT 0x9 +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_RESYNC_ENABLE_MASK 0x00000001L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DEEP_COLOR_DTO_ENABLE_STATUS_MASK 0x00000002L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_DCCG_DEEP_COLOR_CNTL_MASK 0x00000030L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_ENABLE_MASK 0x00000100L +#define PHYPLLG_PIXCLK_RESYNC_CNTL__PHYPLLG_PIXCLK_DOUBLE_RATE_ENABLE_MASK 0x00000200L +#endif #endif /* _DCN31_RESOURCE_H_ */ From d97e631af2db84c8c9d63abf68d487d0bb559e4c Mon Sep 17 00:00:00 2001 From: "Lai, Derek" Date: Mon, 6 Dec 2021 17:10:59 +0800 Subject: [PATCH 56/76] drm/amd/display: Added power down for DCN10 [Why] The change of setting a timer callback on boot for 10 seconds is still working, just lacked power down for DCN10. [How] Added power down for DCN10. Tested-by: Daniel Wheeler Reviewed-by: Anthony Koo Acked-by: Rodrigo Siqueira Signed-off-by: Derek Lai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 34001a30d449..10e613ec7d24 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -78,6 +78,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, From a07f8b9983543d465b50870ab4f845d4d710ed3f Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 9 Dec 2021 13:53:36 -0500 Subject: [PATCH 57/76] drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization [Why] Otherwise SMU won't mark Display as idle when trying to perform s2idle. [How] Mark the bit in the dcn31 codepath, doesn't apply to older ASIC. It needed to be split from phy refclk off to prevent entering s2idle when PSR was engaged but driver was not ready. Fixes: 118a33151658 ("drm/amd/display: Add DCN3.1 clock manager support") Tested-by: Daniel Wheeler Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index f4c9a458ace8..9df38e2ee4f4 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -158,6 +158,7 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, union display_idle_optimization_u idle_info = { 0 }; idle_info.idle_info.df_request_disabled = 1; idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); /* update power state */ clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; From 33735c1c8d0223170d79dbe166976d9cd7339c7a Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 9 Dec 2021 16:05:36 -0500 Subject: [PATCH 58/76] drm/amd/display: Set optimize_pwr_state for DCN31 [Why] We'll exit optimized power state to do link detection but we won't enter back into the optimized power state. This could potentially block s2idle entry depending on the sequencing, but it also means we're losing some power during the transition period. [How] Hook up the handler like DCN21. It was also missed like the exit_optimized_pwr_state callback. Fixes: 64b1d0e8d500 ("drm/amd/display: Add DCN3.1 HWSEQ") Tested-by: Daniel Wheeler Reviewed-by: Eric Yang Acked-by: Rodrigo Siqueira Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index 4f6e639e9353..17e2f2bb29ec 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -101,6 +101,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .z10_restore = dcn31_z10_restore, .z10_save_init = dcn31_z10_save_init, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, + .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .update_visual_confirm_color = dcn20_update_visual_confirm_color, }; From 33bb63915fee190102cae7d6576bc51a0bc342b2 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 17 Dec 2021 14:18:59 -0500 Subject: [PATCH 59/76] drm/amd/display: Fix USB4 null pointer dereference in update_psp_stream_config [Why] A porting error on a previous patch left the block of code that causes the crash from a NULL pointer dereference. More specifically, we try to access link_enc before it's assigned in the USB4 case in the following assignment: config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; [How] That assignment occurs later depending on the ASIC version. It's only needed on DCN31 and only after link_enc is already assigned. Fixes: 986430446c917b ("drm/amd/display: fix a crash on USB4 over C20 PHY") Reviewed-by: Harry Wentland Signed-off-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index c8457babfdea..c0bdc23702c8 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3945,12 +3945,9 @@ static void update_psp_stream_config(struct pipe_ctx *pipe_ctx, bool dpms_off) config.dig_be = pipe_ctx->stream->link->link_enc_hw_inst; #if defined(CONFIG_DRM_AMD_DC_DCN) config.stream_enc_idx = pipe_ctx->stream_res.stream_enc->id - ENGINE_ID_DIGA; - + if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY || pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - link_enc = pipe_ctx->stream->link->link_enc; - config.dio_output_type = pipe_ctx->stream->link->ep_type; - config.dio_output_idx = link_enc->transmitter - TRANSMITTER_UNIPHY_A; if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_PHY) link_enc = pipe_ctx->stream->link->link_enc; else if (pipe_ctx->stream->link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) From ee2698cf79cc759a397c61086c758d4cc85938bf Mon Sep 17 00:00:00 2001 From: Angus Wang Date: Thu, 9 Dec 2021 17:27:01 -0500 Subject: [PATCH 60/76] drm/amd/display: Changed pipe split policy to allow for multi-display pipe split [WHY] Current implementation of pipe split policy prevents pipe split with multiple displays connected, which caused the MCLK speed to be stuck at max [HOW] Changed the pipe split policies so that pipe split is allowed for multi-display configurations Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1522 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1709 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1655 Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1403 Note this is a backport of this commit from amdgpu drm-next for 5.16. Tested-by: Daniel Wheeler Reviewed-by: Aric Cyr Acked-by: Rodrigo Siqueira Signed-off-by: Angus Wang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3883f918b3bb..83f5d9aaffcb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c index 0fa381088d1d..faec0297ec0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn201/dcn201_resource.c @@ -603,7 +603,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d452a0d1777e..79313d1ab5d9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -874,7 +874,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 79a66e0c4303..98852b586295 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -840,7 +840,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index fbaa03f26d8b..e472b729d869 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index fcf96cf08c76..16e7059393fa 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -211,7 +211,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 4a9b64023675..87cec14b7870 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -193,7 +193,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 2cc55ef97ec4..27afbe6ec0fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -1002,7 +1002,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = false, - .pipe_split_policy = MPC_SPLIT_AVOID, + .pipe_split_policy = MPC_SPLIT_DYNAMIC, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, From 140c7bc7d1195750342ea0e6ab76179499ae7cd7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Dec 2021 15:06:17 +0100 Subject: [PATCH 61/76] ionic: Initialize the 'lif->dbid_inuse' bitmap When allocated, this bitmap is not initialized. Only the first bit is set a few lines below. Use bitmap_zalloc() to make sure that it is cleared before being used. Fixes: 6461b446f2a0 ("ionic: Add interrupts and doorbells") Signed-off-by: Christophe JAILLET Signed-off-by: Shannon Nelson Link: https://lore.kernel.org/r/6a478eae0b5e6c63774e1f0ddb1a3f8c38fa8ade.1640527506.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 63f8a8163b5f..2ff7be17e5af 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3135,7 +3135,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; From 077cdda764c7f147e03e6065ba0cd1dbc1bf00d1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 22 Dec 2021 09:20:58 +0200 Subject: [PATCH 62/76] net/mlx5e: TC, Fix memory leak with rules with internal port Fix a memory leak with decap rule with internal port as destination device. The driver allocates a modify hdr action but doesn't set the flow attr modify hdr action which results in skipping releasing the modify hdr action when releasing the flow. backtrace: [<000000005f8c651c>] krealloc+0x83/0xd0 [<000000009f59b143>] alloc_mod_hdr_actions+0x156/0x310 [mlx5_core] [<000000002257f342>] mlx5e_tc_match_to_reg_set_and_get_id+0x12a/0x360 [mlx5_core] [<00000000b44ea75a>] mlx5e_tc_add_fdb_flow+0x962/0x1470 [mlx5_core] [<0000000003e384a0>] __mlx5e_add_fdb_flow+0x54c/0xb90 [mlx5_core] [<00000000ed8b22b6>] mlx5e_configure_flower+0xe45/0x4af0 [mlx5_core] [<00000000024f4ab5>] mlx5e_rep_indr_offload.isra.0+0xfe/0x1b0 [mlx5_core] [<000000006c3bb494>] mlx5e_rep_indr_setup_tc_cb+0x90/0x130 [mlx5_core] [<00000000d3dac2ea>] tc_setup_cb_add+0x1d2/0x420 Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a60c7680fd2b..5e454a14428f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1441,6 +1441,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, metadata); if (err) goto err_out; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } } From 992d8a4e38f0527f24e273ce3a9cd6dea1a6a436 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 29 Nov 2021 11:08:41 +0200 Subject: [PATCH 63/76] net/mlx5e: Fix wrong features assignment in case of error In case of an error in mlx5e_set_features(), 'netdev->features' must be updated with the correct state of the device to indicate which features were updated successfully. To do that we maintain a copy of 'netdev->features' and update it after successful feature changes, so we can assign it to back to 'netdev->features' if needed. However, since not all netdev features are handled by the driver (e.g. GRO/TSO/etc), some features may not be updated correctly in case of an error updating another feature. For example, while requesting to disable TSO (feature which is not handled by the driver) and enable HW-GRO, if an error occurs during HW-GRO enable, 'oper_features' will be assigned with 'netdev->features' and HW-GRO turned off. TSO will remain enabled in such case, which is a bug. To solve that, instead of using 'netdev->features' as the baseline of 'oper_features' and changing it on set feature success, use 'features' instead and update it in case of errors. Fixes: 75b81ce719b7 ("net/mlx5e: Don't override netdev features field unless in error flow") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3b0f3a831216..41379844eee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3741,12 +3741,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3754,22 +3753,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro); From 5bec7ca2be6955ca1aa0d7bae2b981de9b1c9844 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 20 Dec 2021 15:52:50 +0000 Subject: [PATCH 64/76] xsk: Initialise xskb free_list_node This commit initialises the xskb's free_list_node when the xskb is allocated. This prevents a potential false negative returned from a call to list_empty for that node, such as the one introduced in commit 199d983bc015 ("xsk: Fix crash on double free in buffer pool") In my environment this issue caused packets to not be received by the xdpsock application if the traffic was running prior to application launch. This happened when the first batch of packets failed the xskmap lookup and XDP_PASS was returned from the bpf program. This action is handled in the i40e zc driver (and others) by allocating an skbuff, freeing the xdp_buff and adding the associated xskb to the xsk_buff_pool's free_list if it hadn't been added already. Without this fix, the xskb is not added to the free_list because the check to determine if it was added already returns an invalid positive result. Later, this caused allocation errors in the driver and the failure to receive packets. Fixes: 199d983bc015 ("xsk: Fix crash on double free in buffer pool") Fixes: 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") Signed-off-by: Ciara Loftus Acked-by: Magnus Karlsson Link: https://lore.kernel.org/r/20211220155250.2746-1-ciara.loftus@intel.com Signed-off-by: Jakub Kicinski --- net/xdp/xsk_buff_pool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index bc4ad48ea4f0..fd39bb660ebc 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -83,6 +83,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; + INIT_LIST_HEAD(&xskb->free_list_node); if (pool->unaligned) pool->free_heads[i] = xskb; else From fb7bc9204095090731430c8921f9e629740c110a Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Wed, 29 Dec 2021 15:09:47 -0500 Subject: [PATCH 65/76] ipv6: raw: check passed optlen before reading Add a check that the user-provided option is at least as long as the number of bytes we intend to read. Before this patch we would blindly read sizeof(int) bytes even in cases where the user passed optlen Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211229200947.2862255-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/raw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60f1e4f5be5a..c51d5ce3711c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; From 99b40610956a8a8755653a67392e2a8b772453be Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:15 +0200 Subject: [PATCH 66/76] net: bridge: mcast: add and enforce query interval minimum As reported[1] if query interval is set too low and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the administrator know that the interval has been set to the minimum. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f3d751105343..998da4a2d209 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4522,6 +4522,22 @@ int br_multicast_set_mld_version(struct net_bridge_mcast *brmctx, } #endif +void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_QUERY_INTVL_MIN; + } + + brmctx->multicast_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0c8b5f1a15bc..701dd8b8455e 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1357,7 +1357,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); - br->multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c0efd697865a..4ed7f11042e8 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -28,6 +28,7 @@ #define BR_MAX_PORTS (1<multicast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index 8ffd4ed2563c..bf1ac0874279 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -521,7 +521,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_INTVL]); - v->br_mcast_ctx.multicast_query_interval = clock_t_to_jiffies(val); + br_multicast_set_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERY_RESPONSE_INTVL]) { From f83a112bd91a494cdee671aec74e777470fb4a07 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Dec 2021 19:21:16 +0200 Subject: [PATCH 67/76] net: bridge: mcast: add and enforce startup query interval minimum As reported[1] if startup query interval is set too low in combination with large number of startup queries and we have multiple bridges or even a single bridge with multiple querier vlans configured we can crash the machine. Add a 1 second minimum which must be enforced by overwriting the value if set lower (i.e. without returning an error) to avoid breaking user-space. If that happens a log message is emitted to let the admin know that the startup interval has been set to the minimum. It doesn't make sense to make the startup interval lower than the normal query interval so use the same value of 1 second. The issue has been present since these intervals could be user-controlled. [1] https://lore.kernel.org/netdev/e8b9ce41-57b9-b6e2-a46a-ff9c791cf0ba@gmail.com/ Fixes: d902eee43f19 ("bridge: Add multicast count/interval sysfs entries") Reported-by: Eric Dumazet Signed-off-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- net/bridge/br_multicast.c | 16 ++++++++++++++++ net/bridge/br_netlink.c | 2 +- net/bridge/br_private.h | 3 +++ net/bridge/br_sysfs_br.c | 2 +- net/bridge/br_vlan_options.c | 2 +- 5 files changed, 22 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 998da4a2d209..de2409889489 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4538,6 +4538,22 @@ void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, brmctx->multicast_query_interval = intvl_jiffies; } +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val) +{ + unsigned long intvl_jiffies = clock_t_to_jiffies(val); + + if (intvl_jiffies < BR_MULTICAST_STARTUP_QUERY_INTVL_MIN) { + br_info(brmctx->br, + "trying to set multicast startup query interval below minimum, setting to %lu (%ums)\n", + jiffies_to_clock_t(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN), + jiffies_to_msecs(BR_MULTICAST_STARTUP_QUERY_INTVL_MIN)); + intvl_jiffies = BR_MULTICAST_STARTUP_QUERY_INTVL_MIN; + } + + brmctx->multicast_startup_query_interval = intvl_jiffies; +} + /** * br_multicast_list_adjacent - Returns snooped multicast addresses * @dev: The bridge port adjacent to which to retrieve addresses diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 701dd8b8455e..2ff83d84230d 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1369,7 +1369,7 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); } if (data[IFLA_BR_MCAST_STATS_ENABLED]) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4ed7f11042e8..2187a0c3fd22 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -29,6 +29,7 @@ #define BR_MULTICAST_DEFAULT_HASH_MAX 4096 #define BR_MULTICAST_QUERY_INTVL_MIN msecs_to_jiffies(1000) +#define BR_MULTICAST_STARTUP_QUERY_INTVL_MIN BR_MULTICAST_QUERY_INTVL_MIN #define BR_HWDOM_MAX BITS_PER_LONG @@ -966,6 +967,8 @@ size_t br_multicast_querier_state_size(void); size_t br_rports_size(const struct net_bridge_mcast *brmctx); void br_multicast_set_query_intvl(struct net_bridge_mcast *brmctx, unsigned long val); +void br_multicast_set_startup_query_intvl(struct net_bridge_mcast *brmctx, + unsigned long val); static inline bool br_group_is_l2(const struct br_ip *group) { diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index f5bd1114a434..7b0c19772111 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -706,7 +706,7 @@ static ssize_t multicast_startup_query_interval_show( static int set_startup_query_interval(struct net_bridge *br, unsigned long val, struct netlink_ext_ack *extack) { - br->multicast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&br->multicast_ctx, val); return 0; } diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c index bf1ac0874279..a6382973b3e7 100644 --- a/net/bridge/br_vlan_options.c +++ b/net/bridge/br_vlan_options.c @@ -535,7 +535,7 @@ static int br_vlan_process_global_one_opts(const struct net_bridge *br, u64 val; val = nla_get_u64(tb[BRIDGE_VLANDB_GOPTS_MCAST_STARTUP_QUERY_INTVL]); - v->br_mcast_ctx.multicast_startup_query_interval = clock_t_to_jiffies(val); + br_multicast_set_startup_query_intvl(&v->br_mcast_ctx, val); *changed = true; } if (tb[BRIDGE_VLANDB_GOPTS_MCAST_QUERIER]) { From 9c1952aeaa98b3cfc49e2a79cb2c7d6a674213e9 Mon Sep 17 00:00:00 2001 From: wujianguo Date: Wed, 29 Dec 2021 18:58:10 +0800 Subject: [PATCH 68/76] selftests/net: udpgso_bench_tx: fix dst ip argument udpgso_bench_tx call setup_sockaddr() for dest address before parsing all arguments, if we specify "-p ${dst_port}" after "-D ${dst_ip}", then ${dst_port} will be ignored, and using default cfg_port 8000. This will cause test case "multiple GRO socks" failed in udpgro.sh. Setup sockaddr after parsing all arguments. Fixes: 3a687bef148d ("selftests: udp gso benchmark") Signed-off-by: Jianguo Wu Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/ff620d9f-5b52-06ab-5286-44b945453002@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench_tx.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e..f1fdaa270291 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); From add25d6d6c85f7b6d00a055ee0a4169acf845681 Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Wed, 29 Dec 2021 15:27:30 +0800 Subject: [PATCH 69/76] selftests: net: Fix a typo in udpgro_fwd.sh $rvs -> $rcv Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Link: https://lore.kernel.org/r/d247d7c8-a03a-0abf-3c71-4006a051d133@163.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgro_fwd.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 7f26591f236b..6a3985b8cd7f 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -132,7 +132,7 @@ run_test() { local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \ sed -e 's/\[//' -e 's/:.*//'` if [ $rcv != $pkts ]; then - echo " fail - received $rvs packets, expected $pkts" + echo " fail - received $rcv packets, expected $pkts" ret=1 return fi From e22e45fc9e41bf9fcc1e92cfb78eb92786728ef0 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Tue, 28 Dec 2021 18:41:45 +0800 Subject: [PATCH 70/76] net: fix use-after-free in tw_timer_handler A real world panic issue was found as follow in Linux 5.4. BUG: unable to handle page fault for address: ffffde49a863de28 PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 RIP: 0010:tw_timer_handler+0x20/0x40 Call Trace: call_timer_fn+0x2b/0x120 run_timer_softirq+0x1ef/0x450 __do_softirq+0x10d/0x2b8 irq_exit+0xc7/0xd0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 This issue was also reported since 2017 in the thread [1], unfortunately, the issue was still can be reproduced after fixing DCCP. The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net namespace is destroyed since tcp_sk_ops is registered befrore ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops in the list of pernet_list. There will be a use-after-free on net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net if there are some inflight time-wait timers. This bug is not introduced by commit f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") since the net_statistics is a global variable instead of dynamic allocation and freeing. Actually, commit 61a7e26028b9 ("mib: put net statistics on struct net") introduces the bug since it put net statistics on struct net and free it when net namespace is destroyed. Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug and replace pr_crit() with panic() since continuing is meaningless when init_ipv4_mibs() fails. [1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 Fixes: 61a7e26028b9 ("mib: put net statistics on struct net") Signed-off-by: Muchun Song Cc: Cong Wang Cc: Fam Zheng Cc: Link: https://lore.kernel.org/r/20211228104145.9426-1-songmuchun@bytedance.com Signed-off-by: Jakub Kicinski --- net/ipv4/af_inet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6b5956500436..5f70ffdae1b5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1994,6 +1994,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -2024,12 +2028,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); From 168fed986b3a7ec7b98cab1fe84e2f282b9e6a8f Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Dec 2021 17:31:42 +0200 Subject: [PATCH 71/76] net: bridge: mcast: fix br_multicast_ctx_vlan_global_disabled helper We need to first check if the context is a vlan one, then we need to check the global bridge multicast vlan snooping flag, and finally the vlan's multicast flag, otherwise we will unnecessarily enable vlan mcast processing (e.g. querier timers). Fixes: 7b54aaaf53cb ("net: bridge: multicast: add vlan state initialization and control") Signed-off-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20211228153142.536969-1-nikolay@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_private.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2187a0c3fd22..e8c6ee322c71 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1153,9 +1153,9 @@ br_multicast_port_ctx_get_global(const struct net_bridge_mcast_port *pmctx) static inline bool br_multicast_ctx_vlan_global_disabled(const struct net_bridge_mcast *brmctx) { - return br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) && - br_multicast_ctx_is_vlan(brmctx) && - !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED); + return br_multicast_ctx_is_vlan(brmctx) && + (!br_opt_get(brmctx->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED) || + !(brmctx->vlan->priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED)); } static inline bool From 92a34ab169f9eefe29cd420ce96b0a0a2a1da853 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Wed, 29 Dec 2021 11:21:18 +0800 Subject: [PATCH 72/76] net/ncsi: check for error return from call to nla_put_u32 As we can see from the comment of the nla_put() that it could return -EMSGSIZE if the tailroom of the skb is insufficient. Therefore, it should be better to check the return value of the nla_put_u32 and return the error code if error accurs. Also, there are many other functions have the same problem, and if this patch is correct, I will commit a new version to fix all. Fixes: 955dc68cb9b2 ("net/ncsi: Add generic netlink family") Signed-off-by: Jiasheng Jiang Link: https://lore.kernel.org/r/20211229032118.1706294-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- net/ncsi/ncsi-netlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bb5f1650f11c..c189b4c8a182 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); From be1c5b53227ba8280f1ebb01c6f5da3c9eebdaad Mon Sep 17 00:00:00 2001 From: xu xin Date: Thu, 30 Dec 2021 03:28:56 +0000 Subject: [PATCH 73/76] Documentation: fix outdated interpretation of ip_no_pmtu_disc The updating way of pmtu has changed, but documentation is still in the old way. So this patch updates the interpretation of ip_no_pmtu_disc and min_pmtu. See commit 28d35bcdd3925 ("net: ipv4: don't let PMTU updates increase route MTU") Reported-by: Zeal Robot Signed-off-by: xu xin Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c04431144f7a..2572eecc3e86 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -25,7 +25,8 @@ ip_default_ttl - INTEGER ip_no_pmtu_disc - INTEGER Disable Path MTU Discovery. If enabled in mode 1 and a fragmentation-required ICMP is received, the PMTU to this - destination will be set to min_pmtu (see below). You will need + destination will be set to the smallest of the old MTU to + this destination and min_pmtu (see below). You will need to raise min_pmtu to the smallest interface MTU on your system manually if you want to avoid locally generated fragments. @@ -49,7 +50,8 @@ ip_no_pmtu_disc - INTEGER Default: FALSE min_pmtu - INTEGER - default 552 - minimum discovered Path MTU + default 552 - minimum Path MTU. Unless this is changed mannually, + each cached pmtu will never be lower than this setting. ip_forward_use_pmtu - BOOLEAN By default we don't trust protocol path MTUs while forwarding From 8b3170e07539855ee91bc5e2fa7780a4c9b5c7aa Mon Sep 17 00:00:00 2001 From: Jianguo Wu Date: Thu, 30 Dec 2021 18:40:29 +0800 Subject: [PATCH 74/76] selftests: net: using ping6 for IPv6 in udpgro_fwd.sh udpgro_fwd.sh output following message: ping: 2001:db8:1::100: Address family for hostname not supported Using ping6 when pinging IPv6 addresses. Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests") Signed-off-by: Jianguo Wu Signed-off-by: David S. Miller --- tools/testing/selftests/net/udpgro_fwd.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6a3985b8cd7f..3ea73013d956 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -185,6 +185,7 @@ for family in 4 6; do IPT=iptables SUFFIX=24 VXDEV=vxlan + PING=ping if [ $family = 6 ]; then BM_NET=$BM_NET_V6 @@ -192,6 +193,7 @@ for family in 4 6; do SUFFIX="64 nodad" VXDEV=vxlan6 IPT=ip6tables + PING="ping6" fi echo "IPv$family" @@ -237,7 +239,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel - ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null + ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST cleanup From bf2b09fedc17248b315f80fb249087b7d28a69a6 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 30 Dec 2021 12:26:27 +0000 Subject: [PATCH 75/76] fsl/fman: Fix missing put_device() call in fman_port_probe The reference taken by 'of_find_device_by_node()' must be released when not needed anymore. Add the corresponding 'put_device()' in the and error handling paths. Fixes: 18a6c85fcc78 ("fsl/fman: Add FMan Port Support") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_port.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index d9baac0dbc7d..4c9d05c45c03 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: From 012e332286e2bb9f6ac77d195f17e74b2963d663 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 30 Dec 2021 20:23:09 +0100 Subject: [PATCH 76/76] fs/mount_setattr: always cleanup mount_kattr Make sure that finish_mount_kattr() is called after mount_kattr was succesfully built in both the success and failure case to prevent leaking any references we took when we built it. We returned early if path lookup failed thereby risking to leak an additional reference we took when building mount_kattr when an idmapped mount was requested. Cc: linux-fsdevel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 9caccd41541a ("fs: introduce MOUNT_ATTR_IDMAP") Signed-off-by: Christian Brauner Signed-off-by: Linus Torvalds --- fs/namespace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 659a8f39c61a..b696543adab8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4263,12 +4263,11 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); - if (err) - return err; - - err = do_mount_setattr(&target, &kattr); + if (!err) { + err = do_mount_setattr(&target, &kattr); + path_put(&target); + } finish_mount_kattr(&kattr); - path_put(&target); return err; }