From b1c37a0030b27a4b5d159d87461f6a7d453fd067 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Sat, 14 Jun 2025 01:01:39 +0530 Subject: [PATCH 001/358] drm/xe/xe3: Add support for graphics IP version 30.03 Graphics version 30.03 should be treated the same as other Xe3 IP, but will have a slightly different set of workarounds. -v2: Merge and extend the WA onto existing entry (Bala) -v3: Revert v2's feedback changes and keep entry separate (Matt). Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20250613193146.3549862-3-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 89814b32e585..014689898d96 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -184,6 +184,7 @@ static const struct xe_ip graphics_ips[] = { { 2004, "Xe2_LPG", &graphics_xe2 }, { 3000, "Xe3_LPG", &graphics_xe2 }, { 3001, "Xe3_LPG", &graphics_xe2 }, + { 3003, "Xe3_LPG", &graphics_xe2 }, }; /* Pre-GMDID Media IPs */ diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 4a76de391abb..6baf369fb330 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -644,6 +644,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(3003), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, }; static const struct xe_rtp_entry_sr lrc_was[] = { diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 0ee74a5b2407..6496a5a17393 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ 
b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -34,9 +34,11 @@ 14022293748 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019794406 GRAPHICS_VERSION_RANGE(2001, 2002) GRAPHICS_VERSION(2004) GRAPHICS_VERSION_RANGE(3000, 3001) + GRAPHICS_VERSION(3003) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) @@ -58,8 +60,10 @@ no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + GRAPHICS_VERSION(3003) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + GRAPHICS_VERSION(3003) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID From c96e0df4e9f5f0d6690994bb01bcfbd01af0e1f7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Sat, 14 Jun 2025 01:01:40 +0530 Subject: [PATCH 002/358] drm/xe/xe3: Add support for media IP version 30.02 Media version 30.02 should be treated the same as other Xe3 IP, but will have a slightly different set of workarounds. 
-v2: Extend the range in existing WA entry (Bala) -v3: Revert v2, Do not extend the range for the time being(Matt) Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Balasubramani Vivekanandan Link: https://lore.kernel.org/r/20250613193146.3549862-4-dnyaneshwar.bhadane@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 1 + drivers/gpu/drm/xe/xe_wa.c | 12 ++++++++++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 014689898d96..cd239457f542 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -197,6 +197,7 @@ static const struct xe_ip media_ips[] = { { 1301, "Xe2_HPM", &media_xelpmp }, { 2000, "Xe2_LPM", &media_xelpmp }, { 3000, "Xe3_LPM", &media_xelpmp }, + { 3002, "Xe3_LPM", &media_xelpmp }, }; static const struct xe_device_desc tgl_desc = { diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 6baf369fb330..f51218a7a580 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -285,6 +285,18 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, + { XE_RTP_NAME("16021865536"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, + { XE_RTP_NAME("16021867713"), + XE_RTP_RULES(MEDIA_VERSION(3002), + ENGINE_CLASS(VIDEO_DECODE)), + XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), + }, { XE_RTP_NAME("14021486841"), XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), ENGINE_CLASS(VIDEO_DECODE)), diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 6496a5a17393..96cc33da0fb5 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -60,9 +60,11 @@ 
no_media_l3 MEDIA_VERSION(3000) GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0) 16023105232 GRAPHICS_VERSION_RANGE(2001, 3001) MEDIA_VERSION_RANGE(1301, 3000) + MEDIA_VERSION(3002) GRAPHICS_VERSION(3003) 16026508708 GRAPHICS_VERSION_RANGE(1200, 3001) MEDIA_VERSION_RANGE(1300, 3000) + MEDIA_VERSION(3002) GRAPHICS_VERSION(3003) # SoC workaround - currently applies to all platforms with the following From 3c0f211bc8fc0d0a0b4c29c471b57ffff48eec60 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Sat, 14 Jun 2025 01:01:43 +0530 Subject: [PATCH 003/358] drm/xe: Add Wildcat Lake device IDs to PTL list Introduce wildcat lake device Id. Wildcat Lake uses slightly different graphics and media IP versions than Panther Lake, but can still be treated as PTL for general driver flows. Bspec: 73951 Signed-off-by: Matt Roper Signed-off-by: Dnyaneshwar Bhadane Reviewed-by: Tejas Upadhyay Link: https://lore.kernel.org/r/20250613193146.3549862-7-dnyaneshwar.bhadane@intel.com --- include/drm/intel/pciids.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index 129c3172aa5f..a0180d10e260 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -871,6 +871,8 @@ MACRO__(0xB08F, ## __VA_ARGS__), \ MACRO__(0xB090, ## __VA_ARGS__), \ MACRO__(0xB0A0, ## __VA_ARGS__), \ - MACRO__(0xB0B0, ## __VA_ARGS__) + MACRO__(0xB0B0, ## __VA_ARGS__), \ + MACRO__(0xFD80, ## __VA_ARGS__), \ + MACRO__(0xFD81, ## __VA_ARGS__) #endif /* __PCIIDS_H__ */ From 3972872e459d812ab5e481a231a6066cf4f4d0f4 Mon Sep 17 00:00:00 2001 From: sanirban Date: Thu, 19 Jun 2025 19:04:14 +0530 Subject: [PATCH 004/358] drm/xe/ptl: Apply Wa_16026007364 As part of this WA GuC will save and restore value of two XE3_Media control registers that were not included in the HW power context. 
v2: - Update klv name (Badal) Signed-off-by: sanirban Reviewed-by: Badal Nilawar Link: https://lore.kernel.org/r/20250619133413.107423-2-sk.anirban@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1 + drivers/gpu/drm/xe/xe_guc_ads.c | 5 +++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 1 + 3 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..e2583761a70d 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -368,6 +368,7 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c, }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 07a027755627..f3c74f9a40c6 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -382,6 +382,11 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, &offset, &remain); + if (XE_WA(gt, 16026007364)) + guc_waklv_enable_simple(ads, + GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG, + &offset, &remain); + size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 96cc33da0fb5..8c2aa48cb33a 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -70,3 +70,4 @@ no_media_l3 MEDIA_VERSION(3000) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID 14022085890 GRAPHICS_VERSION(2001) +16026007364 MEDIA_VERSION(3000) From 9863f7743339ae53a0cf80e5f229cf6d2a42a6e6 Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Mon, 2 Jun 2025 11:53:11 +0300 Subject: [PATCH 005/358] rust: dma: clarify wording and be consistent in 
`coherent` nomenclature In the kernel, `consistent` and `coherent` are used interchangeably for the region described in this api. Stick with `coherent` nomenclature to show that dma_alloc_coherent() is being used, in addition to improving the clarity in the DMA mapping attributes documentation. Reviewed-by: Benno Lossin Signed-off-by: Abdiel Janulgue Reviewed-by: Andreas Hindborg Link: https://lore.kernel.org/r/20250602085444.1925053-2-abdiel.janulgue@gmail.com Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index a33261c62e0c..e5d1ffef1a53 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -89,7 +89,7 @@ pub mod attrs { /// Forces contiguous allocation of the buffer in physical memory. pub const DMA_ATTR_FORCE_CONTIGUOUS: Attrs = Attrs(bindings::DMA_ATTR_FORCE_CONTIGUOUS); - /// This is a hint to the DMA-mapping subsystem that it's probably not worth the time to try + /// Hints DMA-mapping subsystem that it's probably not worth the time to try /// to allocate memory to in a way that gives better TLB efficiency. pub const DMA_ATTR_ALLOC_SINGLE_PAGES: Attrs = Attrs(bindings::DMA_ATTR_ALLOC_SINGLE_PAGES); @@ -97,7 +97,7 @@ pub mod attrs { /// `__GFP_NOWARN`). pub const DMA_ATTR_NO_WARN: Attrs = Attrs(bindings::DMA_ATTR_NO_WARN); - /// Used to indicate that the buffer is fully accessible at an elevated privilege level (and + /// Indicates that the buffer is fully accessible at an elevated privilege level (and /// ideally inaccessible or at least read-only at lesser-privileged levels). pub const DMA_ATTR_PRIVILEGED: Attrs = Attrs(bindings::DMA_ATTR_PRIVILEGED); } @@ -105,7 +105,7 @@ pub mod attrs { /// An abstraction of the `dma_alloc_coherent` API. /// /// This is an abstraction around the `dma_alloc_coherent` API which is used to allocate and map -/// large consistent DMA regions. +/// large coherent DMA regions. 
/// /// A [`CoherentAllocation`] instance contains a pointer to the allocated region (in the /// processor's virtual address space) and the device address which can be given to the device @@ -115,7 +115,7 @@ pub mod attrs { /// # Invariants /// /// For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer -/// to an allocated region of consistent memory and `dma_handle` is the DMA address base of +/// to an allocated region of coherent memory and `dma_handle` is the DMA address base of /// the region. // TODO // @@ -138,7 +138,7 @@ pub struct CoherentAllocation { } impl CoherentAllocation { - /// Allocates a region of `size_of:: * count` of consistent memory. + /// Allocates a region of `size_of:: * count` of coherent memory. /// /// # Examples /// From fe58465905550576cc47cf93efeaaa6990e6c3b3 Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Mon, 2 Jun 2025 11:53:12 +0300 Subject: [PATCH 006/358] rust: dma: convert the read/write macros to return Result We could do better here by having the macros return `Result`, so that we don't have to wrap these calls in a closure for validation which is confusing. Co-developed-by: Andreas Hindborg Signed-off-by: Andreas Hindborg Link: https://lore.kernel.org/rust-for-linux/87h63qhz4q.fsf@kernel.org/ Reviewed-by: Andreas Hindborg Signed-off-by: Abdiel Janulgue Link: https://lore.kernel.org/r/20250602085444.1925053-3-abdiel.janulgue@gmail.com [ Fix line length in dma_read!(). - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 56 +++++++++++++++++++++++----------------- samples/rust/rust_dma.rs | 28 ++++++++++---------- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index e5d1ffef1a53..38356f61c90b 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -328,20 +328,24 @@ unsafe impl Send for CoherentAllocation {} #[macro_export] macro_rules! 
dma_read { ($dma:expr, $idx: expr, $($field:tt)*) => {{ - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of!((*item) $($field)*); - $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field) - } + (|| -> ::core::result::Result<_, $crate::error::Error> { + let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; + // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be + // dereferenced. The compiler also further validates the expression on whether `field` + // is a member of `item` when expanded by the macro. + unsafe { + let ptr_field = ::core::ptr::addr_of!((*item) $($field)*); + ::core::result::Result::Ok( + $crate::dma::CoherentAllocation::field_read(&$dma, ptr_field) + ) + } + })() }}; ($dma:ident [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($dma, $idx, $($field)*); + $crate::dma_read!($dma, $idx, $($field)*) }; ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => { - $crate::dma_read!($($dma).*, $idx, $($field)*); + $crate::dma_read!($($dma).*, $idx, $($field)*) }; } @@ -368,24 +372,30 @@ macro_rules! dma_read { #[macro_export] macro_rules! dma_write { ($dma:ident [ $idx:expr ] $($field:tt)*) => {{ - $crate::dma_write!($dma, $idx, $($field)*); + $crate::dma_write!($dma, $idx, $($field)*) }}; ($($dma:ident).* [ $idx:expr ] $($field:tt)* ) => {{ - $crate::dma_write!($($dma).*, $idx, $($field)*); + $crate::dma_write!($($dma).*, $idx, $($field)*) }}; ($dma:expr, $idx: expr, = $val:expr) => { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid item. 
- unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) } + (|| -> ::core::result::Result<_, $crate::error::Error> { + let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; + // SAFETY: `item_from_index` ensures that `item` is always a valid item. + unsafe { $crate::dma::CoherentAllocation::field_write(&$dma, item, $val) } + ::core::result::Result::Ok(()) + })() }; ($dma:expr, $idx: expr, $(.$field:ident)* = $val:expr) => { - let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; - // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be - // dereferenced. The compiler also further validates the expression on whether `field` - // is a member of `item` when expanded by the macro. - unsafe { - let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*); - $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val) - } + (|| -> ::core::result::Result<_, $crate::error::Error> { + let item = $crate::dma::CoherentAllocation::item_from_index(&$dma, $idx)?; + // SAFETY: `item_from_index` ensures that `item` is always a valid pointer and can be + // dereferenced. The compiler also further validates the expression on whether `field` + // is a member of `item` when expanded by the macro. 
+ unsafe { + let ptr_field = ::core::ptr::addr_of_mut!((*item) $(.$field)*); + $crate::dma::CoherentAllocation::field_write(&$dma, ptr_field, $val) + } + ::core::result::Result::Ok(()) + })() }; } diff --git a/samples/rust/rust_dma.rs b/samples/rust/rust_dma.rs index 874c2c964afa..9e05d5c0cdae 100644 --- a/samples/rust/rust_dma.rs +++ b/samples/rust/rust_dma.rs @@ -54,13 +54,9 @@ fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result = CoherentAllocation::alloc_coherent(pdev.as_ref(), TEST_VALUES.len(), GFP_KERNEL)?; - || -> Result { - for (i, value) in TEST_VALUES.into_iter().enumerate() { - kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1)); - } - - Ok(()) - }()?; + for (i, value) in TEST_VALUES.into_iter().enumerate() { + kernel::dma_write!(ca[i] = MyStruct::new(value.0, value.1))?; + } let drvdata = KBox::new( Self { @@ -78,13 +74,19 @@ impl Drop for DmaSampleDriver { fn drop(&mut self) { dev_info!(self.pdev.as_ref(), "Unload DMA test driver.\n"); - let _ = || -> Result { - for (i, value) in TEST_VALUES.into_iter().enumerate() { - assert_eq!(kernel::dma_read!(self.ca[i].h), value.0); - assert_eq!(kernel::dma_read!(self.ca[i].b), value.1); + for (i, value) in TEST_VALUES.into_iter().enumerate() { + let val0 = kernel::dma_read!(self.ca[i].h); + let val1 = kernel::dma_read!(self.ca[i].b); + assert!(val0.is_ok()); + assert!(val1.is_ok()); + + if let Ok(val0) = val0 { + assert_eq!(val0, value.0); } - Ok(()) - }(); + if let Ok(val1) = val1 { + assert_eq!(val1, value.1); + } + } } } From d37a39f607c4a479d59781639b9c2a05448568a3 Mon Sep 17 00:00:00 2001 From: Abdiel Janulgue Date: Mon, 2 Jun 2025 11:53:13 +0300 Subject: [PATCH 007/358] rust: dma: add as_slice/write functions for CoherentAllocation Add unsafe accessors for the region for reading or writing large blocks of data. 
Reviewed-by: Andreas Hindborg Signed-off-by: Abdiel Janulgue Reviewed-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250602085444.1925053-4-abdiel.janulgue@gmail.com [ Fix line length and slightly reword safety comment in doc-test of CoherentAllocation::write(); fix formatting issue. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 87 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 38356f61c90b..3446a6d46878 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -218,6 +218,93 @@ pub fn dma_handle(&self) -> bindings::dma_addr_t { self.dma_handle } + /// Common helper to validate a range applied from the allocated region in the CPU's virtual + /// address space. + fn validate_range(&self, offset: usize, count: usize) -> Result { + if offset.checked_add(count).ok_or(EOVERFLOW)? > self.count { + return Err(EINVAL); + } + Ok(()) + } + + /// Returns the data from the region starting from `offset` as a slice. + /// `offset` and `count` are in units of `T`, not the number of bytes. + /// + /// For ringbuffer type of r/w access or use-cases where the pointer to the live data is needed, + /// [`CoherentAllocation::start_ptr`] or [`CoherentAllocation::start_ptr_mut`] could be used + /// instead. + /// + /// # Safety + /// + /// * Callers must ensure that the device does not read/write to/from memory while the returned + /// slice is live. + /// * Callers must ensure that this call does not race with a write to the same region while + /// the returned slice is live. + pub unsafe fn as_slice(&self, offset: usize, count: usize) -> Result<&[T]> { + self.validate_range(offset, count)?; + // SAFETY: + // - The pointer is valid due to type invariant on `CoherentAllocation`, + // we've just checked that the range and index is within bounds. The immutability of the + // data is also guaranteed by the safety requirements of the function. 
+ // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked + // that `self.count` won't overflow early in the constructor. + Ok(unsafe { core::slice::from_raw_parts(self.cpu_addr.add(offset), count) }) + } + + /// Performs the same functionality as [`CoherentAllocation::as_slice`], except that a mutable + /// slice is returned. + /// + /// # Safety + /// + /// * Callers must ensure that the device does not read/write to/from memory while the returned + /// slice is live. + /// * Callers must ensure that this call does not race with a read or write to the same region + /// while the returned slice is live. + pub unsafe fn as_slice_mut(&self, offset: usize, count: usize) -> Result<&mut [T]> { + self.validate_range(offset, count)?; + // SAFETY: + // - The pointer is valid due to type invariant on `CoherentAllocation`, + // we've just checked that the range and index is within bounds. The immutability of the + // data is also guaranteed by the safety requirements of the function. + // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked + // that `self.count` won't overflow early in the constructor. + Ok(unsafe { core::slice::from_raw_parts_mut(self.cpu_addr.add(offset), count) }) + } + + /// Writes data to the region starting from `offset`. `offset` is in units of `T`, not the + /// number of bytes. + /// + /// # Safety + /// + /// * Callers must ensure that the device does not read/write to/from memory while the returned + /// slice is live. + /// * Callers must ensure that this call does not race with a read or write to the same region + /// that overlaps with this write. + /// + /// # Examples + /// + /// ``` + /// # fn test(alloc: &mut kernel::dma::CoherentAllocation) -> Result { + /// let somedata: [u8; 4] = [0xf; 4]; + /// let buf: &[u8] = &somedata; + /// // SAFETY: There is no concurrent HW operation on the device and no other R/W access to the + /// // region. 
+ /// unsafe { alloc.write(buf, 0)?; } + /// # Ok::<(), Error>(()) } + /// ``` + pub unsafe fn write(&self, src: &[T], offset: usize) -> Result { + self.validate_range(offset, src.len())?; + // SAFETY: + // - The pointer is valid due to type invariant on `CoherentAllocation` + // and we've just checked that the range and index is within bounds. + // - `offset + count` can't overflow since it is smaller than `self.count` and we've checked + // that `self.count` won't overflow early in the constructor. + unsafe { + core::ptr::copy_nonoverlapping(src.as_ptr(), self.cpu_addr.add(offset), src.len()) + }; + Ok(()) + } + /// Returns a pointer to an element from the region with bounds checking. `offset` is in /// units of `T`, not the number of bytes. /// From 14371e58cb2705b9ddc00efc3b94fb32612a753e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:45 +0900 Subject: [PATCH 008/358] rust: dma: fix doc-comment of dma_handle() A word was apparently missing in this sentence, hence fix it. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-1-ecf41ef99252@nvidia.com Fixes: ad2907b4e308 ("rust: add dma coherent allocator abstraction") [ Slightly expand commit subject and add 'Fixes:' tag. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 3446a6d46878..038e68c5a293 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -212,7 +212,7 @@ pub fn start_ptr_mut(&mut self) -> *mut T { self.cpu_addr } - /// Returns a DMA handle which may given to the device as the DMA address base of + /// Returns a DMA handle which may be given to the device as the DMA address base of /// the region. 
pub fn dma_handle(&self) -> bindings::dma_addr_t { self.dma_handle From c0a3065d5def59a894afc8cf5e988396cd0c2f5e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:46 +0900 Subject: [PATCH 009/358] rust: dma: expose the count and size of CoherentAllocation These properties are very useful to have (and to be used by nova-core) and should be accessible, hence add them. Additionally, add type invariants for the size of an allocation. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-2-ecf41ef99252@nvidia.com [ Slightly extend the commit message. - Danilo ] Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 038e68c5a293..34a406a0602e 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -114,9 +114,11 @@ pub mod attrs { /// /// # Invariants /// -/// For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer -/// to an allocated region of coherent memory and `dma_handle` is the DMA address base of -/// the region. +/// - For the lifetime of an instance of [`CoherentAllocation`], the `cpu_addr` is a valid pointer +/// to an allocated region of coherent memory and `dma_handle` is the DMA address base of the +/// region. +/// - The size in bytes of the allocation is equal to `size_of:: * count`. +/// - `size_of:: * count` fits into a `usize`. // TODO // // DMA allocations potentially carry device resources (e.g.IOMMU mappings), hence for soundness @@ -179,9 +181,12 @@ pub fn alloc_attrs( if ret.is_null() { return Err(ENOMEM); } - // INVARIANT: We just successfully allocated a coherent region which is accessible for - // `count` elements, hence the cpu address is valid. We also hold a refcounted reference - // to the device. 
+ // INVARIANT: + // - We just successfully allocated a coherent region which is accessible for + // `count` elements, hence the cpu address is valid. We also hold a refcounted reference + // to the device. + // - The allocated `size` is equal to `size_of:: * count`. + // - The allocated `size` fits into a `usize`. Ok(Self { dev: dev.into(), dma_handle, @@ -201,6 +206,21 @@ pub fn alloc_coherent( CoherentAllocation::alloc_attrs(dev, count, gfp_flags, Attrs(0)) } + /// Returns the number of elements `T` in this allocation. + /// + /// Note that this is not the size of the allocation in bytes, which is provided by + /// [`Self::size`]. + pub fn count(&self) -> usize { + self.count + } + + /// Returns the size in bytes of this allocation. + pub fn size(&self) -> usize { + // INVARIANT: The type invariant of `Self` guarantees that `size_of:: * count` fits into + // a `usize`. + self.count * core::mem::size_of::() + } + /// Returns the base address to the allocated region in the CPU's virtual address space. pub fn start_ptr(&self) -> *const T { self.cpu_addr From 26af856539842a4107c821e133fdda896022737f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:47 +0900 Subject: [PATCH 010/358] rust: dma: add dma_handle_with_offset method to CoherentAllocation Sometimes one may want to obtain a DMA handle starting at a given offset. This can be done by adding said offset to the result of `dma_handle()`, but doing so on the client side carries the risk that the operation will go outside the bounds of the allocation. Thus, add a `dma_handle_with_offset` method that adds the desired offset after checking that it is still valid. 
Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-3-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/dma.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/rust/kernel/dma.rs b/rust/kernel/dma.rs index 34a406a0602e..25dfa0e6cc3c 100644 --- a/rust/kernel/dma.rs +++ b/rust/kernel/dma.rs @@ -238,6 +238,20 @@ pub fn dma_handle(&self) -> bindings::dma_addr_t { self.dma_handle } + /// Returns a DMA handle starting at `offset` (in units of `T`) which may be given to the + /// device as the DMA address base of the region. + /// + /// Returns `EINVAL` if `offset` is not within the bounds of the allocation. + pub fn dma_handle_with_offset(&self, offset: usize) -> Result { + if offset >= self.count { + Err(EINVAL) + } else { + // INVARIANT: The type invariant of `Self` guarantees that `size_of:: * count` fits + // into a `usize`, and `offset` is inferior to `count`. + Ok(self.dma_handle + (offset * core::mem::size_of::()) as bindings::dma_addr_t) + } + } + /// Common helper to validate a range applied from the allocated region in the CPU's virtual /// address space. fn validate_range(&self, offset: usize, count: usize) -> Result { From 69ba00fed95769e904e4000ad92ec586e5f49ad6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:48 +0900 Subject: [PATCH 011/358] rust: make ETIMEDOUT error available We will use this error in the nova-core driver. Reviewed-by: Benno Lossin Signed-off-by: Alexandre Courbot Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250619-nova-frts-v6-4-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/error.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 3dee3139fcd4..083c7b068cf4 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -65,6 +65,7 @@ macro_rules! 
declare_err { declare_err!(EDOM, "Math argument out of domain of func."); declare_err!(ERANGE, "Math result not representable."); declare_err!(EOVERFLOW, "Value too large for defined data type."); + declare_err!(ETIMEDOUT, "Connection timed out."); declare_err!(ERESTARTSYS, "Restart the system call."); declare_err!(ERESTARTNOINTR, "System call was interrupted by a signal and will be restarted."); declare_err!(ERESTARTNOHAND, "Restart if no handler."); From 06a93197e29d45596d0eba40e7e7b2e6848d31ff Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:49 +0900 Subject: [PATCH 012/358] rust: sizes: add constants up to SZ_2G nova-core will need to use SZ_1M, so make the remaining constants available. Reviewed-by: Boqun Feng Signed-off-by: Alexandre Courbot Acked-by: Miguel Ojeda Link: https://lore.kernel.org/r/20250619-nova-frts-v6-5-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- rust/kernel/sizes.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/rust/kernel/sizes.rs b/rust/kernel/sizes.rs index 834c343e4170..661e680d9330 100644 --- a/rust/kernel/sizes.rs +++ b/rust/kernel/sizes.rs @@ -24,3 +24,27 @@ pub const SZ_256K: usize = bindings::SZ_256K as usize; /// 0x00080000 pub const SZ_512K: usize = bindings::SZ_512K as usize; +/// 0x00100000 +pub const SZ_1M: usize = bindings::SZ_1M as usize; +/// 0x00200000 +pub const SZ_2M: usize = bindings::SZ_2M as usize; +/// 0x00400000 +pub const SZ_4M: usize = bindings::SZ_4M as usize; +/// 0x00800000 +pub const SZ_8M: usize = bindings::SZ_8M as usize; +/// 0x01000000 +pub const SZ_16M: usize = bindings::SZ_16M as usize; +/// 0x02000000 +pub const SZ_32M: usize = bindings::SZ_32M as usize; +/// 0x04000000 +pub const SZ_64M: usize = bindings::SZ_64M as usize; +/// 0x08000000 +pub const SZ_128M: usize = bindings::SZ_128M as usize; +/// 0x10000000 +pub const SZ_256M: usize = bindings::SZ_256M as usize; +/// 0x20000000 +pub const SZ_512M: usize = bindings::SZ_512M as 
usize; +/// 0x40000000 +pub const SZ_1G: usize = bindings::SZ_1G as usize; +/// 0x80000000 +pub const SZ_2G: usize = bindings::SZ_2G as usize; From 8d5fbb8d027d67b015d5fc8e2ca9ef131959dd12 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:50 +0900 Subject: [PATCH 013/358] gpu: nova-core: use absolute paths in register!() macro Fix the paths that were not absolute to prevent a potential local module from being picked up. Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-6-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/regs/macros.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7ecc70efb3cd..40bf9346cd06 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -114,7 +114,7 @@ fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { } } - impl core::ops::BitOr for $name { + impl ::core::ops::BitOr for $name { type Output = Self; fn bitor(self, rhs: Self) -> Self::Output { @@ -161,7 +161,7 @@ impl $name { (@check_field_bounds $hi:tt:$lo:tt $field:ident as bool) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi == $lo, concat!("boolean field `", stringify!($field), "` covers more than one bit") ); @@ -172,7 +172,7 @@ impl $name { (@check_field_bounds $hi:tt:$lo:tt $field:ident as $type:tt) => { #[allow(clippy::eq_op)] const _: () = { - kernel::build_assert!( + ::kernel::build_assert!( $hi >= $lo, concat!("field `", stringify!($field), "`'s MSB is smaller than its LSB") ); @@ -234,7 +234,7 @@ impl $name { @leaf_accessor $name:ident $hi:tt:$lo:tt $field:ident as $type:ty { $process:expr } $to_type:ty => $res_type:ty $(, $comment:literal)?; ) => { - kernel::macros::paste!( + ::kernel::macros::paste!( const [<$field:upper>]: 
::core::ops::RangeInclusive = $lo..=$hi; const [<$field:upper _MASK>]: u32 = ((((1 << $hi) - 1) << 1) + 1) - ((1 << $lo) - 1); const [<$field:upper _SHIFT>]: u32 = Self::[<$field:upper _MASK>].trailing_zeros(); @@ -246,7 +246,7 @@ impl $name { )? #[inline] pub(crate) fn $field(self) -> $res_type { - kernel::macros::paste!( + ::kernel::macros::paste!( const MASK: u32 = $name::[<$field:upper _MASK>]; const SHIFT: u32 = $name::[<$field:upper _SHIFT>]; ); @@ -255,7 +255,7 @@ pub(crate) fn $field(self) -> $res_type { $process(field) } - kernel::macros::paste!( + ::kernel::macros::paste!( $( #[doc="Sets the value of this field:"] #[doc=$comment] From 8dd1433d905051f2d2e5ad8864da2de915a2f11f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:51 +0900 Subject: [PATCH 014/358] gpu: nova-core: add delimiter for helper rules in register!() macro This macro is pretty complex, and most rules are just helper, so add a delimiter to indicate when users only interested in using it can stop reading. Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-7-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/regs/macros.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 40bf9346cd06..d7f09026390b 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -94,6 +94,8 @@ macro_rules! register { register!(@io$name @ + $offset); }; + // All rules below are helpers. + // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, // and conversion to regular `u32`). (@common $name:ident $(, $comment:literal)?) 
=> { From cdfe233ee6cde6b82f0527fe7e0e599d4118b336 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:52 +0900 Subject: [PATCH 015/358] gpu: nova-core: expose the offset of each register as a type constant Although we want to access registers using the provided methods, it is sometimes needed to use their raw offset, for instance when working with a register array. Expose the offset of each register using a type constant to avoid resorting to hardcoded values. Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-8-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/regs/macros.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index d7f09026390b..7cd013f3c90b 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -78,7 +78,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); + register!(@common $name @ $offset $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io $name @ $offset); }; @@ -89,7 +89,7 @@ macro_rules! register { $($fields:tt)* } ) => { - register!(@common $name $(, $comment)?); + register!(@common $name @ $offset $(, $comment)?); register!(@field_accessors $name { $($fields)* }); register!(@io$name @ + $offset); }; @@ -98,7 +98,7 @@ macro_rules! register { // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, // and conversion to regular `u32`). - (@common $name:ident $(, $comment:literal)?) => { + (@common $name:ident @ $offset:literal $(, $comment:literal)?) => { $( #[doc=$comment] )? @@ -106,6 +106,11 @@ macro_rules! 
register { #[derive(Clone, Copy, Default)] pub(crate) struct $name(u32); + #[allow(dead_code)] + impl $name { + pub(crate) const OFFSET: usize = $offset; + } + // TODO: display the raw hex value, then the value of all the fields. This requires // matching the fields, which will complexify the syntax considerably... impl ::core::fmt::Debug for $name { From e66aaaffe017bffedc98abfcc12afed6214173bc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:53 +0900 Subject: [PATCH 016/358] gpu: nova-core: allow register aliases Some registers (notably scratch registers) don't have a definitive purpose, but need to be interpreted differently depending on context. Expand the register!() macro to support a syntax indicating that a register type should be at the same offset as another one, but under a different name, and with different fields and documentation. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-9-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/regs/macros.rs | 40 ++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index 7cd013f3c90b..e0e6fef3796f 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -71,6 +71,20 @@ /// pr_info!("CPU CTL: {:#x}", cpuctl); /// cpuctl.set_start(true).write(&bar, CPU_BASE); /// ``` +/// +/// It is also possible to create an alias register by using the `=> ALIAS` syntax.
This is useful +/// for cases where a register's interpretation depends on the context: +/// +/// ```no_run +/// register!(SCRATCH_0 @ 0x0000100, "Scratch register 0" { +/// 31:0 value as u32, "Raw value"; +/// +/// register!(SCRATCH_0_BOOT_STATUS => SCRATCH_0, "Boot status of the firmware" { +/// 0:0 completed as bool, "Whether the firmware has completed booting"; +/// ``` +/// +/// In this example, `SCRATCH_0_BOOT_STATUS` uses the same I/O address as `SCRATCH_0`, while also +/// providing its own `completed` method. macro_rules! register { // Creates a register at a fixed offset of the MMIO space. ( @@ -83,6 +97,17 @@ macro_rules! register { register!(@io $name @ $offset); }; + // Creates an alias register of fixed offset register `alias` with its own fields. + ( + $name:ident => $alias:ident $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + register!(@common $name @ $alias::OFFSET $(, $comment)?); + register!(@field_accessors $name { $($fields)* }); + register!(@io $name @ $alias::OFFSET); + }; + // Creates a register at a relative offset from a base address. ( $name:ident @ + $offset:literal $(, $comment:literal)? { @@ -94,11 +119,22 @@ macro_rules! register { register!(@io$name @ + $offset); }; + // Creates an alias register of relative offset register `alias` with its own fields. + ( + $name:ident => + $alias:ident $(, $comment:literal)? { + $($fields:tt)* + } + ) => { + register!(@common $name @ $alias::OFFSET $(, $comment)?); + register!(@field_accessors $name { $($fields)* }); + register!(@io $name @ + $alias::OFFSET); + }; + // All rules below are helpers. // Defines the wrapper `$name` type, as well as its relevant implementations (`Debug`, `BitOr`, // and conversion to regular `u32`). - (@common $name:ident @ $offset:literal $(, $comment:literal)?) => { + (@common $name:ident @ $offset:expr $(, $comment:literal)?) => { $( #[doc=$comment] )?
@@ -280,7 +316,7 @@ pub(crate) fn [<set_ $field>](mut self, value: $to_type) -> Self { }; // Creates the IO accessors for a fixed offset register. - (@io $name:ident @ $offset:literal) => { + (@io $name:ident @ $offset:expr) => { #[allow(dead_code)] impl $name { #[inline] From 94a08721435cc9a8de9f9ce83e9fe13d38d24771 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:54 +0900 Subject: [PATCH 017/358] gpu: nova-core: increase BAR0 size to 16MB The Turing+ register address space spans over that range, so increase it as future patches will access more registers. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-10-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/driver.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index 8c86101c26cb..ffe25c7a2fda 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*}; +use kernel::{auxiliary, bindings, c_str, device::Core, pci, prelude::*, sizes::SZ_16M}; use crate::gpu::Gpu; @@ -11,7 +11,7 @@ pub(crate) struct NovaCore { _reg: auxiliary::Registration, } -const BAR0_SIZE: usize = 8; +const BAR0_SIZE: usize = SZ_16M; pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>; kernel::pci_device_table!( From a03c9bd953c2482aec8013c9c857b4d53031b54d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:55 +0900 Subject: [PATCH 018/358] gpu: nova-core: add helper function to wait on condition While programming the hardware, we frequently need to busy-wait until a condition (like a given bit of a register to switch value) happens. Add a basic `wait_on` helper function to wait on such conditions expressed as a closure, with a timeout argument.
This is temporary as we will switch to `read_poll_timeout` [1] once it is available. Link: https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/ [1] Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-11-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/util.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 332a64cfc6a9..c50bfa5ab7fe 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -1,5 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +use core::time::Duration; + +use kernel::prelude::*; +use kernel::time::Instant; + pub(crate) const fn to_lowercase_bytes<const N: usize>(s: &str) -> [u8; N] { let src = s.as_bytes(); let mut dst = [0; N]; @@ -19,3 +24,27 @@ pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { Err(_) => kernel::build_error!("Bytes are not valid UTF-8."), } } + +/// Wait until `cond` is true or `timeout` elapsed. +/// +/// When `cond` evaluates to `Some`, its return value is returned. +/// +/// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to +/// `Some`. +/// +/// TODO: replace with `read_poll_timeout` once it is available. +/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) +#[expect(dead_code)] +pub(crate) fn wait_on<R, F: Fn() -> Option<R>>(timeout: Duration, cond: F) -> Result<R> { + let start_time = Instant::now(); + + loop { + if let Some(ret) = cond() { + return Ok(ret); + } + + if start_time.elapsed().as_nanos() > timeout.as_nanos() as i64 { + return Err(ETIMEDOUT); + } + } +} From ceb5ab3cb64637952657be23d347e1c79dd02212 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:51 +0300 Subject: [PATCH 019/358] mtd: add driver for intel graphics non-volatile memory device Add auxiliary driver for intel discrete graphics non-volatile memory device.
CC: Lucas De Marchi Reviewed-by: Raag Jadav Reviewed-by: Rodrigo Vivi Acked-by: Miquel Raynal Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-2-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- MAINTAINERS | 7 ++ drivers/mtd/devices/Kconfig | 11 +++ drivers/mtd/devices/Makefile | 1 + drivers/mtd/devices/mtd_intel_dg.c | 134 +++++++++++++++++++++++++++++ include/linux/intel_dg_nvm_aux.h | 30 +++++++ 5 files changed, 183 insertions(+) create mode 100644 drivers/mtd/devices/mtd_intel_dg.c create mode 100644 include/linux/intel_dg_nvm_aux.h diff --git a/MAINTAINERS b/MAINTAINERS index 7e7515a412e9..521730530243 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12086,6 +12086,13 @@ L: linux-kernel@vger.kernel.org S: Supported F: arch/x86/include/asm/intel-family.h +INTEL DISCRETE GRAPHICS NVM MTD DRIVER +M: Alexander Usyskin +L: linux-mtd@lists.infradead.org +S: Supported +F: drivers/mtd/devices/mtd_intel_dg.c +F: include/linux/intel_dg_nvm_aux.h + INTEL DRM DISPLAY FOR XE AND I915 DRIVERS M: Jani Nikula M: Rodrigo Vivi diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index aed653ce8fa2..46cebde79f34 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -183,6 +183,17 @@ config MTD_POWERNV_FLASH platforms from Linux. This device abstracts away the firmware interface for flash access. +config MTD_INTEL_DG + tristate "Intel Discrete Graphics non-volatile memory driver" + depends on AUXILIARY_BUS + depends on MTD + help + This provides an MTD device to access Intel Discrete Graphics + non-volatile memory. + + To compile this driver as a module, choose M here: the module + will be called mtd-intel-dg. 
+ comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index d11eb2b8b6f8..9fe4ce9cffde 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o obj-$(CONFIG_MTD_ST_SPI_FSM) += st_spi_fsm.o obj-$(CONFIG_MTD_POWERNV_FLASH) += powernv_flash.o +obj-$(CONFIG_MTD_INTEL_DG) += mtd_intel_dg.o CFLAGS_docg3.o += -I$(src) diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c new file mode 100644 index 000000000000..c2d0dbfb0378 --- /dev/null +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct intel_dg_nvm { + struct kref refcnt; + void __iomem *base; + size_t size; + unsigned int nregions; + struct { + const char *name; + u8 id; + u64 offset; + u64 size; + } regions[] __counted_by(nregions); +}; + +static void intel_dg_nvm_release(struct kref *kref) +{ + struct intel_dg_nvm *nvm = container_of(kref, struct intel_dg_nvm, refcnt); + int i; + + pr_debug("freeing intel_dg nvm\n"); + for (i = 0; i < nvm->nregions; i++) + kfree(nvm->regions[i].name); + kfree(nvm); +} + +static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *aux_dev_id) +{ + struct intel_dg_nvm_dev *invm = auxiliary_dev_to_intel_dg_nvm_dev(aux_dev); + struct intel_dg_nvm *nvm; + struct device *device; + unsigned int nregions; + unsigned int i, n; + int ret; + + device = &aux_dev->dev; + + /* count available regions */ + for (nregions = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) { + if (invm->regions[i].name) + nregions++; + } + + if (!nregions) { + dev_err(device, "no regions defined\n"); + return -ENODEV; + } + + nvm = 
kzalloc(struct_size(nvm, regions, nregions), GFP_KERNEL); + if (!nvm) + return -ENOMEM; + + kref_init(&nvm->refcnt); + + for (n = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) { + if (!invm->regions[i].name) + continue; + + char *name = kasprintf(GFP_KERNEL, "%s.%s", + dev_name(&aux_dev->dev), invm->regions[i].name); + if (!name) + continue; + nvm->regions[n].name = name; + nvm->regions[n].id = i; + n++; + } + nvm->nregions = n; /* in case where kasprintf fail */ + + nvm->base = devm_ioremap_resource(device, &invm->bar); + if (IS_ERR(nvm->base)) { + ret = PTR_ERR(nvm->base); + goto err; + } + + dev_set_drvdata(&aux_dev->dev, nvm); + + return 0; + +err: + kref_put(&nvm->refcnt, intel_dg_nvm_release); + return ret; +} + +static void intel_dg_mtd_remove(struct auxiliary_device *aux_dev) +{ + struct intel_dg_nvm *nvm = dev_get_drvdata(&aux_dev->dev); + + if (!nvm) + return; + + dev_set_drvdata(&aux_dev->dev, NULL); + + kref_put(&nvm->refcnt, intel_dg_nvm_release); +} + +static const struct auxiliary_device_id intel_dg_mtd_id_table[] = { + { + .name = "i915.nvm", + }, + { + .name = "xe.nvm", + }, + { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(auxiliary, intel_dg_mtd_id_table); + +static struct auxiliary_driver intel_dg_mtd_driver = { + .probe = intel_dg_mtd_probe, + .remove = intel_dg_mtd_remove, + .driver = { + /* auxiliary_driver_register() sets .name to be the modname */ + }, + .id_table = intel_dg_mtd_id_table +}; +module_auxiliary_driver(intel_dg_mtd_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Intel DGFX MTD driver"); diff --git a/include/linux/intel_dg_nvm_aux.h b/include/linux/intel_dg_nvm_aux.h new file mode 100644 index 000000000000..00b6c1301bd8 --- /dev/null +++ b/include/linux/intel_dg_nvm_aux.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
+ */ + +#ifndef __INTEL_DG_NVM_AUX_H__ +#define __INTEL_DG_NVM_AUX_H__ + +#include +#include +#include +#include + +#define INTEL_DG_NVM_REGIONS 13 + +struct intel_dg_nvm_region { + const char *name; +}; + +struct intel_dg_nvm_dev { + struct auxiliary_device aux_dev; + bool writable_override; + struct resource bar; + const struct intel_dg_nvm_region *regions; +}; + +#define auxiliary_dev_to_intel_dg_nvm_dev(auxiliary_dev) \ + container_of(auxiliary_dev, struct intel_dg_nvm_dev, aux_dev) + +#endif /* __INTEL_DG_NVM_AUX_H__ */ From 7234b321dd991926c4598a3eab8df0ab839cb3a6 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:52 +0300 Subject: [PATCH 020/358] mtd: intel-dg: implement region enumeration In intel-dg, there is no access to the spi controller, the information is extracted from the descriptor region. CC: Lucas De Marchi Reviewed-by: Raag Jadav Reviewed-by: Rodrigo Vivi Acked-by: Miquel Raynal Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-3-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/mtd/devices/mtd_intel_dg.c | 206 +++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c index c2d0dbfb0378..478dd2b158e3 100644 --- a/drivers/mtd/devices/mtd_intel_dg.c +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -3,6 +3,8 @@ * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. 
*/ +#include +#include #include #include #include @@ -22,9 +24,207 @@ struct intel_dg_nvm { u8 id; u64 offset; u64 size; + unsigned int is_readable:1; + unsigned int is_writable:1; } regions[] __counted_by(nregions); }; +#define NVM_TRIGGER_REG 0x00000000 +#define NVM_VALSIG_REG 0x00000010 +#define NVM_ADDRESS_REG 0x00000040 +#define NVM_REGION_ID_REG 0x00000044 +/* + * [15:0]-Erase size = 0x0010 4K 0x0080 32K 0x0100 64K + * [23:16]-Reserved + * [31:24]-Erase MEM RegionID + */ +#define NVM_ERASE_REG 0x00000048 +#define NVM_ACCESS_ERROR_REG 0x00000070 +#define NVM_ADDRESS_ERROR_REG 0x00000074 + +/* Flash Valid Signature */ +#define NVM_FLVALSIG 0x0FF0A55A + +#define NVM_MAP_ADDR_MASK GENMASK(7, 0) +#define NVM_MAP_ADDR_SHIFT 0x00000004 + +#define NVM_REGION_ID_DESCRIPTOR 0 +/* Flash Region Base Address */ +#define NVM_FRBA 0x40 +/* Flash Region __n - Flash Descriptor Record */ +#define NVM_FLREG(__n) (NVM_FRBA + ((__n) * 4)) +/* Flash Map 1 Register */ +#define NVM_FLMAP1_REG 0x18 +#define NVM_FLMSTR4_OFFSET 0x00C + +#define NVM_ACCESS_ERROR_PCIE_MASK 0x7 + +#define NVM_FREG_BASE_MASK GENMASK(15, 0) +#define NVM_FREG_ADDR_MASK GENMASK(31, 16) +#define NVM_FREG_ADDR_SHIFT 12 +#define NVM_FREG_MIN_REGION_SIZE 0xFFF + +static inline void idg_nvm_set_region_id(struct intel_dg_nvm *nvm, u8 region) +{ + iowrite32((u32)region, nvm->base + NVM_REGION_ID_REG); +} + +static inline u32 idg_nvm_error(struct intel_dg_nvm *nvm) +{ + void __iomem *base = nvm->base; + + u32 reg = ioread32(base + NVM_ACCESS_ERROR_REG) & NVM_ACCESS_ERROR_PCIE_MASK; + + /* reset error bits */ + if (reg) + iowrite32(reg, base + NVM_ACCESS_ERROR_REG); + + return reg; +} + +static inline u32 idg_nvm_read32(struct intel_dg_nvm *nvm, u32 address) +{ + void __iomem *base = nvm->base; + + iowrite32(address, base + NVM_ADDRESS_REG); + + return ioread32(base + NVM_TRIGGER_REG); +} + +static int idg_nvm_get_access_map(struct intel_dg_nvm *nvm, u32 *access_map) +{ + u32 fmstr4_addr; + u32 fmstr4; + u32 flmap1; + 
u32 fmba; + + idg_nvm_set_region_id(nvm, NVM_REGION_ID_DESCRIPTOR); + + flmap1 = idg_nvm_read32(nvm, NVM_FLMAP1_REG); + if (idg_nvm_error(nvm)) + return -EIO; + /* Get Flash Master Base Address (FMBA) */ + fmba = (FIELD_GET(NVM_MAP_ADDR_MASK, flmap1) << NVM_MAP_ADDR_SHIFT); + fmstr4_addr = fmba + NVM_FLMSTR4_OFFSET; + + fmstr4 = idg_nvm_read32(nvm, fmstr4_addr); + if (idg_nvm_error(nvm)) + return -EIO; + + *access_map = fmstr4; + return 0; +} + +/* + * Region read/write access encoded in the access map + * in the following order from the lower bit: + * [3:0] regions 12-15 read state + * [7:4] regions 12-15 write state + * [19:8] regions 0-11 read state + * [31:20] regions 0-11 write state + */ +static bool idg_nvm_region_readable(u32 access_map, u8 region) +{ + if (region < 12) + return access_map & BIT(region + 8); /* [19:8] */ + else + return access_map & BIT(region - 12); /* [3:0] */ +} + +static bool idg_nvm_region_writable(u32 access_map, u8 region) +{ + if (region < 12) + return access_map & BIT(region + 20); /* [31:20] */ + else + return access_map & BIT(region - 8); /* [7:4] */ +} + +static int idg_nvm_is_valid(struct intel_dg_nvm *nvm) +{ + u32 is_valid; + + idg_nvm_set_region_id(nvm, NVM_REGION_ID_DESCRIPTOR); + + is_valid = idg_nvm_read32(nvm, NVM_VALSIG_REG); + if (idg_nvm_error(nvm)) + return -EIO; + + if (is_valid != NVM_FLVALSIG) + return -ENODEV; + + return 0; +} + +static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device) +{ + u32 access_map = 0; + unsigned int i, n; + int ret; + + /* clean error register, previous errors are ignored */ + idg_nvm_error(nvm); + + ret = idg_nvm_is_valid(nvm); + if (ret) { + dev_err(device, "The MEM is not valid %d\n", ret); + return ret; + } + + if (idg_nvm_get_access_map(nvm, &access_map)) + return -EIO; + + for (i = 0, n = 0; i < nvm->nregions; i++) { + u32 address, base, limit, region; + u8 id = nvm->regions[i].id; + + address = NVM_FLREG(id); + region = idg_nvm_read32(nvm, address); + + base
= FIELD_GET(NVM_FREG_BASE_MASK, region) << NVM_FREG_ADDR_SHIFT; + limit = (FIELD_GET(NVM_FREG_ADDR_MASK, region) << NVM_FREG_ADDR_SHIFT) | + NVM_FREG_MIN_REGION_SIZE; + + dev_dbg(device, "[%d] %s: region: 0x%08X base: 0x%08x limit: 0x%08x\n", + id, nvm->regions[i].name, region, base, limit); + + if (base >= limit || (i > 0 && limit == 0)) { + dev_dbg(device, "[%d] %s: disabled\n", + id, nvm->regions[i].name); + nvm->regions[i].is_readable = 0; + continue; + } + + if (nvm->size < limit) + nvm->size = limit; + + nvm->regions[i].offset = base; + nvm->regions[i].size = limit - base + 1; + /* No write access to descriptor; mask it out*/ + nvm->regions[i].is_writable = idg_nvm_region_writable(access_map, id); + + nvm->regions[i].is_readable = idg_nvm_region_readable(access_map, id); + dev_dbg(device, "Registered, %s id=%d offset=%lld size=%lld rd=%d wr=%d\n", + nvm->regions[i].name, + nvm->regions[i].id, + nvm->regions[i].offset, + nvm->regions[i].size, + nvm->regions[i].is_readable, + nvm->regions[i].is_writable); + + if (nvm->regions[i].is_readable) + n++; + } + + dev_dbg(device, "Registered %d regions\n", n); + + /* Need to add 1 to the amount of memory + * so it is reported as an even block + */ + nvm->size += 1; + + return n; +} + static void intel_dg_nvm_release(struct kref *kref) { struct intel_dg_nvm *nvm = container_of(kref, struct intel_dg_nvm, refcnt); @@ -85,6 +285,12 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, goto err; } + ret = intel_dg_nvm_init(nvm, device); + if (ret < 0) { + dev_err(device, "cannot initialize nvm %d\n", ret); + goto err; + } + dev_set_drvdata(&aux_dev->dev, nvm); return 0; From 9fe53abfac0b158e2db7c516264bf11b64489384 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:53 +0300 Subject: [PATCH 021/358] mtd: intel-dg: implement access functions Implement read(), erase() and write() functions. 
CC: Lucas De Marchi CC: Rodrigo Vivi Reviewed-by: Raag Jadav Acked-by: Miquel Raynal Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Co-developed-by: Vitaly Lubart Signed-off-by: Vitaly Lubart Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-4-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/mtd/devices/mtd_intel_dg.c | 197 +++++++++++++++++++++++++++++ 1 file changed, 197 insertions(+) diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c index 478dd2b158e3..4d6e2c009b45 100644 --- a/drivers/mtd/devices/mtd_intel_dg.c +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -5,13 +5,16 @@ #include #include +#include #include #include #include +#include #include #include #include #include +#include #include struct intel_dg_nvm { @@ -91,6 +94,33 @@ static inline u32 idg_nvm_read32(struct intel_dg_nvm *nvm, u32 address) return ioread32(base + NVM_TRIGGER_REG); } +static inline u64 idg_nvm_read64(struct intel_dg_nvm *nvm, u32 address) +{ + void __iomem *base = nvm->base; + + iowrite32(address, base + NVM_ADDRESS_REG); + + return readq(base + NVM_TRIGGER_REG); +} + +static void idg_nvm_write32(struct intel_dg_nvm *nvm, u32 address, u32 data) +{ + void __iomem *base = nvm->base; + + iowrite32(address, base + NVM_ADDRESS_REG); + + iowrite32(data, base + NVM_TRIGGER_REG); +} + +static void idg_nvm_write64(struct intel_dg_nvm *nvm, u32 address, u64 data) +{ + void __iomem *base = nvm->base; + + iowrite32(address, base + NVM_ADDRESS_REG); + + writeq(data, base + NVM_TRIGGER_REG); +} + static int idg_nvm_get_access_map(struct intel_dg_nvm *nvm, u32 *access_map) { u32 fmstr4_addr; @@ -155,6 +185,173 @@ static int idg_nvm_is_valid(struct intel_dg_nvm *nvm) return 0; } +__maybe_unused +static unsigned int idg_nvm_get_region(const struct intel_dg_nvm *nvm, loff_t from) +{ + unsigned int i; + + for (i = 0; i < nvm->nregions; i++) { + if ((nvm->regions[i].offset + nvm->regions[i].size - 1) >= 
from && + nvm->regions[i].offset <= from && + nvm->regions[i].size != 0) + break; + } + + return i; +} + +static ssize_t idg_nvm_rewrite_partial(struct intel_dg_nvm *nvm, loff_t to, + loff_t offset, size_t len, const u32 *newdata) +{ + u32 data = idg_nvm_read32(nvm, to); + + if (idg_nvm_error(nvm)) + return -EIO; + + memcpy((u8 *)&data + offset, newdata, len); + + idg_nvm_write32(nvm, to, data); + if (idg_nvm_error(nvm)) + return -EIO; + + return len; +} + +__maybe_unused +static ssize_t idg_write(struct intel_dg_nvm *nvm, u8 region, + loff_t to, size_t len, const unsigned char *buf) +{ + size_t len_s = len; + size_t to_shift; + size_t len8; + size_t len4; + ssize_t ret; + size_t to4; + size_t i; + + idg_nvm_set_region_id(nvm, region); + + to4 = ALIGN_DOWN(to, sizeof(u32)); + to_shift = min(sizeof(u32) - ((size_t)to - to4), len); + if (to - to4) { + ret = idg_nvm_rewrite_partial(nvm, to4, to - to4, to_shift, (u32 *)&buf[0]); + if (ret < 0) + return ret; + + buf += to_shift; + to += to_shift; + len_s -= to_shift; + } + + len8 = ALIGN_DOWN(len_s, sizeof(u64)); + for (i = 0; i < len8; i += sizeof(u64)) { + u64 data; + + memcpy(&data, &buf[i], sizeof(u64)); + idg_nvm_write64(nvm, to + i, data); + if (idg_nvm_error(nvm)) + return -EIO; + } + + len4 = len_s - len8; + if (len4 >= sizeof(u32)) { + u32 data; + + memcpy(&data, &buf[i], sizeof(u32)); + idg_nvm_write32(nvm, to + i, data); + if (idg_nvm_error(nvm)) + return -EIO; + i += sizeof(u32); + len4 -= sizeof(u32); + } + + if (len4 > 0) { + ret = idg_nvm_rewrite_partial(nvm, to + i, 0, len4, (u32 *)&buf[i]); + if (ret < 0) + return ret; + } + + return len; +} + +__maybe_unused +static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, + loff_t from, size_t len, unsigned char *buf) +{ + size_t len_s = len; + size_t from_shift; + size_t from4; + size_t len8; + size_t len4; + size_t i; + + idg_nvm_set_region_id(nvm, region); + + from4 = ALIGN_DOWN(from, sizeof(u32)); + from_shift = min(sizeof(u32) - ((size_t)from - 
from4), len); + + if (from - from4) { + u32 data = idg_nvm_read32(nvm, from4); + + if (idg_nvm_error(nvm)) + return -EIO; + memcpy(&buf[0], (u8 *)&data + (from - from4), from_shift); + len_s -= from_shift; + buf += from_shift; + from += from_shift; + } + + len8 = ALIGN_DOWN(len_s, sizeof(u64)); + for (i = 0; i < len8; i += sizeof(u64)) { + u64 data = idg_nvm_read64(nvm, from + i); + + if (idg_nvm_error(nvm)) + return -EIO; + + memcpy(&buf[i], &data, sizeof(data)); + } + + len4 = len_s - len8; + if (len4 >= sizeof(u32)) { + u32 data = idg_nvm_read32(nvm, from + i); + + if (idg_nvm_error(nvm)) + return -EIO; + memcpy(&buf[i], &data, sizeof(data)); + i += sizeof(u32); + len4 -= sizeof(u32); + } + + if (len4 > 0) { + u32 data = idg_nvm_read32(nvm, from + i); + + if (idg_nvm_error(nvm)) + return -EIO; + memcpy(&buf[i], &data, len4); + } + + return len; +} + +__maybe_unused +static ssize_t +idg_erase(struct intel_dg_nvm *nvm, u8 region, loff_t from, u64 len, u64 *fail_addr) +{ + void __iomem *base = nvm->base; + const u32 block = 0x10; + u64 i; + + for (i = 0; i < len; i += SZ_4K) { + iowrite32(from + i, base + NVM_ADDRESS_REG); + iowrite32(region << 24 | block, base + NVM_ERASE_REG); + /* Since the writes are via sgunit + * we cannot do back to back erases. + */ + msleep(50); + } + return len; +} + static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device) { u32 access_map = 0; From 044c8be05a39d79821251d8e49fe90531e6c7108 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:54 +0300 Subject: [PATCH 022/358] mtd: intel-dg: register with mtd Register the on-die nvm device with the mtd subsystem. Refcount nvm object on _get and _put mtd callbacks. For erase operation address and size should be 4K aligned. For write operation address and size has to be 4bytes aligned. 
CC: Rodrigo Vivi CC: Lucas De Marchi Reviewed-by: Raag Jadav Acked-by: Miquel Raynal Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Co-developed-by: Vitaly Lubart Signed-off-by: Vitaly Lubart Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-5-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/mtd/devices/mtd_intel_dg.c | 228 ++++++++++++++++++++++++++++- 1 file changed, 224 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c index 4d6e2c009b45..6d971fb77938 100644 --- a/drivers/mtd/devices/mtd_intel_dg.c +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -12,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,6 +22,8 @@ struct intel_dg_nvm { struct kref refcnt; + struct mtd_info mtd; + struct mutex lock; /* region access lock */ void __iomem *base; size_t size; unsigned int nregions; @@ -185,7 +190,6 @@ static int idg_nvm_is_valid(struct intel_dg_nvm *nvm) return 0; } -__maybe_unused static unsigned int idg_nvm_get_region(const struct intel_dg_nvm *nvm, loff_t from) { unsigned int i; @@ -217,7 +221,6 @@ static ssize_t idg_nvm_rewrite_partial(struct intel_dg_nvm *nvm, loff_t to, return len; } -__maybe_unused static ssize_t idg_write(struct intel_dg_nvm *nvm, u8 region, loff_t to, size_t len, const unsigned char *buf) { @@ -274,7 +277,6 @@ static ssize_t idg_write(struct intel_dg_nvm *nvm, u8 region, return len; } -__maybe_unused static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, loff_t from, size_t len, unsigned char *buf) { @@ -333,7 +335,6 @@ static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, return len; } -__maybe_unused static ssize_t idg_erase(struct intel_dg_nvm *nvm, u8 region, loff_t from, u64 len, u64 *fail_addr) { @@ -422,6 +423,147 @@ static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device 
*device) return n; } +static int intel_dg_mtd_erase(struct mtd_info *mtd, struct erase_info *info) +{ + struct intel_dg_nvm *nvm = mtd->priv; + size_t total_len; + unsigned int idx; + ssize_t bytes; + loff_t from; + size_t len; + u8 region; + u64 addr; + + if (WARN_ON(!nvm)) + return -EINVAL; + + if (!IS_ALIGNED(info->addr, SZ_4K) || !IS_ALIGNED(info->len, SZ_4K)) { + dev_err(&mtd->dev, "unaligned erase %llx %llx\n", + info->addr, info->len); + info->fail_addr = MTD_FAIL_ADDR_UNKNOWN; + return -EINVAL; + } + + total_len = info->len; + addr = info->addr; + + guard(mutex)(&nvm->lock); + + while (total_len > 0) { + if (!IS_ALIGNED(addr, SZ_4K) || !IS_ALIGNED(total_len, SZ_4K)) { + dev_err(&mtd->dev, "unaligned erase %llx %zx\n", addr, total_len); + info->fail_addr = addr; + return -ERANGE; + } + + idx = idg_nvm_get_region(nvm, addr); + if (idx >= nvm->nregions) { + dev_err(&mtd->dev, "out of range"); + info->fail_addr = MTD_FAIL_ADDR_UNKNOWN; + return -ERANGE; + } + + from = addr - nvm->regions[idx].offset; + region = nvm->regions[idx].id; + len = total_len; + if (len > nvm->regions[idx].size - from) + len = nvm->regions[idx].size - from; + + dev_dbg(&mtd->dev, "erasing region[%d] %s from %llx len %zx\n", + region, nvm->regions[idx].name, from, len); + + bytes = idg_erase(nvm, region, from, len, &info->fail_addr); + if (bytes < 0) { + dev_dbg(&mtd->dev, "erase failed with %zd\n", bytes); + info->fail_addr += nvm->regions[idx].offset; + return bytes; + } + + addr += len; + total_len -= len; + } + + return 0; +} + +static int intel_dg_mtd_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf) +{ + struct intel_dg_nvm *nvm = mtd->priv; + unsigned int idx; + ssize_t ret; + u8 region; + + if (WARN_ON(!nvm)) + return -EINVAL; + + idx = idg_nvm_get_region(nvm, from); + + dev_dbg(&mtd->dev, "reading region[%d] %s from %lld len %zd\n", + nvm->regions[idx].id, nvm->regions[idx].name, from, len); + + if (idx >= nvm->nregions) { + dev_err(&mtd->dev, 
"out of range"); + return -ERANGE; + } + + from -= nvm->regions[idx].offset; + region = nvm->regions[idx].id; + if (len > nvm->regions[idx].size - from) + len = nvm->regions[idx].size - from; + + guard(mutex)(&nvm->lock); + + ret = idg_read(nvm, region, from, len, buf); + if (ret < 0) { + dev_dbg(&mtd->dev, "read failed with %zd\n", ret); + return ret; + } + + *retlen = ret; + + return 0; +} + +static int intel_dg_mtd_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + struct intel_dg_nvm *nvm = mtd->priv; + unsigned int idx; + ssize_t ret; + u8 region; + + if (WARN_ON(!nvm)) + return -EINVAL; + + idx = idg_nvm_get_region(nvm, to); + + dev_dbg(&mtd->dev, "writing region[%d] %s to %lld len %zd\n", + nvm->regions[idx].id, nvm->regions[idx].name, to, len); + + if (idx >= nvm->nregions) { + dev_err(&mtd->dev, "out of range"); + return -ERANGE; + } + + to -= nvm->regions[idx].offset; + region = nvm->regions[idx].id; + if (len > nvm->regions[idx].size - to) + len = nvm->regions[idx].size - to; + + guard(mutex)(&nvm->lock); + + ret = idg_write(nvm, region, to, len, buf); + if (ret < 0) { + dev_dbg(&mtd->dev, "write failed with %zd\n", ret); + return ret; + } + + *retlen = ret; + + return 0; +} + static void intel_dg_nvm_release(struct kref *kref) { struct intel_dg_nvm *nvm = container_of(kref, struct intel_dg_nvm, refcnt); @@ -430,9 +572,78 @@ static void intel_dg_nvm_release(struct kref *kref) pr_debug("freeing intel_dg nvm\n"); for (i = 0; i < nvm->nregions; i++) kfree(nvm->regions[i].name); + mutex_destroy(&nvm->lock); kfree(nvm); } +static int intel_dg_mtd_get_device(struct mtd_info *mtd) +{ + struct mtd_info *master = mtd_get_master(mtd); + struct intel_dg_nvm *nvm = master->priv; + + if (WARN_ON(!nvm)) + return -EINVAL; + pr_debug("get mtd %s %d\n", mtd->name, kref_read(&nvm->refcnt)); + kref_get(&nvm->refcnt); + + return 0; +} + +static void intel_dg_mtd_put_device(struct mtd_info *mtd) +{ + struct mtd_info *master = 
mtd_get_master(mtd); + struct intel_dg_nvm *nvm = master->priv; + + if (WARN_ON(!nvm)) + return; + pr_debug("put mtd %s %d\n", mtd->name, kref_read(&nvm->refcnt)); + kref_put(&nvm->refcnt, intel_dg_nvm_release); +} + +static int intel_dg_nvm_init_mtd(struct intel_dg_nvm *nvm, struct device *device, + unsigned int nparts, bool writable_override) +{ + struct mtd_partition *parts = NULL; + unsigned int i, n; + int ret; + + dev_dbg(device, "registering with mtd\n"); + + nvm->mtd.owner = THIS_MODULE; + nvm->mtd.dev.parent = device; + nvm->mtd.flags = MTD_CAP_NORFLASH; + nvm->mtd.type = MTD_DATAFLASH; + nvm->mtd.priv = nvm; + nvm->mtd._write = intel_dg_mtd_write; + nvm->mtd._read = intel_dg_mtd_read; + nvm->mtd._erase = intel_dg_mtd_erase; + nvm->mtd._get_device = intel_dg_mtd_get_device; + nvm->mtd._put_device = intel_dg_mtd_put_device; + nvm->mtd.writesize = SZ_1; /* 1 byte granularity */ + nvm->mtd.erasesize = SZ_4K; /* 4K bytes granularity */ + nvm->mtd.size = nvm->size; + + parts = kcalloc(nvm->nregions, sizeof(*parts), GFP_KERNEL); + if (!parts) + return -ENOMEM; + + for (i = 0, n = 0; i < nvm->nregions && n < nparts; i++) { + if (!nvm->regions[i].is_readable) + continue; + parts[n].name = nvm->regions[i].name; + parts[n].offset = nvm->regions[i].offset; + parts[n].size = nvm->regions[i].size; + if (!nvm->regions[i].is_writable && !writable_override) + parts[n].mask_flags = MTD_WRITEABLE; + n++; + } + + ret = mtd_device_register(&nvm->mtd, parts, n); + + kfree(parts); + return ret; +} + static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *aux_dev_id) { @@ -461,6 +672,7 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, return -ENOMEM; kref_init(&nvm->refcnt); + mutex_init(&nvm->lock); for (n = 0, i = 0; i < INTEL_DG_NVM_REGIONS; i++) { if (!invm->regions[i].name) @@ -488,6 +700,12 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, goto err; } + ret = intel_dg_nvm_init_mtd(nvm, device, ret, 
invm->writable_override); + if (ret) { + dev_err(device, "failed init mtd %d\n", ret); + goto err; + } + dev_set_drvdata(&aux_dev->dev, nvm); return 0; @@ -504,6 +722,8 @@ static void intel_dg_mtd_remove(struct auxiliary_device *aux_dev) if (!nvm) return; + mtd_device_unregister(&nvm->mtd); + dev_set_drvdata(&aux_dev->dev, NULL); kref_put(&nvm->refcnt, intel_dg_nvm_release); From 4b93f5fc3bdff9e89beb7aa7f39ca8c7e4d02924 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:55 +0300 Subject: [PATCH 023/358] mtd: intel-dg: align 64bit read and write GSC NVM controller HW errors on quad access overlapping 1K border. Align 64bit read and write to avoid readq/writeq over 1K border. Reviewed-by: Raag Jadav Acked-by: Miquel Raynal Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-6-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/mtd/devices/mtd_intel_dg.c | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c index 6d971fb77938..97e1dc1ada5d 100644 --- a/drivers/mtd/devices/mtd_intel_dg.c +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -246,6 +246,24 @@ static ssize_t idg_write(struct intel_dg_nvm *nvm, u8 region, len_s -= to_shift; } + if (!IS_ALIGNED(to, sizeof(u64)) && + ((to ^ (to + len_s)) & GENMASK(31, 10))) { + /* + * Workaround reads/writes across 1k-aligned addresses + * (start u32 before 1k, end u32 after) + * as this fails on hardware. 
+ */ + u32 data; + + memcpy(&data, &buf[0], sizeof(u32)); + idg_nvm_write32(nvm, to, data); + if (idg_nvm_error(nvm)) + return -EIO; + buf += sizeof(u32); + to += sizeof(u32); + len_s -= sizeof(u32); + } + len8 = ALIGN_DOWN(len_s, sizeof(u64)); for (i = 0; i < len8; i += sizeof(u64)) { u64 data; @@ -303,6 +321,23 @@ static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, from += from_shift; } + if (!IS_ALIGNED(from, sizeof(u64)) && + ((from ^ (from + len_s)) & GENMASK(31, 10))) { + /* + * Workaround reads/writes across 1k-aligned addresses + * (start u32 before 1k, end u32 after) + * as this fails on hardware. + */ + u32 data = idg_nvm_read32(nvm, from); + + if (idg_nvm_error(nvm)) + return -EIO; + memcpy(&buf[0], &data, sizeof(data)); + len_s -= sizeof(u32); + buf += sizeof(u32); + from += sizeof(u32); + } + len8 = ALIGN_DOWN(len_s, sizeof(u64)); for (i = 0; i < len8; i += sizeof(u64)) { u64 data = idg_nvm_read64(nvm, from + i); From c28bfb107daca4e9ba2e567a412e885933057282 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:56 +0300 Subject: [PATCH 024/358] drm/xe/nvm: add on-die non-volatile memory device Enable access to internal non-volatile memory on DGFX with GSC/CSC devices via a child device. The nvm child device is exposed via auxiliary bus. 
Reviewed-by: Rodrigo Vivi Acked-by: Rodrigo Vivi Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-7-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 5 ++ drivers/gpu/drm/xe/xe_device_types.h | 6 ++ drivers/gpu/drm/xe/xe_nvm.c | 107 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_nvm.h | 15 ++++ drivers/gpu/drm/xe/xe_pci.c | 6 ++ 6 files changed, 140 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_nvm.c create mode 100644 drivers/gpu/drm/xe/xe_nvm.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f5f5775acdc0..7c039caefd00 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -80,6 +80,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_nvm.o \ xe_oa.o \ xe_observation.o \ xe_pat.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 8cfcfff250ca..e160e7be84f0 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -46,6 +46,7 @@ #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_nvm.h" #include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" @@ -881,6 +882,8 @@ int xe_device_probe(struct xe_device *xe) return err; } + xe_nvm_init(xe); + err = xe_heci_gsc_init(xe); if (err) return err; @@ -938,6 +941,8 @@ void xe_device_remove(struct xe_device *xe) { xe_display_unregister(xe); + xe_nvm_fini(xe); + drm_dev_unplug(&xe->drm); xe_bo_pci_dev_remove_all(xe); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 003afb279a5e..6aca4b1a2824 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -32,6 +32,7 @@ struct dram_info; struct intel_display; +struct intel_dg_nvm_dev; struct xe_ggtt; struct xe_pat_ops; struct xe_pxp; @@ -316,6 +317,8 @@ struct xe_device { u8 has_fan_control:1; /** @info.has_flat_ccs: 
Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_gsc_nvm: Device has gsc non-volatile memory */ + u8 has_gsc_nvm:1; /** @info.has_heci_cscfi: device has heci cscfi */ u8 has_heci_cscfi:1; /** @info.has_heci_gscfi: device has heci gscfi */ @@ -549,6 +552,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @nvm: discrete graphics non-volatile memory */ + struct intel_dg_nvm_dev *nvm; + /** @oa: oa observation subsystem */ struct xe_oa oa; diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c new file mode 100644 index 000000000000..33ba635ce116 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright(c) 2019-2025, Intel Corporation. All rights reserved. + */ + +#include +#include + +#include "xe_device_types.h" +#include "xe_nvm.h" +#include "xe_sriov.h" + +#define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_GUNIT_NVM_SIZE 0x80 +#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) + +static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { + [0] = { .name = "DESCRIPTOR", }, + [2] = { .name = "GSC", }, + [9] = { .name = "PADDING", }, + [11] = { .name = "OptionROM", }, + [12] = { .name = "DAM", }, +}; + +static void xe_nvm_release_dev(struct device *dev) +{ +} + +int xe_nvm_init(struct xe_device *xe) +{ + struct pci_dev *pdev = to_pci_dev(xe->drm.dev); + struct auxiliary_device *aux_dev; + struct intel_dg_nvm_dev *nvm; + int ret; + + if (!xe->info.has_gsc_nvm) + return 0; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return 0; + + /* Nvm pointer should be NULL here */ + if (WARN_ON(xe->nvm)) + return -EFAULT; + + xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL); + if (!xe->nvm) + return -ENOMEM; + + nvm = xe->nvm; + + nvm->writable_override = false; + nvm->bar.parent = &pdev->resource[0]; + nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; + nvm->bar.end = 
nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; + nvm->bar.flags = IORESOURCE_MEM; + nvm->bar.desc = IORES_DESC_NONE; + nvm->regions = regions; + + aux_dev = &nvm->aux_dev; + + aux_dev->name = "nvm"; + aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev); + aux_dev->dev.parent = &pdev->dev; + aux_dev->dev.release = xe_nvm_release_dev; + + ret = auxiliary_device_init(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret); + goto err; + } + + ret = auxiliary_device_add(aux_dev); + if (ret) { + drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret); + auxiliary_device_uninit(aux_dev); + goto err; + } + return 0; + +err: + kfree(nvm); + xe->nvm = NULL; + return ret; +} + +void xe_nvm_fini(struct xe_device *xe) +{ + struct intel_dg_nvm_dev *nvm = xe->nvm; + + if (!xe->info.has_gsc_nvm) + return; + + /* No access to internal NVM from VFs */ + if (IS_SRIOV_VF(xe)) + return; + + /* Nvm pointer should not be NULL here */ + if (WARN_ON(!nvm)) + return; + + auxiliary_device_delete(&nvm->aux_dev); + auxiliary_device_uninit(&nvm->aux_dev); + kfree(nvm); + xe->nvm = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h new file mode 100644 index 000000000000..7f3d5f57bed0 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_nvm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright(c) 2019-2025 Intel Corporation. All rights reserved. 
+ */ + +#ifndef __XE_NVM_H__ +#define __XE_NVM_H__ + +struct xe_device; + +int xe_nvm_init(struct xe_device *xe); + +void xe_nvm_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index cd239457f542..08e21d4099e0 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -63,6 +63,7 @@ struct xe_device_desc { u8 has_display:1; u8 has_fan_control:1; + u8 has_gsc_nvm:1; u8 has_heci_gscfi:1; u8 has_heci_cscfi:1; u8 has_llc:1; @@ -272,6 +273,7 @@ static const struct xe_device_desc dg1_desc = { PLATFORM(DG1), .dma_mask_size = 39, .has_display = true, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, .require_force_probe = true, }; @@ -283,6 +285,7 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 }; #define DG2_FEATURES \ DGFX_FEATURES, \ PLATFORM(DG2), \ + .has_gsc_nvm = 1, \ .has_heci_gscfi = 1, \ .subplatforms = (const struct xe_subplatform_desc[]) { \ { XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \ @@ -319,6 +322,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { PLATFORM(PVC), .dma_mask_size = 52, .has_display = false, + .has_gsc_nvm = 1, .has_heci_gscfi = 1, .max_remote_tiles = 1, .require_force_probe = true, @@ -349,6 +353,7 @@ static const struct xe_device_desc bmg_desc = { .has_display = true, .has_fan_control = true, .has_mbx_power_limits = true, + .has_gsc_nvm = 1, .has_heci_cscfi = 1, .needs_scratch = true, }; @@ -592,6 +597,7 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.is_dgfx = desc->is_dgfx; xe->info.has_fan_control = desc->has_fan_control; xe->info.has_mbx_power_limits = desc->has_mbx_power_limits; + xe->info.has_gsc_nvm = desc->has_gsc_nvm; xe->info.has_heci_gscfi = desc->has_heci_gscfi; xe->info.has_heci_cscfi = desc->has_heci_cscfi; xe->info.has_llc = desc->has_llc; From 87e1ebbafbea1e5e8de1f2beaf01b378149262f2 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 17 Jun 2025 17:51:57 +0300 Subject: [PATCH 025/358] drm/xe/nvm: 
add support for access mode Check NVM access mode from GSC FW status registers and overwrite access status read from SPI descriptor, if needed. Reviewed-by: Rodrigo Vivi Acked-by: Rodrigo Vivi Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-8-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_gsc_regs.h | 4 +++ drivers/gpu/drm/xe/xe_heci_gsc.c | 5 +--- drivers/gpu/drm/xe/xe_nvm.c | 37 ++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h index 7702364b65f1..9b66cc972a63 100644 --- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h @@ -16,6 +16,10 @@ #define MTL_GSC_HECI1_BASE 0x00116000 #define MTL_GSC_HECI2_BASE 0x00117000 +#define DG1_GSC_HECI2_BASE 0x00259000 +#define PVC_GSC_HECI2_BASE 0x00285000 +#define DG2_GSC_HECI2_BASE 0x00374000 + #define HECI_H_CSR(base) XE_REG((base) + 0x4) #define HECI_H_CSR_IE REG_BIT(0) #define HECI_H_CSR_IS REG_BIT(1) diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 27d11e06a82b..6d7b62724126 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -11,15 +11,12 @@ #include "xe_device_types.h" #include "xe_drv.h" #include "xe_heci_gsc.h" +#include "regs/xe_gsc_regs.h" #include "xe_platform_types.h" #include "xe_survivability_mode.h" #define GSC_BAR_LENGTH 0x00000FFC -#define DG1_GSC_HECI2_BASE 0x259000 -#define PVC_GSC_HECI2_BASE 0x285000 -#define DG2_GSC_HECI2_BASE 0x374000 - static void heci_gsc_irq_mask(struct irq_data *d) { /* generic irq handling */ diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 33ba635ce116..20aa3b5d3637 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -6,8 +6,11 @@ #include #include +#include "xe_device.h" #include "xe_device_types.h" +#include "xe_mmio.h" 
#include "xe_nvm.h" +#include "regs/xe_gsc_regs.h" #include "xe_sriov.h" #define GEN12_GUNIT_NVM_BASE 0x00102040 @@ -26,6 +29,38 @@ static void xe_nvm_release_dev(struct device *dev) { } +static bool xe_nvm_writable_override(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + bool writable_override; + resource_size_t base; + + switch (xe->info.platform) { + case XE_BATTLEMAGE: + base = DG2_GSC_HECI2_BASE; + break; + case XE_PVC: + base = PVC_GSC_HECI2_BASE; + break; + case XE_DG2: + base = DG2_GSC_HECI2_BASE; + break; + case XE_DG1: + base = DG1_GSC_HECI2_BASE; + break; + default: + drm_err(&xe->drm, "Unknown platform\n"); + return true; + } + + writable_override = + !(xe_mmio_read32(&gt->mmio, HECI_FWSTS2(base)) & + HECI_FW_STATUS_2_NVM_ACCESS_MODE); + if (writable_override) + drm_info(&xe->drm, "NVM access overridden by jumper\n"); + return writable_override; +} + int xe_nvm_init(struct xe_device *xe) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); @@ -50,7 +85,7 @@ int xe_nvm_init(struct xe_device *xe) nvm = xe->nvm; - nvm->writable_override = false; + nvm->writable_override = xe_nvm_writable_override(xe); nvm->bar.parent = &pdev->resource[0]; nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; From a1c940cbf505e2342ebb5ea996f0acf205d6af7b Mon Sep 17 00:00:00 2001 From: Reuven Abliyev Date: Tue, 17 Jun 2025 17:51:58 +0300 Subject: [PATCH 026/358] drm/xe/nvm: add support for non-posted erase Erase command is slow on discrete graphics storage and may overshoot PCI completion timeout. BMG introduces the ability to have non-posted erase. Add driver support for non-posted erase with polling for erase completion. 
Reviewed-by: Rodrigo Vivi Acked-by: Rodrigo Vivi Signed-off-by: Reuven Abliyev Signed-off-by: Alexander Usyskin Link: https://lore.kernel.org/r/20250617145159.3803852-9-alexander.usyskin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_nvm.c | 25 ++++++++++++++++++ drivers/mtd/devices/mtd_intel_dg.c | 42 ++++++++++++++++++++++++++++-- include/linux/intel_dg_nvm_aux.h | 2 ++ 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c index 20aa3b5d3637..61b0a1531a53 100644 --- a/drivers/gpu/drm/xe/xe_nvm.c +++ b/drivers/gpu/drm/xe/xe_nvm.c @@ -14,7 +14,15 @@ #include "xe_sriov.h" #define GEN12_GUNIT_NVM_BASE 0x00102040 +#define GEN12_DEBUG_NVM_BASE 0x00101018 + +#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C + #define GEN12_GUNIT_NVM_SIZE 0x80 +#define GEN12_DEBUG_NVM_SIZE 0x4 + +#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13) + #define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3) static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = { @@ -29,6 +37,16 @@ static void xe_nvm_release_dev(struct device *dev) { } +static bool xe_nvm_non_posted_erase(struct xe_device *xe) +{ + struct xe_gt *gt = xe_root_mmio_gt(xe); + + if (xe->info.platform != XE_BATTLEMAGE) + return false; + return !(xe_mmio_read32(&gt->mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) & + NVM_NON_POSTED_ERASE_CHICKEN_BIT); +} + static bool xe_nvm_writable_override(struct xe_device *xe) { struct xe_gt *gt = xe_root_mmio_gt(xe); @@ -86,6 +104,7 @@ int xe_nvm_init(struct xe_device *xe) nvm = xe->nvm; nvm->writable_override = xe_nvm_writable_override(xe); + nvm->non_posted_erase = xe_nvm_non_posted_erase(xe); nvm->bar.parent = &pdev->resource[0]; nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start; nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1; @@ -93,6 +112,12 @@ int xe_nvm_init(struct xe_device *xe) nvm->bar.desc = IORES_DESC_NONE; nvm->regions = regions; + nvm->bar2.parent = &pdev->resource[0]; + 
nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start; + nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1; + nvm->bar2.flags = IORESOURCE_MEM; + nvm->bar2.desc = IORES_DESC_NONE; + aux_dev = &nvm->aux_dev; aux_dev->name = "nvm"; diff --git a/drivers/mtd/devices/mtd_intel_dg.c b/drivers/mtd/devices/mtd_intel_dg.c index 97e1dc1ada5d..b438ee5aacc3 100644 --- a/drivers/mtd/devices/mtd_intel_dg.c +++ b/drivers/mtd/devices/mtd_intel_dg.c @@ -25,6 +25,9 @@ struct intel_dg_nvm { struct mtd_info mtd; struct mutex lock; /* region access lock */ void __iomem *base; + void __iomem *base2; + bool non_posted_erase; + size_t size; unsigned int nregions; struct { @@ -41,6 +44,7 @@ struct intel_dg_nvm { #define NVM_VALSIG_REG 0x00000010 #define NVM_ADDRESS_REG 0x00000040 #define NVM_REGION_ID_REG 0x00000044 +#define NVM_DEBUG_REG 0x00000000 /* * [15:0]-Erase size = 0x0010 4K 0x0080 32K 0x0100 64K * [23:16]-Reserved @@ -72,6 +76,9 @@ struct intel_dg_nvm { #define NVM_FREG_ADDR_SHIFT 12 #define NVM_FREG_MIN_REGION_SIZE 0xFFF +#define NVM_NON_POSTED_ERASE_DONE BIT(23) +#define NVM_NON_POSTED_ERASE_DONE_ITER 3000 + static inline void idg_nvm_set_region_id(struct intel_dg_nvm *nvm, u8 region) { iowrite32((u32)region, nvm->base + NVM_REGION_ID_REG); @@ -373,13 +380,32 @@ static ssize_t idg_read(struct intel_dg_nvm *nvm, u8 region, static ssize_t idg_erase(struct intel_dg_nvm *nvm, u8 region, loff_t from, u64 len, u64 *fail_addr) { + void __iomem *base2 = nvm->base2; void __iomem *base = nvm->base; const u32 block = 0x10; + u32 iter = 0; + u32 reg; u64 i; for (i = 0; i < len; i += SZ_4K) { iowrite32(from + i, base + NVM_ADDRESS_REG); iowrite32(region << 24 | block, base + NVM_ERASE_REG); + if (nvm->non_posted_erase) { + /* Wait for Erase Done */ + reg = ioread32(base2 + NVM_DEBUG_REG); + while (!(reg & NVM_NON_POSTED_ERASE_DONE) && + ++iter < NVM_NON_POSTED_ERASE_DONE_ITER) { + msleep(10); + reg = ioread32(base2 + NVM_DEBUG_REG); + } + if (reg & 
NVM_NON_POSTED_ERASE_DONE) { + /* Clear Erase Done */ + iowrite32(reg, base2 + NVM_DEBUG_REG); + } else { + *fail_addr = from + i; + return -ETIME; + } + } /* Since the writes are via sgunit * we cannot do back to back erases. */ @@ -388,7 +414,8 @@ idg_erase(struct intel_dg_nvm *nvm, u8 region, loff_t from, u64 len, u64 *fail_a return len; } -static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device) +static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device, + bool non_posted_erase) { u32 access_map = 0; unsigned int i, n; @@ -448,7 +475,10 @@ static int intel_dg_nvm_init(struct intel_dg_nvm *nvm, struct device *device) n++; } + nvm->non_posted_erase = non_posted_erase; + dev_dbg(device, "Registered %d regions\n", n); + dev_dbg(device, "Non posted erase %d\n", nvm->non_posted_erase); /* Need to add 1 to the amount of memory * so it is reported as an even block @@ -729,7 +759,15 @@ static int intel_dg_mtd_probe(struct auxiliary_device *aux_dev, goto err; } - ret = intel_dg_nvm_init(nvm, device); + if (invm->non_posted_erase) { + nvm->base2 = devm_ioremap_resource(device, &invm->bar2); + if (IS_ERR(nvm->base2)) { + ret = PTR_ERR(nvm->base2); + goto err; + } + } + + ret = intel_dg_nvm_init(nvm, device, invm->non_posted_erase); if (ret < 0) { dev_err(device, "cannot initialize nvm %d\n", ret); goto err; diff --git a/include/linux/intel_dg_nvm_aux.h b/include/linux/intel_dg_nvm_aux.h index 00b6c1301bd8..625d46a6b96e 100644 --- a/include/linux/intel_dg_nvm_aux.h +++ b/include/linux/intel_dg_nvm_aux.h @@ -20,7 +20,9 @@ struct intel_dg_nvm_region { struct intel_dg_nvm_dev { struct auxiliary_device aux_dev; bool writable_override; + bool non_posted_erase; struct resource bar; + struct resource bar2; const struct intel_dg_nvm_region *regions; }; From bbe5db761086e870678697f8920db518a6297a18 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:56 +0900 Subject: [PATCH 027/358] gpu: nova-core: wait for GFW_BOOT 
completion Upon reset, the GPU executes the GFW (GPU Firmware) in order to initialize its base parameters such as clocks. The driver must ensure that this step is completed before using the hardware. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-12-ecf41ef99252@nvidia.com [ Slightly adjust comments in wait_gfw_boot_completion(). - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/gfw.rs | 41 ++++++++++++++++++++++++++++++ drivers/gpu/nova-core/gpu.rs | 5 ++++ drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/regs.rs | 25 ++++++++++++++++++ drivers/gpu/nova-core/util.rs | 1 - 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/nova-core/gfw.rs diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs new file mode 100644 index 000000000000..937e820e00fc --- /dev/null +++ b/drivers/gpu/nova-core/gfw.rs @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! GPU Firmware (GFW) support. +//! +//! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of +//! the GPU is considered unusable until this step is completed, so we must wait on it before +//! performing driver initialization. + +use core::time::Duration; + +use kernel::bindings; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::regs; +use crate::util; + +/// Wait until `GFW` (GPU Firmware) completes, or a 4 seconds timeout elapses. +pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { + // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of + // reset, and should complete in less time than that. + util::wait_on(Duration::from_secs(4), || { + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT + // status. 
+ let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) + .read_protection_level0() + && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); + + if gfw_booted { + Some(()) + } else { + // TODO: replace with [1] once merged. + // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ + // + // SAFETY: `msleep()` is safe to call with any parameter. + unsafe { bindings::msleep(1) }; + + None + } + }) +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 60b86f370284..e44ff6fa0714 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -4,6 +4,7 @@ use crate::driver::Bar0; use crate::firmware::{Firmware, FIRMWARE_VERSION}; +use crate::gfw; use crate::regs; use crate::util; use core::fmt; @@ -182,6 +183,10 @@ pub(crate) fn new( spec.revision ); + // We must wait for GFW_BOOT completion before doing any significant setup on the GPU. + gfw::wait_gfw_boot_completion(bar) + .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + Ok(pin_init!(Self { spec, bar: devres_bar, diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 618632f0abcc..c3fde3e132ea 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -4,6 +4,7 @@ mod driver; mod firmware; +mod gfw; mod gpu; mod regs; mod util; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 5a1273230306..cba442da5118 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -37,3 +37,28 @@ pub(crate) fn chipset(self) -> Result { .and_then(Chipset::try_from) } } + +/* PGC6 */ + +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128 { + 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; +}); + +// TODO: This is an array of registers. 
+register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 { + 31:0 value as u32; +}); + +register!( + NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT => NV_PGC6_AON_SECURE_SCRATCH_GROUP_05, + "Scratch group 05 register 0 used as GFW boot progress indicator" { + 7:0 progress as u8, "Progress of GFW boot (0xff means completed)"; + } +); + +impl NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT { + /// Returns `true` if GFW boot is completed. + pub(crate) fn completed(self) -> bool { + self.progress() == 0xff + } +} diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index c50bfa5ab7fe..69f29238b25e 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -34,7 +34,6 @@ pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { /// /// TODO: replace with `read_poll_timeout` once it is available. /// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) -#[expect(dead_code)] pub(crate) fn wait_on Option>(timeout: Duration, cond: F) -> Result { let start_time = Instant::now(); From 9bcc046a929f73358bf23678306b31d927a7e088 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:57 +0900 Subject: [PATCH 028/358] gpu: nova-core: add DMA object struct Since we will need to allocate lots of distinct memory chunks to be shared between GPU and CPU, introduce a type dedicated to that. It is a light wrapper around CoherentAllocation. 
Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-13-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/dma.rs | 61 ++++++++++++++++++++++++++++++ drivers/gpu/nova-core/nova_core.rs | 1 + 2 files changed, 62 insertions(+) create mode 100644 drivers/gpu/nova-core/dma.rs diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs new file mode 100644 index 000000000000..4b063aaef65e --- /dev/null +++ b/drivers/gpu/nova-core/dma.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Simple DMA object wrapper. + +// To be removed when all code is used. +#![expect(dead_code)] + +use core::ops::{Deref, DerefMut}; + +use kernel::device; +use kernel::dma::CoherentAllocation; +use kernel::page::PAGE_SIZE; +use kernel::prelude::*; + +pub(crate) struct DmaObject { + dma: CoherentAllocation, +} + +impl DmaObject { + pub(crate) fn new(dev: &device::Device, len: usize) -> Result { + let len = core::alloc::Layout::from_size_align(len, PAGE_SIZE) + .map_err(|_| EINVAL)? + .pad_to_align() + .size(); + let dma = CoherentAllocation::alloc_coherent(dev, len, GFP_KERNEL | __GFP_ZERO)?; + + Ok(Self { dma }) + } + + pub(crate) fn from_data(dev: &device::Device, data: &[u8]) -> Result { + Self::new(dev, data.len()).map(|mut dma_obj| { + // TODO: replace with `CoherentAllocation::write()` once available. + // SAFETY: + // - `dma_obj`'s size is at least `data.len()`. + // - We have just created this object and there is no other user at this stage. 
+ unsafe { + core::ptr::copy_nonoverlapping( + data.as_ptr(), + dma_obj.dma.start_ptr_mut(), + data.len(), + ); + } + + dma_obj + }) + } +} + +impl Deref for DmaObject { + type Target = CoherentAllocation; + + fn deref(&self) -> &Self::Target { + &self.dma + } +} + +impl DerefMut for DmaObject { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.dma + } +} diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index c3fde3e132ea..121fe5c11044 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -2,6 +2,7 @@ //! Nova Core GPU Driver +mod dma; mod driver; mod firmware; mod gfw; From 6554ad65b5891e52689061606e277a69e44462c4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:58 +0900 Subject: [PATCH 029/358] gpu: nova-core: register sysmem flush page Reserve a page of system memory so sysmembar can perform a read on it if a system write occurred since the last flush. Do this early as it can be required to e.g. reset the GPU falcons. Chipsets capabilities differ in that respect, so this commit also introduces the FB HAL. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-14-ecf41ef99252@nvidia.com [ * Use kernel::page::PAGE_SIZE instead of kernel::bindings::PAGE_SIZE. * Get rid of the Option for SysmemFlush. * Slightly reword SysmemFlush doc-comments. 
- Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/fb.rs | 67 +++++++++++++++++++++++++++ drivers/gpu/nova-core/fb/hal.rs | 31 +++++++++++++ drivers/gpu/nova-core/fb/hal/ga100.rs | 45 ++++++++++++++++++ drivers/gpu/nova-core/fb/hal/tu102.rs | 42 +++++++++++++++++ drivers/gpu/nova-core/gpu.rs | 21 ++++++++- drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/regs.rs | 10 ++++ 7 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/nova-core/fb.rs create mode 100644 drivers/gpu/nova-core/fb/hal.rs create mode 100644 drivers/gpu/nova-core/fb/hal/ga100.rs create mode 100644 drivers/gpu/nova-core/fb/hal/tu102.rs diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs new file mode 100644 index 000000000000..bfe3ed063572 --- /dev/null +++ b/drivers/gpu/nova-core/fb.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; +use kernel::types::ARef; +use kernel::{dev_warn, device}; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; + +mod hal; + +/// Type holding the sysmem flush memory page, a page of memory to be written into the +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency. +/// +/// Users are responsible for manually calling [`Self::unregister`] before dropping this object, +/// otherwise the GPU might still use it even after it has been freed. +pub(crate) struct SysmemFlush { + /// Chipset we are operating on. + chipset: Chipset, + device: ARef, + /// Keep the page alive as long as we need it. + page: DmaObject, +} + +impl SysmemFlush { + /// Allocate a memory page and register it as the sysmem flush page. 
+ pub(crate) fn register( + dev: &device::Device, + bar: &Bar0, + chipset: Chipset, + ) -> Result { + let page = DmaObject::new(dev, kernel::page::PAGE_SIZE)?; + + hal::fb_hal(chipset).write_sysmem_flush_page(bar, page.dma_handle())?; + + Ok(Self { + chipset, + device: dev.into(), + page, + }) + } + + /// Unregister the managed sysmem flush page. + /// + /// In order to gracefully tear down the GPU, users must make sure to call this method before + /// dropping the object. + pub(crate) fn unregister(&self, bar: &Bar0) { + let hal = hal::fb_hal(self.chipset); + + if hal.read_sysmem_flush_page(bar) == self.page.dma_handle() { + let _ = hal.write_sysmem_flush_page(bar, 0).inspect_err(|e| { + dev_warn!( + &self.device, + "failed to unregister sysmem flush page: {:?}", + e + ) + }); + } else { + // Another page has been registered after us for some reason - warn as this is a bug. + dev_warn!( + &self.device, + "attempt to unregister a sysmem flush page that is not active\n" + ); + } + } +} diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs new file mode 100644 index 000000000000..23eab57eec9f --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::gpu::Chipset; + +mod ga100; +mod tu102; + +pub(crate) trait FbHal { + /// Returns the address of the currently-registered sysmem flush page. + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64; + + /// Register `addr` as the address of the sysmem flush page. + /// + /// This might fail if the address is too large for the receiving register. + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; +} + +/// Returns the HAL corresponding to `chipset`. 
+pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal { + use Chipset::*; + + match chipset { + TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL, + GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + ga100::GA100_HAL + } + } +} diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs new file mode 100644 index 000000000000..7c10436c1c59 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct Ga100; + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; + +pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { + (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08() as u64) << FLUSH_SYSMEM_ADDR_SHIFT + | (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40() as u64) + << FLUSH_SYSMEM_ADDR_SHIFT_HI +} + +pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::default() + .set_adr_63_40((addr >> FLUSH_SYSMEM_ADDR_SHIFT_HI) as u32) + .write(bar); + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); +} + +/// Shift applied to the sysmem address before it is written into +/// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`, +const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40; + +impl FbHal for Ga100 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } +} + +const GA100: Ga100 = Ga100; +pub(super) const GA100_HAL: &dyn FbHal = &GA100; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs new file mode 100644 index 000000000000..048859f9fd9d --- /dev/null +++ 
b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; +use kernel::prelude::*; + +/// Shift applied to the sysmem address before it is written into `NV_PFB_NISO_FLUSH_SYSMEM_ADDR`, +/// to be used by HALs. +pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; + +pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { + (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08() as u64) << FLUSH_SYSMEM_ADDR_SHIFT +} + +pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { + // Check that the address doesn't overflow the receiving 32-bit register. + if addr >> (u32::BITS + FLUSH_SYSMEM_ADDR_SHIFT) == 0 { + regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::default() + .set_adr_39_08((addr >> FLUSH_SYSMEM_ADDR_SHIFT) as u32) + .write(bar); + + Ok(()) + } else { + Err(EINVAL) + } +} + +struct Tu102; + +impl FbHal for Tu102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + read_sysmem_flush_page_gm107(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { + write_sysmem_flush_page_gm107(bar, addr) + } +} + +const TU102: Tu102 = Tu102; +pub(super) const TU102_HAL: &dyn FbHal = &TU102; diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index e44ff6fa0714..992840f6b108 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -3,6 +3,7 @@ use kernel::{device, devres::Devres, error::code::*, pci, prelude::*}; use crate::driver::Bar0; +use crate::fb::SysmemFlush; use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; use crate::regs; @@ -158,12 +159,24 @@ fn new(bar: &Bar0) -> Result { } /// Structure holding the resources required to operate the GPU. 
-#[pin_data] +#[pin_data(PinnedDrop)] pub(crate) struct Gpu { spec: Spec, /// MMIO mapping of PCI BAR 0 bar: Devres, fw: Firmware, + /// System memory page required for flushing all pending GPU-side memory writes done through + /// PCIE into system memory. + sysmem_flush: SysmemFlush, +} + +#[pinned_drop] +impl PinnedDrop for Gpu { + fn drop(self: Pin<&mut Self>) { + // Unregister the sysmem flush page before we release it. + self.bar + .try_access_with(|b| self.sysmem_flush.unregister(b)); + } } impl Gpu { @@ -187,10 +200,14 @@ pub(crate) fn new( gfw::wait_gfw_boot_completion(bar) .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; + // System memory page required for sysmembar to properly flush into system memory. + let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; + Ok(pin_init!(Self { spec, bar: devres_bar, - fw + fw, + sysmem_flush, })) } } diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 121fe5c11044..8ac04b8586e7 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -4,6 +4,7 @@ mod dma; mod driver; +mod fb; mod firmware; mod gfw; mod gpu; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index cba442da5118..b599e7ddad57 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -38,6 +38,16 @@ pub(crate) fn chipset(self) -> Result { } } +/* PFB */ + +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { + 31:0 adr_39_08 as u32; +}); + +register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI @ 0x00100c40 { + 23:0 adr_63_40 as u32; +}); + /* PGC6 */ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128 { From 69f5cd67ce41ba128d3df18137c7a93a1faa84da Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:23:59 +0900 Subject: [PATCH 030/358] gpu: nova-core: add falcon register definitions and base code Booting the GSP on Ampere requires an intricate dance 
between the GSP and SEC2 falcons, where the GSP starts by running the FWSEC firmware to create the WPR2 region, and then SEC2 loads the actual RISC-V firmware into the GSP. Add the common Falcon code and HAL for Ampere GPUs, and instantiate the GSP and SEC2 Falcons that will be required to perform that dance and boot the GSP. Thanks to Ben Skeggs for pointing out an important bug in the memory scrubbing code that could lead to a race condition and ultimately a failure to boot the GSP! Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-15-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 551 ++++++++++++++++++++++ drivers/gpu/nova-core/falcon/gsp.rs | 24 + drivers/gpu/nova-core/falcon/hal.rs | 54 +++ drivers/gpu/nova-core/falcon/hal/ga102.rs | 119 +++++ drivers/gpu/nova-core/falcon/sec2.rs | 10 + drivers/gpu/nova-core/gpu.rs | 11 + drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/regs.rs | 146 ++++++ 8 files changed, 916 insertions(+) create mode 100644 drivers/gpu/nova-core/falcon.rs create mode 100644 drivers/gpu/nova-core/falcon/gsp.rs create mode 100644 drivers/gpu/nova-core/falcon/hal.rs create mode 100644 drivers/gpu/nova-core/falcon/hal/ga102.rs create mode 100644 drivers/gpu/nova-core/falcon/sec2.rs diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs new file mode 100644 index 000000000000..ba14cb24b80d --- /dev/null +++ b/drivers/gpu/nova-core/falcon.rs @@ -0,0 +1,551 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Falcon microprocessor base support + +// To be removed when all code is used. 
+#![expect(dead_code)] + +use core::ops::Deref; +use core::time::Duration; +use hal::FalconHal; +use kernel::bindings; +use kernel::device; +use kernel::prelude::*; +use kernel::types::ARef; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::gpu::Chipset; +use crate::regs; +use crate::util; + +pub(crate) mod gsp; +mod hal; +pub(crate) mod sec2; + +/// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRev { + #[default] + Rev1 = 1, + Rev2 = 2, + Rev3 = 3, + Rev4 = 4, + Rev5 = 5, + Rev6 = 6, + Rev7 = 7, +} + +impl TryFrom for FalconCoreRev { + type Error = Error; + + fn try_from(value: u8) -> Result { + use FalconCoreRev::*; + + let rev = match value { + 1 => Rev1, + 2 => Rev2, + 3 => Rev3, + 4 => Rev4, + 5 => Rev5, + 6 => Rev6, + 7 => Rev7, + _ => return Err(EINVAL), + }; + + Ok(rev) + } +} + +/// Revision subversion number of a falcon core, used in the +/// [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) enum FalconCoreRevSubversion { + #[default] + Subversion0 = 0, + Subversion1 = 1, + Subversion2 = 2, + Subversion3 = 3, +} + +impl TryFrom for FalconCoreRevSubversion { + type Error = Error; + + fn try_from(value: u8) -> Result { + use FalconCoreRevSubversion::*; + + let sub_version = match value & 0b11 { + 0 => Subversion0, + 1 => Subversion1, + 2 => Subversion2, + 3 => Subversion3, + _ => return Err(EINVAL), + }; + + Ok(sub_version) + } +} + +/// Security model of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone)] +pub(crate) enum FalconSecurityModel { + /// Non-Secure: runs unsigned code without privileges. + #[default] + None = 0, + /// Low-Secure: runs code with some privileges. 
Can only be entered from `Heavy` mode, which + /// will typically validate the LS code through some signature. + Light = 2, + /// High-Secure: runs signed code with full privileges. Signature is validated by boot ROM. + Heavy = 3, +} + +impl TryFrom for FalconSecurityModel { + type Error = Error; + + fn try_from(value: u8) -> Result { + use FalconSecurityModel::*; + + let sec_model = match value { + 0 => None, + 2 => Light, + 3 => Heavy, + _ => return Err(EINVAL), + }; + + Ok(sec_model) + } +} + +/// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] +/// register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum FalconModSelAlgo { + /// RSA3K. + #[default] + Rsa3k = 1, +} + +impl TryFrom for FalconModSelAlgo { + type Error = Error; + + fn try_from(value: u8) -> Result { + match value { + 1 => Ok(FalconModSelAlgo::Rsa3k), + _ => Err(EINVAL), + } + } +} + +/// Valid values for the `size` field of the [`crate::regs::NV_PFALCON_FALCON_DMATRFCMD`] register. +#[repr(u8)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] +pub(crate) enum DmaTrfCmdSize { + /// 256 bytes transfer. + #[default] + Size256B = 0x6, +} + +impl TryFrom for DmaTrfCmdSize { + type Error = Error; + + fn try_from(value: u8) -> Result { + match value { + 0x6 => Ok(Self::Size256B), + _ => Err(EINVAL), + } + } +} + +/// Currently active core on a dual falcon/riscv (Peregrine) controller. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub(crate) enum PeregrineCoreSelect { + /// Falcon core is active. + #[default] + Falcon = 0, + /// RISC-V core is active. + Riscv = 1, +} + +impl From for PeregrineCoreSelect { + fn from(value: bool) -> Self { + match value { + false => PeregrineCoreSelect::Falcon, + true => PeregrineCoreSelect::Riscv, + } + } +} + +/// Different types of memory present in a falcon core. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum FalconMem { + /// Instruction Memory. 
+ Imem, + /// Data Memory. + Dmem, +} + +/// Target/source of a DMA transfer to/from falcon memory. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifTarget { + /// VRAM. + #[default] + LocalFb = 0, + /// Coherent system memory. + CoherentSysmem = 1, + /// Non-coherent system memory. + NoncoherentSysmem = 2, +} + +impl TryFrom for FalconFbifTarget { + type Error = Error; + + fn try_from(value: u8) -> Result { + let res = match value { + 0 => Self::LocalFb, + 1 => Self::CoherentSysmem, + 2 => Self::NoncoherentSysmem, + _ => return Err(EINVAL), + }; + + Ok(res) + } +} + +/// Type of memory addresses to use. +#[derive(Debug, Clone, Default)] +pub(crate) enum FalconFbifMemType { + /// Virtual memory addresses. + #[default] + Virtual = 0, + /// Physical memory addresses. + Physical = 1, +} + +/// Conversion from a single-bit register field. +impl From for FalconFbifMemType { + fn from(value: bool) -> Self { + match value { + false => Self::Virtual, + true => Self::Physical, + } + } +} + +/// Trait defining the parameters of a given Falcon instance. +pub(crate) trait FalconEngine: Sync { + /// Base I/O address for the falcon, relative from which its registers are accessed. + const BASE: usize; +} + +/// Represents a portion of the firmware to be loaded into a particular memory (e.g. IMEM or DMEM). +#[derive(Debug)] +pub(crate) struct FalconLoadTarget { + /// Offset from the start of the source object to copy from. + pub(crate) src_start: u32, + /// Offset from the start of the destination memory to copy into. + pub(crate) dst_start: u32, + /// Number of bytes to copy. + pub(crate) len: u32, +} + +/// Parameters for the falcon boot ROM. +#[derive(Debug)] +pub(crate) struct FalconBromParams { + /// Offset in `DMEM` of the firmware's signature. + pub(crate) pkc_data_offset: u32, + /// Mask of engines valid for this firmware. + pub(crate) engine_id_mask: u16, + /// ID of the ucode used to infer a fuse register to validate the signature. 
+ pub(crate) ucode_id: u8, +} + +/// Trait for providing load parameters of falcon firmwares. +pub(crate) trait FalconLoadParams { + /// Returns the load parameters for `IMEM`. + fn imem_load_params(&self) -> FalconLoadTarget; + + /// Returns the load parameters for `DMEM`. + fn dmem_load_params(&self) -> FalconLoadTarget; + + /// Returns the parameters to write into the BROM registers. + fn brom_params(&self) -> FalconBromParams; + + /// Returns the start address of the firmware. + fn boot_addr(&self) -> u32; +} + +/// Trait for a falcon firmware. +/// +/// A falcon firmware can be loaded on a given engine, and is presented in the form of a DMA +/// object. +pub(crate) trait FalconFirmware: FalconLoadParams + Deref { + /// Engine on which this firmware is to be loaded. + type Target: FalconEngine; +} + +/// Contains the base parameters common to all Falcon instances. +pub(crate) struct Falcon { + hal: KBox>, + dev: ARef, +} + +impl Falcon { + /// Create a new falcon instance. + /// + /// `need_riscv` is set to `true` if the caller expects the falcon to be a dual falcon/riscv + /// controller. + pub(crate) fn new( + dev: &device::Device, + chipset: Chipset, + bar: &Bar0, + need_riscv: bool, + ) -> Result { + let hwcfg1 = regs::NV_PFALCON_FALCON_HWCFG1::read(bar, E::BASE); + // Check that the revision and security model contain valid values. + let _ = hwcfg1.core_rev()?; + let _ = hwcfg1.security_model()?; + + if need_riscv { + let hwcfg2 = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + if !hwcfg2.riscv() { + dev_err!( + dev, + "riscv support requested on a controller that does not support it\n" + ); + return Err(EINVAL); + } + } + + Ok(Self { + hal: hal::falcon_hal(chipset)?, + dev: dev.into(), + }) + } + + /// Wait for memory scrubbing to complete. + fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { + // TIMEOUT: memory scrubbing should complete in less than 20ms. 
+ util::wait_on(Duration::from_millis(20), || { + if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() { + Some(()) + } else { + None + } + }) + } + + /// Reset the falcon engine. + fn reset_eng(&self, bar: &Bar0) -> Result { + let _ = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + + // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set + // RESET_READY so a non-failing timeout is used. + let _ = util::wait_on(Duration::from_micros(150), || { + let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); + if r.reset_ready() { + Some(()) + } else { + None + } + }); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true)); + + // TODO: replace with udelay() or equivalent once available. + // TIMEOUT: falcon engine should not take more than 10us to reset. + let _: Result = util::wait_on(Duration::from_micros(10), || None); + + regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false)); + + self.reset_wait_mem_scrubbing(bar)?; + + Ok(()) + } + + /// Reset the controller, select the falcon core, and wait for memory scrubbing to complete. + pub(crate) fn reset(&self, bar: &Bar0) -> Result { + self.reset_eng(bar)?; + self.hal.select_core(self, bar)?; + self.reset_wait_mem_scrubbing(bar)?; + + regs::NV_PFALCON_FALCON_RM::default() + .set_value(regs::NV_PMC_BOOT_0::read(bar).into()) + .write(bar, E::BASE); + + Ok(()) + } + + /// Perform a DMA write according to `load_offsets` from `dma_handle` into the falcon's + /// `target_mem`. + /// + /// `sec` is set if the loaded firmware is expected to run in secure mode. + fn dma_wr>( + &self, + bar: &Bar0, + fw: &F, + target_mem: FalconMem, + load_offsets: FalconLoadTarget, + sec: bool, + ) -> Result { + const DMA_LEN: u32 = 256; + + // For IMEM, we want to use the start offset as a virtual address tag for each page, since + // code addresses in the firmware (and the boot vector) are virtual. 
+ // + // For DMEM we can fold the start offset into the DMA handle. + let (src_start, dma_start) = match target_mem { + FalconMem::Imem => (load_offsets.src_start, fw.dma_handle()), + FalconMem::Dmem => ( + 0, + fw.dma_handle_with_offset(load_offsets.src_start as usize)?, + ), + }; + if dma_start % DMA_LEN as bindings::dma_addr_t > 0 { + dev_err!( + self.dev, + "DMA transfer start addresses must be a multiple of {}", + DMA_LEN + ); + return Err(EINVAL); + } + if load_offsets.len % DMA_LEN > 0 { + dev_err!( + self.dev, + "DMA transfer length must be a multiple of {}", + DMA_LEN + ); + return Err(EINVAL); + } + + // Set up the base source DMA address. + + regs::NV_PFALCON_FALCON_DMATRFBASE::default() + .set_base((dma_start >> 8) as u32) + .write(bar, E::BASE); + regs::NV_PFALCON_FALCON_DMATRFBASE1::default() + .set_base((dma_start >> 40) as u16) + .write(bar, E::BASE); + + let cmd = regs::NV_PFALCON_FALCON_DMATRFCMD::default() + .set_size(DmaTrfCmdSize::Size256B) + .set_imem(target_mem == FalconMem::Imem) + .set_sec(if sec { 1 } else { 0 }); + + for pos in (0..load_offsets.len).step_by(DMA_LEN as usize) { + // Perform a transfer of size `DMA_LEN`. + regs::NV_PFALCON_FALCON_DMATRFMOFFS::default() + .set_offs(load_offsets.dst_start + pos) + .write(bar, E::BASE); + regs::NV_PFALCON_FALCON_DMATRFFBOFFS::default() + .set_offs(src_start + pos) + .write(bar, E::BASE); + cmd.write(bar, E::BASE); + + // Wait for the transfer to complete. + // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories + // should ever take that long. + util::wait_on(Duration::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE); + if r.idle() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) + } + + /// Perform a DMA load into `IMEM` and `DMEM` of `fw`, and prepare the falcon to run it. 
+ pub(crate) fn dma_load>(&self, bar: &Bar0, fw: &F) -> Result { + regs::NV_PFALCON_FBIF_CTL::alter(bar, E::BASE, |v| v.set_allow_phys_no_ctx(true)); + regs::NV_PFALCON_FALCON_DMACTL::default().write(bar, E::BASE); + regs::NV_PFALCON_FBIF_TRANSCFG::alter(bar, E::BASE, |v| { + v.set_target(FalconFbifTarget::CoherentSysmem) + .set_mem_type(FalconFbifMemType::Physical) + }); + + self.dma_wr(bar, fw, FalconMem::Imem, fw.imem_load_params(), true)?; + self.dma_wr(bar, fw, FalconMem::Dmem, fw.dmem_load_params(), true)?; + + self.hal.program_brom(self, bar, &fw.brom_params())?; + + // Set `BootVec` to start of non-secure code. + regs::NV_PFALCON_FALCON_BOOTVEC::default() + .set_value(fw.boot_addr()) + .write(bar, E::BASE); + + Ok(()) + } + + /// Runs the loaded firmware and waits for its completion. + /// + /// `mbox0` and `mbox1` are optional parameters to write into the `MBOX0` and `MBOX1` registers + /// prior to running. + /// + /// Wait up to two seconds for the firmware to complete, and return its exit status read from + /// the `MBOX0` and `MBOX1` registers. + pub(crate) fn boot( + &self, + bar: &Bar0, + mbox0: Option, + mbox1: Option, + ) -> Result<(u32, u32)> { + if let Some(mbox0) = mbox0 { + regs::NV_PFALCON_FALCON_MAILBOX0::default() + .set_value(mbox0) + .write(bar, E::BASE); + } + + if let Some(mbox1) = mbox1 { + regs::NV_PFALCON_FALCON_MAILBOX1::default() + .set_value(mbox1) + .write(bar, E::BASE); + } + + match regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE).alias_en() { + true => regs::NV_PFALCON_FALCON_CPUCTL_ALIAS::default() + .set_startcpu(true) + .write(bar, E::BASE), + false => regs::NV_PFALCON_FALCON_CPUCTL::default() + .set_startcpu(true) + .write(bar, E::BASE), + } + + // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. 
+ util::wait_on(Duration::from_secs(2), || { + let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE); + if r.halted() { + Some(()) + } else { + None + } + })?; + + let (mbox0, mbox1) = ( + regs::NV_PFALCON_FALCON_MAILBOX0::read(bar, E::BASE).value(), + regs::NV_PFALCON_FALCON_MAILBOX1::read(bar, E::BASE).value(), + ); + + Ok((mbox0, mbox1)) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + pub(crate) fn signature_reg_fuse_version( + &self, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result { + self.hal + .signature_reg_fuse_version(self, bar, engine_id_mask, ucode_id) + } +} diff --git a/drivers/gpu/nova-core/falcon/gsp.rs b/drivers/gpu/nova-core/falcon/gsp.rs new file mode 100644 index 000000000000..d622e9a64470 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/gsp.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::{ + driver::Bar0, + falcon::{Falcon, FalconEngine}, + regs, +}; + +/// Type specifying the `Gsp` falcon engine. Cannot be instantiated. +pub(crate) struct Gsp(()); + +impl FalconEngine for Gsp { + const BASE: usize = 0x00110000; +} + +impl Falcon { + /// Clears the SWGEN0 bit in the Falcon's IRQ status clear register to + /// allow GSP to signal CPU for processing new messages in message queue. 
+ pub(crate) fn clear_swgen0_intr(&self, bar: &Bar0) { + regs::NV_PFALCON_FALCON_IRQSCLR::default() + .set_swgen0(true) + .write(bar, Gsp::BASE); + } +} diff --git a/drivers/gpu/nova-core/falcon/hal.rs b/drivers/gpu/nova-core/falcon/hal.rs new file mode 100644 index 000000000000..b233bc365882 --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal.rs @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{Falcon, FalconBromParams, FalconEngine}; +use crate::gpu::Chipset; + +mod ga102; + +/// Hardware Abstraction Layer for Falcon cores. +/// +/// Implements chipset-specific low-level operations. The trait is generic against [`FalconEngine`] +/// so its `BASE` parameter can be used in order to avoid runtime bound checks when accessing +/// registers. +pub(crate) trait FalconHal: Sync { + /// Activates the Falcon core if the engine is a riscv/falcon dual engine. + fn select_core(&self, _falcon: &Falcon, _bar: &Bar0) -> Result { + Ok(()) + } + + /// Returns the fused version of the signature to use in order to run a HS firmware on this + /// falcon instance. `engine_id_mask` and `ucode_id` are obtained from the firmware header. + fn signature_reg_fuse_version( + &self, + falcon: &Falcon, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result; + + /// Program the boot ROM registers prior to starting a secure firmware. + fn program_brom(&self, falcon: &Falcon, bar: &Bar0, params: &FalconBromParams) -> Result; +} + +/// Returns a boxed falcon HAL adequate for `chipset`. +/// +/// We use a heap-allocated trait object instead of a statically defined one because the +/// generic `FalconEngine` argument makes it difficult to define all the combinations +/// statically. +pub(super) fn falcon_hal( + chipset: Chipset, +) -> Result>> { + use Chipset::*; + + let hal = match chipset { + GA102 | GA103 | GA104 | GA106 | GA107 => { + KBox::new(ga102::Ga102::::new(), GFP_KERNEL)? 
as KBox> + } + _ => return Err(ENOTSUPP), + }; + + Ok(hal) +} diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs new file mode 100644 index 000000000000..0a4e5e7adf8c --- /dev/null +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +use core::marker::PhantomData; +use core::time::Duration; + +use kernel::device; +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::falcon::{ + Falcon, FalconBromParams, FalconEngine, FalconModSelAlgo, PeregrineCoreSelect, +}; +use crate::regs; +use crate::util; + +use super::FalconHal; + +fn select_core_ga102(bar: &Bar0) -> Result { + let bcr_ctrl = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + if bcr_ctrl.core_select() != PeregrineCoreSelect::Falcon { + regs::NV_PRISCV_RISCV_BCR_CTRL::default() + .set_core_select(PeregrineCoreSelect::Falcon) + .write(bar, E::BASE); + + // TIMEOUT: falcon core should take less than 10ms to report being enabled. + util::wait_on(Duration::from_millis(10), || { + let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); + if r.valid() { + Some(()) + } else { + None + } + })?; + } + + Ok(()) +} + +fn signature_reg_fuse_version_ga102( + dev: &device::Device, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, +) -> Result { + // TODO: The ucode fuse versions are contained in the FUSE_OPT_FPF__UCODE_VERSION + // registers, which are an array. Our register definition macros do not allow us to manage them + // properly, so we need to hardcode their addresses for now. Clean this up once we support + // register arrays. + + // Each engine has 16 ucode version registers numbered from 1 to 16. + if ucode_id == 0 || ucode_id > 16 { + dev_err!(dev, "invalid ucode id {:#x}", ucode_id); + return Err(EINVAL); + } + + // Base address of the FUSE registers array corresponding to the engine. 
+ let reg_fuse_base = if engine_id_mask & 0x0001 != 0 { + regs::NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION::OFFSET + } else if engine_id_mask & 0x0004 != 0 { + regs::NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION::OFFSET + } else if engine_id_mask & 0x0400 != 0 { + regs::NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION::OFFSET + } else { + dev_err!(dev, "unexpected engine_id_mask {:#x}", engine_id_mask); + return Err(EINVAL); + }; + + // Read `reg_fuse_base[ucode_id - 1]`. + let reg_fuse_version = + bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::())); + + // TODO: replace with `last_set_bit` once it lands. + Ok(u32::BITS - reg_fuse_version.leading_zeros()) +} + +fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> Result { + regs::NV_PFALCON2_FALCON_BROM_PARAADDR::default() + .set_value(params.pkc_data_offset) + .write(bar, E::BASE); + regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() + .set_value(params.engine_id_mask as u32) + .write(bar, E::BASE); + regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() + .set_ucode_id(params.ucode_id) + .write(bar, E::BASE); + regs::NV_PFALCON2_FALCON_MOD_SEL::default() + .set_algo(FalconModSelAlgo::Rsa3k) + .write(bar, E::BASE); + + Ok(()) +} + +pub(super) struct Ga102(PhantomData); + +impl Ga102 { + pub(super) fn new() -> Self { + Self(PhantomData) + } +} + +impl FalconHal for Ga102 { + fn select_core(&self, _falcon: &Falcon, bar: &Bar0) -> Result { + select_core_ga102::(bar) + } + + fn signature_reg_fuse_version( + &self, + falcon: &Falcon, + bar: &Bar0, + engine_id_mask: u16, + ucode_id: u8, + ) -> Result { + signature_reg_fuse_version_ga102(&falcon.dev, bar, engine_id_mask, ucode_id) + } + + fn program_brom(&self, _falcon: &Falcon, bar: &Bar0, params: &FalconBromParams) -> Result { + program_brom_ga102::(bar, params) + } +} diff --git a/drivers/gpu/nova-core/falcon/sec2.rs b/drivers/gpu/nova-core/falcon/sec2.rs new file mode 100644 index 000000000000..5147d9e2a7fe --- /dev/null +++ 
b/drivers/gpu/nova-core/falcon/sec2.rs @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +use crate::falcon::FalconEngine; + +/// Type specifying the `Sec2` falcon engine. Cannot be instantiated. +pub(crate) struct Sec2(()); + +impl FalconEngine for Sec2 { + const BASE: usize = 0x00840000; +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 992840f6b108..29ba2dd0cba0 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -3,6 +3,7 @@ use kernel::{device, devres::Devres, error::code::*, pci, prelude::*}; use crate::driver::Bar0; +use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; use crate::fb::SysmemFlush; use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; @@ -203,6 +204,16 @@ pub(crate) fn new( // System memory page required for sysmembar to properly flush into system memory. let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; + let gsp_falcon = Falcon::::new( + pdev.as_ref(), + spec.chipset, + bar, + spec.chipset > Chipset::GA100, + )?; + gsp_falcon.clear_swgen0_intr(bar); + + let _sec2_falcon = Falcon::::new(pdev.as_ref(), spec.chipset, bar, true)?; + Ok(pin_init!(Self { spec, bar: devres_bar, diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 8ac04b8586e7..808997bbe36d 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -4,6 +4,7 @@ mod dma; mod driver; +mod falcon; mod fb; mod firmware; mod gfw; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index b599e7ddad57..a2f449eb08b5 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -7,6 +7,10 @@ #[macro_use] mod macros; +use crate::falcon::{ + DmaTrfCmdSize, FalconCoreRev, FalconCoreRevSubversion, FalconFbifMemType, FalconFbifTarget, + FalconModSelAlgo, FalconSecurityModel, PeregrineCoreSelect, +}; use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; @@ -72,3 +76,145 @@ 
pub(crate) fn completed(self) -> bool { self.progress() == 0xff } } + +/* FUSE */ + +register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_SEC2_UCODE1_VERSION @ 0x00824140 { + 15:0 data as u16; +}); + +register!(NV_FUSE_OPT_FPF_GSP_UCODE1_VERSION @ 0x008241c0 { + 15:0 data as u16; +}); + +/* PFALCON */ + +register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 { + 4:4 halt as bool; + 6:6 swgen0 as bool; +}); + +register!(NV_PFALCON_FALCON_MAILBOX0 @ +0x00000040 { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_MAILBOX1 @ +0x00000044 { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_RM @ +0x00000084 { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_HWCFG2 @ +0x000000f4 { + 10:10 riscv as bool; + 12:12 mem_scrubbing as bool, "Set to 0 after memory scrubbing is completed"; + 31:31 reset_ready as bool, "Signal indicating that reset is completed (GA102+)"; +}); + +impl NV_PFALCON_FALCON_HWCFG2 { + /// Returns `true` if memory scrubbing is completed. 
+ pub(crate) fn mem_scrubbing_done(self) -> bool { + !self.mem_scrubbing() + } +} + +register!(NV_PFALCON_FALCON_CPUCTL @ +0x00000100 { + 1:1 startcpu as bool; + 4:4 halted as bool; + 6:6 alias_en as bool; +}); + +register!(NV_PFALCON_FALCON_BOOTVEC @ +0x00000104 { + 31:0 value as u32; +}); + +register!(NV_PFALCON_FALCON_DMACTL @ +0x0000010c { + 0:0 require_ctx as bool; + 1:1 dmem_scrubbing as bool; + 2:2 imem_scrubbing as bool; + 6:3 dmaq_num as u8; + 7:7 secure_stat as bool; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE @ +0x00000110 { + 31:0 base as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFMOFFS @ +0x00000114 { + 23:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFCMD @ +0x00000118 { + 0:0 full as bool; + 1:1 idle as bool; + 3:2 sec as u8; + 4:4 imem as bool; + 5:5 is_write as bool; + 10:8 size as u8 ?=> DmaTrfCmdSize; + 14:12 ctxdma as u8; + 16:16 set_dmtag as u8; +}); + +register!(NV_PFALCON_FALCON_DMATRFFBOFFS @ +0x0000011c { + 31:0 offs as u32; +}); + +register!(NV_PFALCON_FALCON_DMATRFBASE1 @ +0x00000128 { + 8:0 base as u16; +}); + +register!(NV_PFALCON_FALCON_HWCFG1 @ +0x0000012c { + 3:0 core_rev as u8 ?=> FalconCoreRev, "Core revision"; + 5:4 security_model as u8 ?=> FalconSecurityModel, "Security model"; + 7:6 core_rev_subversion as u8 ?=> FalconCoreRevSubversion, "Core revision subversion"; +}); + +register!(NV_PFALCON_FALCON_CPUCTL_ALIAS @ +0x00000130 { + 1:1 startcpu as bool; +}); + +// Actually known as `NV_PSEC_FALCON_ENGINE` and `NV_PGSP_FALCON_ENGINE` depending on the falcon +// instance. +register!(NV_PFALCON_FALCON_ENGINE @ +0x000003c0 { + 0:0 reset as bool; +}); + +// TODO: this is an array of registers. 
+register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 { + 1:0 target as u8 ?=> FalconFbifTarget; + 2:2 mem_type as bool => FalconFbifMemType; +}); + +register!(NV_PFALCON_FBIF_CTL @ +0x00000624 { + 7:7 allow_phys_no_ctx as bool; +}); + +register!(NV_PFALCON2_FALCON_MOD_SEL @ +0x00001180 { + 7:0 algo as u8 ?=> FalconModSelAlgo; +}); + +register!(NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID @ +0x00001198 { + 7:0 ucode_id as u8; +}); + +register!(NV_PFALCON2_FALCON_BROM_ENGIDMASK @ +0x0000119c { + 31:0 value as u32; +}); + +// TODO: this is an array of registers. +register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 { + 31:0 value as u32; +}); + +/* PRISCV */ + +register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 { + 0:0 valid as bool; + 4:4 core_select as bool => PeregrineCoreSelect; + 8:8 br_fetch as bool; +}); From d0c167eb0afaed24e320e2b461ffae21fe48df7c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:00 +0900 Subject: [PATCH 031/358] gpu: nova-core: firmware: add ucode descriptor used by FWSEC-FRTS FWSEC-FRTS is the first firmware we need to run on the GSP falcon in order to initiate the GSP boot process. Introduce the structure that describes it. 
Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-16-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/firmware.rs | 45 +++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 4b8a38358a4f..2f4f5c7c7902 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -41,6 +41,51 @@ pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result usize { + const HDR_SIZE_SHIFT: u32 = 16; + const HDR_SIZE_MASK: u32 = 0xffff0000; + + ((self.hdr & HDR_SIZE_MASK) >> HDR_SIZE_SHIFT) as usize + } +} + pub(crate) struct ModInfoBuilder(firmware::ModInfoBuilder); impl ModInfoBuilder { From 6fda04e7f0cd57989066fa3b7975af08d779c8fd Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 19 Jun 2025 22:24:01 +0900 Subject: [PATCH 032/358] gpu: nova-core: vbios: Add base support for VBIOS construction and iteration Add support for navigating the VBIOS images required for extracting ucode data for GSP to boot. Later patches will build on this. 
Debug log messages will show the BIOS images: [102141.013287] NovaCore: Found BIOS image at offset 0x0, size: 0xfe00, type: PciAt [102141.080692] NovaCore: Found BIOS image at offset 0xfe00, size: 0x14800, type: Efi [102141.098443] NovaCore: Found BIOS image at offset 0x24600, size: 0x5600, type: FwSec [102141.415095] NovaCore: Found BIOS image at offset 0x29c00, size: 0x60800, type: FwSec Cc: Alexandre Courbot Cc: John Hubbard Cc: Shirish Baskaran Cc: Alistair Popple Cc: Timur Tabi Cc: Ben Skeggs [ acourbot@nvidia.com: fix clippy warnings, read_more() function ] Signed-off-by: Alexandre Courbot Signed-off-by: Joel Fernandes Link: https://lore.kernel.org/r/20250619-nova-frts-v6-17-ecf41ef99252@nvidia.com [ Replace extend_with() and copy_from_slice() with extend_from_slice(); re-format and use markdown in comments. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/firmware.rs | 4 +- drivers/gpu/nova-core/gpu.rs | 4 + drivers/gpu/nova-core/nova_core.rs | 1 + drivers/gpu/nova-core/vbios.rs | 681 +++++++++++++++++++++++++++++ 4 files changed, 688 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/nova-core/vbios.rs diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 2f4f5c7c7902..41f43a729ad3 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -44,6 +44,7 @@ pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result usize { const HDR_SIZE_SHIFT: u32 = 16; const HDR_SIZE_MASK: u32 = 0xffff0000; diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 29ba2dd0cba0..3c64d2b43883 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -9,6 +9,7 @@ use crate::gfw; use crate::regs; use crate::util; +use crate::vbios::Vbios; use core::fmt; macro_rules! 
define_chipset { @@ -214,6 +215,9 @@ pub(crate) fn new( let _sec2_falcon = Falcon::::new(pdev.as_ref(), spec.chipset, bar, true)?; + // Will be used in a later patch when fwsec firmware is needed. + let _bios = Vbios::new(pdev, bar)?; + Ok(pin_init!(Self { spec, bar: devres_bar, diff --git a/drivers/gpu/nova-core/nova_core.rs b/drivers/gpu/nova-core/nova_core.rs index 808997bbe36d..de14f2e92636 100644 --- a/drivers/gpu/nova-core/nova_core.rs +++ b/drivers/gpu/nova-core/nova_core.rs @@ -11,6 +11,7 @@ mod gpu; mod regs; mod util; +mod vbios; pub(crate) const MODULE_NAME: &kernel::str::CStr = ::NAME; diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs new file mode 100644 index 000000000000..eafca96041ac --- /dev/null +++ b/drivers/gpu/nova-core/vbios.rs @@ -0,0 +1,681 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! VBIOS extraction and parsing. + +// To be removed when all code is used. +#![expect(dead_code)] + +use crate::driver::Bar0; +use core::convert::TryFrom; +use kernel::error::Result; +use kernel::pci; +use kernel::prelude::*; + +/// The offset of the VBIOS ROM in the BAR0 space. +const ROM_OFFSET: usize = 0x300000; +/// The maximum length of the VBIOS ROM to scan into. +const BIOS_MAX_SCAN_LEN: usize = 0x100000; +/// The size to read ahead when parsing initial BIOS image headers. +const BIOS_READ_AHEAD_SIZE: usize = 1024; +/// The bit in the last image indicator byte for the PCI Data Structure that +/// indicates the last image. Bit 0-6 are reserved, bit 7 is last image bit. +const LAST_IMAGE_BIT_MASK: u8 = 0x80; + +// PMU lookup table entry types. Used to locate PMU table entries +// in the Fwsec image, corresponding to falcon ucodes. +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FIRMWARE_SEC_LIC: u8 = 0x05; +#[expect(dead_code)] +const FALCON_UCODE_ENTRY_APPID_FWSEC_DBG: u8 = 0x45; +const FALCON_UCODE_ENTRY_APPID_FWSEC_PROD: u8 = 0x85; + +/// Vbios Reader for constructing the VBIOS data. 
+struct VbiosIterator<'a> { + pdev: &'a pci::Device, + bar0: &'a Bar0, + /// VBIOS data vector: As BIOS images are scanned, they are added to this vector for reference + /// or copying into other data structures. It is the entire scanned contents of the VBIOS which + /// progressively extends. It is used so that we do not re-read any contents that are already + /// read as we use the cumulative length read so far, and re-read any gaps as we extend the + /// length. + data: KVec, + /// Current offset of the [`Iterator`]. + current_offset: usize, + /// Indicate whether the last image has been found. + last_found: bool, +} + +impl<'a> VbiosIterator<'a> { + fn new(pdev: &'a pci::Device, bar0: &'a Bar0) -> Result { + Ok(Self { + pdev, + bar0, + data: KVec::new(), + current_offset: 0, + last_found: false, + }) + } + + /// Read bytes from the ROM at the current end of the data vector. + fn read_more(&mut self, len: usize) -> Result { + let current_len = self.data.len(); + let start = ROM_OFFSET + current_len; + + // Ensure length is a multiple of 4 for 32-bit reads + if len % core::mem::size_of::() != 0 { + dev_err!( + self.pdev.as_ref(), + "VBIOS read length {} is not a multiple of 4\n", + len + ); + return Err(EINVAL); + } + + self.data.reserve(len, GFP_KERNEL)?; + // Read ROM data bytes and push directly to `data`. + for addr in (start..start + len).step_by(core::mem::size_of::()) { + // Read 32-bit word from the VBIOS ROM + let word = self.bar0.try_read32(addr)?; + + // Convert the `u32` to a 4 byte array and push each byte. + word.to_ne_bytes() + .iter() + .try_for_each(|&b| self.data.push(b, GFP_KERNEL))?; + } + + Ok(()) + } + + /// Read bytes at a specific offset, filling any gap. + fn read_more_at_offset(&mut self, offset: usize, len: usize) -> Result { + if offset > BIOS_MAX_SCAN_LEN { + dev_err!(self.pdev.as_ref(), "Error: exceeded BIOS scan limit.\n"); + return Err(EINVAL); + } + + // If `offset` is beyond current data size, fill the gap first. 
+ let current_len = self.data.len(); + let gap_bytes = offset.saturating_sub(current_len); + + // Now read the requested bytes at the offset. + self.read_more(gap_bytes + len) + } + + /// Read a BIOS image at a specific offset and create a [`BiosImage`] from it. + /// + /// `self.data` is extended as needed and a new [`BiosImage`] is returned. + /// `context` is a string describing the operation for error reporting. + fn read_bios_image_at_offset( + &mut self, + offset: usize, + len: usize, + context: &str, + ) -> Result { + let data_len = self.data.len(); + if offset + len > data_len { + self.read_more_at_offset(offset, len).inspect_err(|e| { + dev_err!( + self.pdev.as_ref(), + "Failed to read more at offset {:#x}: {:?}\n", + offset, + e + ) + })?; + } + + BiosImage::new(self.pdev, &self.data[offset..offset + len]).inspect_err(|err| { + dev_err!( + self.pdev.as_ref(), + "Failed to {} at offset {:#x}: {:?}\n", + context, + offset, + err + ) + }) + } +} + +impl<'a> Iterator for VbiosIterator<'a> { + type Item = Result; + + /// Iterate over all VBIOS images until the last image is detected or offset + /// exceeds scan limit. + fn next(&mut self) -> Option { + if self.last_found { + return None; + } + + if self.current_offset > BIOS_MAX_SCAN_LEN { + dev_err!( + self.pdev.as_ref(), + "Error: exceeded BIOS scan limit, stopping scan\n" + ); + return None; + } + + // Parse image headers first to get image size. + let image_size = match self.read_bios_image_at_offset( + self.current_offset, + BIOS_READ_AHEAD_SIZE, + "parse initial BIOS image headers", + ) { + Ok(image) => image.image_size_bytes(), + Err(e) => return Some(Err(e)), + }; + + // Now create a new `BiosImage` with the full image data. 
+ let full_image = match self.read_bios_image_at_offset( + self.current_offset, + image_size, + "parse full BIOS image", + ) { + Ok(image) => image, + Err(e) => return Some(Err(e)), + }; + + self.last_found = full_image.is_last(); + + // Advance to next image (aligned to 512 bytes). + self.current_offset += image_size; + // TODO: replace with `align_up` once it lands. + self.current_offset = self.current_offset.next_multiple_of(512); + + Some(Ok(full_image)) + } +} + +pub(crate) struct Vbios { + fwsec_image: FwSecBiosImage, +} + +impl Vbios { + /// Probe for VBIOS extraction. + /// + /// Once the VBIOS object is built, `bar0` is not read for [`Vbios`] purposes anymore. + pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { + // Images to extract from iteration + let mut pci_at_image: Option = None; + let mut first_fwsec_image: Option = None; + let mut second_fwsec_image: Option = None; + + // Parse all VBIOS images in the ROM + for image_result in VbiosIterator::new(pdev, bar0)? { + let full_image = image_result?; + + dev_dbg!( + pdev.as_ref(), + "Found BIOS image: size: {:#x}, type: {}, last: {}\n", + full_image.image_size_bytes(), + full_image.image_type_str(), + full_image.is_last() + ); + + // Get references to images we will need after the loop, in order to + // setup the falcon data offset. + match full_image { + BiosImage::PciAt(image) => { + pci_at_image = Some(image); + } + BiosImage::FwSec(image) => { + if first_fwsec_image.is_none() { + first_fwsec_image = Some(image); + } else { + second_fwsec_image = Some(image); + } + } + // For now we don't need to handle these + BiosImage::Efi(_image) => {} + BiosImage::Nbsi(_image) => {} + } + } + + // Using all the images, setup the falcon data pointer in Fwsec. + // These are temporarily unused images and will be used in later patches. 
+ if let (Some(second), Some(_first), Some(_pci_at)) = + (second_fwsec_image, first_fwsec_image, pci_at_image) + { + Ok(Vbios { + fwsec_image: second, + }) + } else { + dev_err!( + pdev.as_ref(), + "Missing required images for falcon data setup, skipping\n" + ); + Err(EINVAL) + } + } +} + +/// PCI Data Structure as defined in PCI Firmware Specification +#[derive(Debug, Clone)] +#[repr(C)] +struct PcirStruct { + /// PCI Data Structure signature ("PCIR" or "NPDS") + signature: [u8; 4], + /// PCI Vendor ID (e.g., 0x10DE for NVIDIA) + vendor_id: u16, + /// PCI Device ID + device_id: u16, + /// Device List Pointer + device_list_ptr: u16, + /// PCI Data Structure Length + pci_data_struct_len: u16, + /// PCI Data Structure Revision + pci_data_struct_rev: u8, + /// Class code (3 bytes, 0x03 for display controller) + class_code: [u8; 3], + /// Size of this image in 512-byte blocks + image_len: u16, + /// Revision Level of the Vendor's ROM + vendor_rom_rev: u16, + /// ROM image type (0x00 = PC-AT compatible, 0x03 = EFI, 0x70 = NBSI) + code_type: u8, + /// Last image indicator (0x00 = Not last image, 0x80 = Last image) + last_image: u8, + /// Maximum Run-time Image Length (units of 512 bytes) + max_runtime_image_len: u16, +} + +impl PcirStruct { + fn new(pdev: &pci::Device, data: &[u8]) -> Result { + if data.len() < core::mem::size_of::() { + dev_err!(pdev.as_ref(), "Not enough data for PcirStruct\n"); + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "PCIR" (0x52494350) or "NPDS" (0x5344504e). 
+ if &signature != b"PCIR" && &signature != b"NPDS" { + dev_err!( + pdev.as_ref(), + "Invalid signature for PcirStruct: {:?}\n", + signature + ); + return Err(EINVAL); + } + + let mut class_code = [0u8; 3]; + class_code.copy_from_slice(&data[13..16]); + + let image_len = u16::from_le_bytes([data[16], data[17]]); + if image_len == 0 { + dev_err!(pdev.as_ref(), "Invalid image length: 0\n"); + return Err(EINVAL); + } + + Ok(PcirStruct { + signature, + vendor_id: u16::from_le_bytes([data[4], data[5]]), + device_id: u16::from_le_bytes([data[6], data[7]]), + device_list_ptr: u16::from_le_bytes([data[8], data[9]]), + pci_data_struct_len: u16::from_le_bytes([data[10], data[11]]), + pci_data_struct_rev: data[12], + class_code, + image_len, + vendor_rom_rev: u16::from_le_bytes([data[18], data[19]]), + code_type: data[20], + last_image: data[21], + max_runtime_image_len: u16::from_le_bytes([data[22], data[23]]), + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.image_len as usize * 512 + } +} + +/// PCI ROM Expansion Header as defined in PCI Firmware Specification. +/// +/// This is header is at the beginning of every image in the set of images in the ROM. It contains +/// a pointer to the PCI Data Structure which describes the image. For "NBSI" images (NoteBook +/// System Information), the ROM header deviates from the standard and contains an offset to the +/// NBSI image however we do not yet parse that in this module and keep it for future reference. 
+#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct PciRomHeader { + /// 00h: Signature (0xAA55) + signature: u16, + /// 02h: Reserved bytes for processor architecture unique data (20 bytes) + reserved: [u8; 20], + /// 16h: NBSI Data Offset (NBSI-specific, offset from header to NBSI image) + nbsi_data_offset: Option, + /// 18h: Pointer to PCI Data Structure (offset from start of ROM image) + pci_data_struct_offset: u16, + /// 1Ah: Size of block (this is NBSI-specific) + size_of_block: Option, +} + +impl PciRomHeader { + fn new(pdev: &pci::Device, data: &[u8]) -> Result { + if data.len() < 26 { + // Need at least 26 bytes to read pciDataStrucPtr; sizeOfBlock needs 30 and is optional. + return Err(EINVAL); + } + + let signature = u16::from_le_bytes([data[0], data[1]]); + + // Check for valid ROM signatures. + match signature { + 0xAA55 | 0xBB77 | 0x4E56 => {} + _ => { + dev_err!(pdev.as_ref(), "ROM signature unknown {:#x}\n", signature); + return Err(EINVAL); + } + } + + // Read the pointer to the PCI Data Structure at offset 0x18. + let pci_data_struct_ptr = u16::from_le_bytes([data[24], data[25]]); + + // Try to read optional fields if enough data. + let mut size_of_block = None; + let mut nbsi_data_offset = None; + + if data.len() >= 30 { + // Read size_of_block at offset 0x1A. + size_of_block = Some( + (data[29] as u32) << 24 + | (data[28] as u32) << 16 + | (data[27] as u32) << 8 + | (data[26] as u32), + ); + } + + // For NBSI images, try to read the nbsiDataOffset at offset 0x16. + if data.len() >= 24 { + nbsi_data_offset = Some(u16::from_le_bytes([data[22], data[23]])); + } + + Ok(PciRomHeader { + signature, + reserved: [0u8; 20], + pci_data_struct_offset: pci_data_struct_ptr, + size_of_block, + nbsi_data_offset, + }) + } +} + +/// NVIDIA PCI Data Extension Structure. +/// +/// This is similar to the PCI Data Structure, but is Nvidia-specific and is placed right after the +/// PCI Data Structure. 
It contains some fields that are redundant with the PCI Data Structure, but +/// are needed for traversing the BIOS images. It is expected to be present in all BIOS images +/// except for NBSI images. +#[derive(Debug, Clone)] +#[repr(C)] +struct NpdeStruct { + /// 00h: Signature ("NPDE") + signature: [u8; 4], + /// 04h: NVIDIA PCI Data Extension Revision + npci_data_ext_rev: u16, + /// 06h: NVIDIA PCI Data Extension Length + npci_data_ext_len: u16, + /// 08h: Sub-image Length (in 512-byte units) + subimage_len: u16, + /// 0Ah: Last image indicator flag + last_image: u8, +} + +impl NpdeStruct { + fn new(pdev: &pci::Device, data: &[u8]) -> Option { + if data.len() < core::mem::size_of::() { + dev_dbg!(pdev.as_ref(), "Not enough data for NpdeStruct\n"); + return None; + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[0..4]); + + // Signature should be "NPDE" (0x4544504E). + if &signature != b"NPDE" { + dev_dbg!( + pdev.as_ref(), + "Invalid signature for NpdeStruct: {:?}\n", + signature + ); + return None; + } + + let subimage_len = u16::from_le_bytes([data[8], data[9]]); + if subimage_len == 0 { + dev_dbg!(pdev.as_ref(), "Invalid subimage length: 0\n"); + return None; + } + + Some(NpdeStruct { + signature, + npci_data_ext_rev: u16::from_le_bytes([data[4], data[5]]), + npci_data_ext_len: u16::from_le_bytes([data[6], data[7]]), + subimage_len, + last_image: data[10], + }) + } + + /// Check if this is the last image in the ROM. + fn is_last(&self) -> bool { + self.last_image & LAST_IMAGE_BIT_MASK != 0 + } + + /// Calculate image size in bytes from 512-byte blocks. + fn image_size_bytes(&self) -> usize { + self.subimage_len as usize * 512 + } + + /// Try to find NPDE in the data, the NPDE is right after the PCIR. 
+ fn find_in_data( + pdev: &pci::Device, + data: &[u8], + rom_header: &PciRomHeader, + pcir: &PcirStruct, + ) -> Option { + // Calculate the offset where NPDE might be located + // NPDE should be right after the PCIR structure, aligned to 16 bytes + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let npde_start = (pcir_offset + pcir.pci_data_struct_len as usize + 0x0F) & !0x0F; + + // Check if we have enough data + if npde_start + core::mem::size_of::() > data.len() { + dev_dbg!(pdev.as_ref(), "Not enough data for NPDE\n"); + return None; + } + + // Try to create NPDE from the data + NpdeStruct::new(pdev, &data[npde_start..]) + } +} + +// Use a macro to implement BiosImage enum and methods. This avoids having to +// repeat each enum type when implementing functions like base() in BiosImage. +macro_rules! bios_image { + ( + $($variant:ident: $class:ident),* $(,)? + ) => { + // BiosImage enum with variants for each image type + enum BiosImage { + $($variant($class)),* + } + + impl BiosImage { + /// Get a reference to the common BIOS image data regardless of type + fn base(&self) -> &BiosImageBase { + match self { + $(Self::$variant(img) => &img.base),* + } + } + + /// Returns a string representing the type of BIOS image + fn image_type_str(&self) -> &'static str { + match self { + $(Self::$variant(_) => stringify!($variant)),* + } + } + } + } +} + +impl BiosImage { + /// Check if this is the last image. + fn is_last(&self) -> bool { + let base = self.base(); + + // For NBSI images (type == 0x70), return true as they're + // considered the last image + if matches!(self, Self::Nbsi(_)) { + return true; + } + + // For other image types, check the NPDE first if available + if let Some(ref npde) = base.npde { + return npde.is_last(); + } + + // Otherwise, fall back to checking the PCIR last_image flag + base.pcir.is_last() + } + + /// Get the image size in bytes. 
+ fn image_size_bytes(&self) -> usize { + let base = self.base(); + + // Prefer NPDE image size if available + if let Some(ref npde) = base.npde { + return npde.image_size_bytes(); + } + + // Otherwise, fall back to the PCIR image size + base.pcir.image_size_bytes() + } + + /// Create a [`BiosImageBase`] from a byte slice and convert it to a [`BiosImage`] which + /// triggers the constructor of the specific BiosImage enum variant. + fn new(pdev: &pci::Device, data: &[u8]) -> Result { + let base = BiosImageBase::new(pdev, data)?; + let image = base.into_image().inspect_err(|e| { + dev_err!(pdev.as_ref(), "Failed to create BiosImage: {:?}\n", e); + })?; + + Ok(image) + } +} + +bios_image! { + PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image + Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) + Nbsi: NbsiBiosImage, // NBSI (NoteBook System Information) + FwSec: FwSecBiosImage, // FWSEC (Firmware Security) +} + +struct PciAtBiosImage { + base: BiosImageBase, + // PCI-AT-specific fields can be added here in the future. +} + +struct EfiBiosImage { + base: BiosImageBase, + // EFI-specific fields can be added here in the future. +} + +struct NbsiBiosImage { + base: BiosImageBase, + // NBSI-specific fields can be added here in the future. +} + +struct FwSecBiosImage { + base: BiosImageBase, + // FWSEC-specific fields can be added here in the future. +} + +// Convert from BiosImageBase to BiosImage +impl TryFrom for BiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result { + match base.pcir.code_type { + 0x00 => Ok(BiosImage::PciAt(PciAtBiosImage { base })), + 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), + 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), + 0xE0 => Ok(BiosImage::FwSec(FwSecBiosImage { base })), + _ => Err(EINVAL), + } + } +} + +/// BIOS Image structure containing various headers and reference fields to all BIOS images. 
+/// +/// Each BiosImage type has a BiosImageBase type along with other image-specific fields. Note that +/// Rust favors composition of types over inheritance. +#[derive(Debug)] +#[expect(dead_code)] +struct BiosImageBase { + /// PCI ROM Expansion Header + rom_header: PciRomHeader, + /// PCI Data Structure + pcir: PcirStruct, + /// NVIDIA PCI Data Extension (optional) + npde: Option, + /// Image data (includes ROM header and PCIR) + data: KVec, +} + +impl BiosImageBase { + fn into_image(self) -> Result { + BiosImage::try_from(self) + } + + /// Creates a new BiosImageBase from raw byte data. + fn new(pdev: &pci::Device, data: &[u8]) -> Result { + // Ensure we have enough data for the ROM header. + if data.len() < 26 { + dev_err!(pdev.as_ref(), "Not enough data for ROM header\n"); + return Err(EINVAL); + } + + // Parse the ROM header. + let rom_header = PciRomHeader::new(pdev, &data[0..26]) + .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PciRomHeader: {:?}\n", e))?; + + // Get the PCI Data Structure using the pointer from the ROM header. + let pcir_offset = rom_header.pci_data_struct_offset as usize; + let pcir_data = data + .get(pcir_offset..pcir_offset + core::mem::size_of::()) + .ok_or(EINVAL) + .inspect_err(|_| { + dev_err!( + pdev.as_ref(), + "PCIR offset {:#x} out of bounds (data length: {})\n", + pcir_offset, + data.len() + ); + dev_err!( + pdev.as_ref(), + "Consider reading more data for construction of BiosImage\n" + ); + })?; + + let pcir = PcirStruct::new(pdev, pcir_data) + .inspect_err(|e| dev_err!(pdev.as_ref(), "Failed to create PcirStruct: {:?}\n", e))?; + + // Look for the optional NPDE structure (attempted for every image type, NBSI included). + let npde = NpdeStruct::find_in_data(pdev, data, &rom_header, &pcir); + + // Create a copy of the data. 
+ let mut data_copy = KVec::new(); + data_copy.extend_from_slice(data, GFP_KERNEL)?; + + Ok(BiosImageBase { + rom_header, + pcir, + npde, + data: data_copy, + }) + } +} From dc70c6ae2441c8ab5438331b2430ec098fdd94bb Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 19 Jun 2025 22:24:02 +0900 Subject: [PATCH 033/358] gpu: nova-core: vbios: Add support to look up PMU table in FWSEC The PMU table in the FWSEC image has to be located to locate the start of the Falcon ucode in the same or another FWSEC image. Add support for the same. Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-18-ecf41ef99252@nvidia.com [ Re-format and use markdown in comments. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/vbios.rs | 182 ++++++++++++++++++++++++++++++++- 1 file changed, 180 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index eafca96041ac..04c5f96844b4 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -333,6 +333,112 @@ fn image_size_bytes(&self) -> usize { } } +/// BIOS Information Table (BIT) Header. +/// +/// This is the head of the BIT table, that is used to locate the Falcon data. The BIT table (with +/// its header) is in the [`PciAtBiosImage`] and the falcon data it is pointing to is in the +/// [`FwSecBiosImage`]. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct BitHeader { + /// 0h: BIT Header Identifier (BMP=0x7FFF/BIT=0xB8FF) + id: u16, + /// 2h: BIT Header Signature ("BIT\0") + signature: [u8; 4], + /// 6h: Binary Coded Decimal Version, ex: 0x0100 is 1.00. 
+ bcd_version: u16, + /// 8h: Size of BIT Header (in bytes) + header_size: u8, + /// 9h: Size of BIT Tokens (in bytes) + token_size: u8, + /// Ah: Number of token entries that follow + token_entries: u8, + /// Bh: BIT Header Checksum + checksum: u8, +} + +impl BitHeader { + fn new(data: &[u8]) -> Result { + if data.len() < 12 { + return Err(EINVAL); + } + + let mut signature = [0u8; 4]; + signature.copy_from_slice(&data[2..6]); + + // Check header ID and signature + let id = u16::from_le_bytes([data[0], data[1]]); + if id != 0xB8FF || &signature != b"BIT\0" { + return Err(EINVAL); + } + + Ok(BitHeader { + id, + signature, + bcd_version: u16::from_le_bytes([data[6], data[7]]), + header_size: data[8], + token_size: data[9], + token_entries: data[10], + checksum: data[11], + }) + } +} + +/// BIT Token Entry: Records in the BIT table that follow the BIT header. +#[derive(Debug, Clone, Copy)] +#[expect(dead_code)] +struct BitToken { + /// 00h: Token identifier + id: u8, + /// 01h: Version of the token data + data_version: u8, + /// 02h: Size of token data in bytes + data_size: u16, + /// 04h: Offset to the token data + data_offset: u16, +} + +// Define the token ID for the Falcon data +const BIT_TOKEN_ID_FALCON_DATA: u8 = 0x70; + +impl BitToken { + /// Find a BIT token entry by BIT ID in a PciAtBiosImage + fn from_id(image: &PciAtBiosImage, token_id: u8) -> Result { + let header = &image.bit_header; + + // Offset to the first token entry + let tokens_start = image.bit_offset + header.header_size as usize; + + for i in 0..header.token_entries as usize { + let entry_offset = tokens_start + (i * header.token_size as usize); + + // Make sure we don't go out of bounds + if entry_offset + header.token_size as usize > image.base.data.len() { + return Err(EINVAL); + } + + // Check if this token has the requested ID + if image.base.data[entry_offset] == token_id { + return Ok(BitToken { + id: image.base.data[entry_offset], + data_version: image.base.data[entry_offset + 1], + 
data_size: u16::from_le_bytes([ + image.base.data[entry_offset + 2], + image.base.data[entry_offset + 3], + ]), + data_offset: u16::from_le_bytes([ + image.base.data[entry_offset + 4], + image.base.data[entry_offset + 5], + ]), + }); + } + } + + // Token not found + Err(ENOENT) + } +} + /// PCI ROM Expansion Header as defined in PCI Firmware Specification. /// /// This is header is at the beginning of every image in the set of images in the ROM. It contains @@ -574,9 +680,13 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { FwSec: FwSecBiosImage, // FWSEC (Firmware Security) } +/// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. +/// +/// It contains the BIT header and the BIT tokens. struct PciAtBiosImage { base: BiosImageBase, - // PCI-AT-specific fields can be added here in the future. + bit_header: BitHeader, + bit_offset: usize, } struct EfiBiosImage { @@ -600,7 +710,7 @@ impl TryFrom for BiosImage { fn try_from(base: BiosImageBase) -> Result { match base.pcir.code_type { - 0x00 => Ok(BiosImage::PciAt(PciAtBiosImage { base })), + 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), 0xE0 => Ok(BiosImage::FwSec(FwSecBiosImage { base })), @@ -679,3 +789,71 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { }) } } + +impl PciAtBiosImage { + /// Find a byte pattern in a slice. + fn find_byte_pattern(haystack: &[u8], needle: &[u8]) -> Result { + haystack + .windows(needle.len()) + .position(|window| window == needle) + .ok_or(EINVAL) + } + + /// Find the BIT header in the [`PciAtBiosImage`]. 
+ fn find_bit_header(data: &[u8]) -> Result<(BitHeader, usize)> { + let bit_pattern = [0xff, 0xb8, b'B', b'I', b'T', 0x00]; + let bit_offset = Self::find_byte_pattern(data, &bit_pattern)?; + let bit_header = BitHeader::new(&data[bit_offset..])?; + + Ok((bit_header, bit_offset)) + } + + /// Get a BIT token entry from the BIT table in the [`PciAtBiosImage`] + fn get_bit_token(&self, token_id: u8) -> Result { + BitToken::from_id(self, token_id) + } + + /// Find the Falcon data pointer structure in the [`PciAtBiosImage`]. + /// + /// This is just a 4 byte structure that contains a pointer to the Falcon data in the FWSEC + /// image. + fn falcon_data_ptr(&self, pdev: &pci::Device) -> Result { + let token = self.get_bit_token(BIT_TOKEN_ID_FALCON_DATA)?; + + // Make sure we don't go out of bounds + if token.data_offset as usize + 4 > self.base.data.len() { + return Err(EINVAL); + } + + // read the 4 bytes at the offset specified in the token + let offset = token.data_offset as usize; + let bytes: [u8; 4] = self.base.data[offset..offset + 4].try_into().map_err(|_| { + dev_err!(pdev.as_ref(), "Failed to convert data slice to array"); + EINVAL + })?; + + let data_ptr = u32::from_le_bytes(bytes); + + if (data_ptr as usize) < self.base.data.len() { + dev_err!(pdev.as_ref(), "Falcon data pointer out of bounds\n"); + return Err(EINVAL); + } + + Ok(data_ptr) + } +} + +impl TryFrom for PciAtBiosImage { + type Error = Error; + + fn try_from(base: BiosImageBase) -> Result { + let data_slice = &base.data; + let (bit_header, bit_offset) = PciAtBiosImage::find_bit_header(data_slice)?; + + Ok(PciAtBiosImage { + base, + bit_header, + bit_offset, + }) + } +} From 47c4846e4319503e9cd0bd0922facd0fb1e383d0 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 19 Jun 2025 22:24:03 +0900 Subject: [PATCH 034/358] gpu: nova-core: vbios: Add support for FWSEC ucode extraction Using the support for navigating the VBIOS, add support to extract vBIOS ucode data required for GSP to boot. 
The main data extracted from the vBIOS is the FWSEC-FRTS firmware which runs on the GSP processor. This firmware runs in high secure mode, and sets up the WPR2 (Write protected region) before the Booter runs on the SEC2 processor. Tested on my Ampere GA102 and boot is successful. Cc: Alexandre Courbot Cc: John Hubbard Cc: Shirish Baskaran Cc: Alistair Popple Cc: Timur Tabi Cc: Ben Skeggs Signed-off-by: Joel Fernandes [ acourbot@nvidia.com: remove now-unneeded Devres acquisition ] Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-19-ecf41ef99252@nvidia.com [ Re-format and use markdown in comments. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/firmware.rs | 2 - drivers/gpu/nova-core/vbios.rs | 313 +++++++++++++++++++++++++++++- 2 files changed, 304 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 41f43a729ad3..e5583925cb3b 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -44,7 +44,6 @@ pub(crate) fn new(dev: &device::Device, chipset: Chipset, ver: &str) -> Result usize { const HDR_SIZE_SHIFT: u32 = 16; const HDR_SIZE_MASK: u32 = 0xffff0000; diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 04c5f96844b4..15b7ce5c663e 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -6,7 +6,9 @@ #![expect(dead_code)] use crate::driver::Bar0; +use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; +use kernel::device; use kernel::error::Result; use kernel::pci; use kernel::prelude::*; @@ -195,8 +197,8 @@ impl Vbios { pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { // Images to extract from iteration let mut pci_at_image: Option = None; - let mut first_fwsec_image: Option = None; - let mut second_fwsec_image: Option = None; + let mut first_fwsec_image: Option = None; + let mut second_fwsec_image: Option = None; // Parse all 
VBIOS images in the ROM for image_result in VbiosIterator::new(pdev, bar0)? { @@ -230,12 +232,14 @@ pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { } // Using all the images, setup the falcon data pointer in Fwsec. - // These are temporarily unused images and will be used in later patches. - if let (Some(second), Some(_first), Some(_pci_at)) = + if let (Some(mut second), Some(first), Some(pci_at)) = (second_fwsec_image, first_fwsec_image, pci_at_image) { + second + .setup_falcon_data(pdev, &pci_at, &first) + .inspect_err(|e| dev_err!(pdev.as_ref(), "Falcon data setup failed: {:?}\n", e))?; Ok(Vbios { - fwsec_image: second, + fwsec_image: second.build(pdev)?, }) } else { dev_err!( @@ -245,6 +249,10 @@ pub(crate) fn new(pdev: &pci::Device, bar0: &Bar0) -> Result { Err(EINVAL) } } + + pub(crate) fn fwsec_image(&self) -> &FwSecBiosImage { + &self.fwsec_image + } } /// PCI Data Structure as defined in PCI Firmware Specification @@ -677,7 +685,7 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { PciAt: PciAtBiosImage, // PCI-AT compatible BIOS image Efi: EfiBiosImage, // EFI (Extensible Firmware Interface) Nbsi: NbsiBiosImage, // NBSI (Nvidia Bios System Interface) - FwSec: FwSecBiosImage, // FWSEC (Firmware Security) + FwSec: FwSecBiosBuilder, // FWSEC (Firmware Security) } /// The PciAt BIOS image is typically the first BIOS image type found in the BIOS image chain. @@ -699,9 +707,29 @@ struct NbsiBiosImage { // NBSI-specific fields can be added here in the future. } -struct FwSecBiosImage { +struct FwSecBiosBuilder { base: BiosImageBase, - // FWSEC-specific fields can be added here in the future. + /// These are temporary fields that are used during the construction of the + /// [`FwSecBiosBuilder`]. + /// + /// Once FwSecBiosBuilder is constructed, the `falcon_ucode_offset` will be copied into a new + /// [`FwSecBiosImage`]. + /// + /// The offset of the Falcon data from the start of Fwsec image. 
+ falcon_data_offset: Option, + /// The [`PmuLookupTable`] starts at the offset of the falcon data pointer. + pmu_lookup_table: Option, + /// The offset of the Falcon ucode. + falcon_ucode_offset: Option, +} + +/// The [`FwSecBiosImage`] structure contains the PMU table and the Falcon Ucode. +/// +/// The PMU table contains voltage/frequency tables as well as a pointer to the Falcon Ucode. +pub(crate) struct FwSecBiosImage { + base: BiosImageBase, + /// The offset of the Falcon ucode. + falcon_ucode_offset: usize, } // Convert from BiosImageBase to BiosImage @@ -713,7 +741,12 @@ fn try_from(base: BiosImageBase) -> Result { 0x00 => Ok(BiosImage::PciAt(base.try_into()?)), 0x03 => Ok(BiosImage::Efi(EfiBiosImage { base })), 0x70 => Ok(BiosImage::Nbsi(NbsiBiosImage { base })), - 0xE0 => Ok(BiosImage::FwSec(FwSecBiosImage { base })), + 0xE0 => Ok(BiosImage::FwSec(FwSecBiosBuilder { + base, + falcon_data_offset: None, + pmu_lookup_table: None, + falcon_ucode_offset: None, + })), _ => Err(EINVAL), } } @@ -857,3 +890,265 @@ fn try_from(base: BiosImageBase) -> Result { }) } } + +/// The [`PmuLookupTableEntry`] structure is a single entry in the [`PmuLookupTable`]. +/// +/// See the [`PmuLookupTable`] description for more information. +#[expect(dead_code)] +struct PmuLookupTableEntry { + application_id: u8, + target_id: u8, + data: u32, +} + +impl PmuLookupTableEntry { + fn new(data: &[u8]) -> Result { + if data.len() < 6 { + return Err(EINVAL); + } + + Ok(PmuLookupTableEntry { + application_id: data[0], + target_id: data[1], + data: u32::from_le_bytes(data[2..6].try_into().map_err(|_| EINVAL)?), + }) + } +} + +/// The [`PmuLookupTable`] structure is used to find the [`PmuLookupTableEntry`] for a given +/// application ID. +/// +/// The table of entries is pointed to by the falcon data pointer in the BIT table, and is used to +/// locate the Falcon Ucode.
+#[expect(dead_code)] +struct PmuLookupTable { + version: u8, + header_len: u8, + entry_len: u8, + entry_count: u8, + table_data: KVec, +} + +impl PmuLookupTable { + fn new(pdev: &pci::Device, data: &[u8]) -> Result { + if data.len() < 4 { + return Err(EINVAL); + } + + let header_len = data[1] as usize; + let entry_len = data[2] as usize; + let entry_count = data[3] as usize; + + let required_bytes = header_len + (entry_count * entry_len); + + if data.len() < required_bytes { + dev_err!( + pdev.as_ref(), + "PmuLookupTable data length less than required\n" + ); + return Err(EINVAL); + } + + // Create a copy of only the table data + let table_data = { + let mut ret = KVec::new(); + ret.extend_from_slice(&data[header_len..required_bytes], GFP_KERNEL)?; + ret + }; + + // Debug logging of entries (dumps the table data to dmesg) + for i in (header_len..required_bytes).step_by(entry_len) { + dev_dbg!( + pdev.as_ref(), + "PMU entry: {:02x?}\n", + &data[i..][..entry_len] + ); + } + + Ok(PmuLookupTable { + version: data[0], + header_len: header_len as u8, + entry_len: entry_len as u8, + entry_count: entry_count as u8, + table_data, + }) + } + + fn lookup_index(&self, idx: u8) -> Result { + if idx >= self.entry_count { + return Err(EINVAL); + } + + let index = (idx as usize) * self.entry_len as usize; + PmuLookupTableEntry::new(&self.table_data[index..]) + } + + // find entry by type value + fn find_entry_by_type(&self, entry_type: u8) -> Result { + for i in 0..self.entry_count { + let entry = self.lookup_index(i)?; + if entry.application_id == entry_type { + return Ok(entry); + } + } + + Err(EINVAL) + } +} + +impl FwSecBiosBuilder { + fn setup_falcon_data( + &mut self, + pdev: &pci::Device, + pci_at_image: &PciAtBiosImage, + first_fwsec: &FwSecBiosBuilder, + ) -> Result { + let mut offset = pci_at_image.falcon_data_ptr(pdev)? as usize; + let mut pmu_in_first_fwsec = false; + + // The falcon data pointer assumes that the PciAt and FWSEC images + // are contiguous in memory. 
However, testing shows the EFI image sits in + // between them. So calculate the offset from the end of the PciAt image + // rather than the start of it. Compensate. + offset -= pci_at_image.base.data.len(); + + // The offset is now from the start of the first Fwsec image, however + // the offset points to a location in the second Fwsec image. Since + // the fwsec images are contiguous, subtract the length of the first Fwsec + // image from the offset to get the offset to the start of the second + // Fwsec image. + if offset < first_fwsec.base.data.len() { + pmu_in_first_fwsec = true; + } else { + offset -= first_fwsec.base.data.len(); + } + + self.falcon_data_offset = Some(offset); + + if pmu_in_first_fwsec { + self.pmu_lookup_table = + Some(PmuLookupTable::new(pdev, &first_fwsec.base.data[offset..])?); + } else { + self.pmu_lookup_table = Some(PmuLookupTable::new(pdev, &self.base.data[offset..])?); + } + + match self + .pmu_lookup_table + .as_ref() + .ok_or(EINVAL)? + .find_entry_by_type(FALCON_UCODE_ENTRY_APPID_FWSEC_PROD) + { + Ok(entry) => { + let mut ucode_offset = entry.data as usize; + ucode_offset -= pci_at_image.base.data.len(); + if ucode_offset < first_fwsec.base.data.len() { + dev_err!(pdev.as_ref(), "Falcon Ucode offset not in second Fwsec.\n"); + return Err(EINVAL); + } + ucode_offset -= first_fwsec.base.data.len(); + self.falcon_ucode_offset = Some(ucode_offset); + } + Err(e) => { + dev_err!( + pdev.as_ref(), + "PmuLookupTableEntry not found, error: {:?}\n", + e + ); + return Err(EINVAL); + } + } + Ok(()) + } + + /// Build the final FwSecBiosImage from this builder + fn build(self, pdev: &pci::Device) -> Result { + let ret = FwSecBiosImage { + base: self.base, + falcon_ucode_offset: self.falcon_ucode_offset.ok_or(EINVAL)?, + }; + + if cfg!(debug_assertions) { + // Print the desc header for debugging + let desc = ret.header(pdev.as_ref())?; + dev_dbg!(pdev.as_ref(), "PmuLookupTableEntry desc: {:#?}\n", desc); + } + + Ok(ret) + } +} + +impl 
FwSecBiosImage { + /// Get the FwSec header ([`FalconUCodeDescV3`]). + pub(crate) fn header(&self, dev: &device::Device) -> Result<&FalconUCodeDescV3> { + // Get the falcon ucode offset that was found in setup_falcon_data. + let falcon_ucode_offset = self.falcon_ucode_offset; + + // Make sure the offset is within the data bounds. + if falcon_ucode_offset + core::mem::size_of::() > self.base.data.len() { + dev_err!(dev, "fwsec-frts header not contained within BIOS bounds\n"); + return Err(ERANGE); + } + + // Read the first 4 bytes to get the version. + let hdr_bytes: [u8; 4] = self.base.data[falcon_ucode_offset..falcon_ucode_offset + 4] + .try_into() + .map_err(|_| EINVAL)?; + let hdr = u32::from_le_bytes(hdr_bytes); + let ver = (hdr & 0xff00) >> 8; + + if ver != 3 { + dev_err!(dev, "invalid fwsec firmware version: {:?}\n", ver); + return Err(EINVAL); + } + + // Return a reference to the FalconUCodeDescV3 structure. + // + // SAFETY: We have checked that `falcon_ucode_offset + size_of::` is + // within the bounds of `data`. Also, this data vector is from ROM, and the `data` field + // in `BiosImageBase` is immutable after construction. + Ok(unsafe { + &*(self + .base + .data + .as_ptr() + .add(falcon_ucode_offset) + .cast::()) + }) + } + + /// Get the ucode data as a byte slice + pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + let falcon_ucode_offset = self.falcon_ucode_offset; + + // The ucode data follows the descriptor. + let ucode_data_offset = falcon_ucode_offset + desc.size(); + let size = (desc.imem_load_size + desc.dmem_load_size) as usize; + + // Get the data slice, checking bounds in a single operation. 
+ self.base + .data + .get(ucode_data_offset..ucode_data_offset + size) + .ok_or(ERANGE) + .inspect_err(|_| dev_err!(dev, "fwsec ucode data not contained within BIOS bounds\n")) + } + + /// Get the signatures as a byte slice + pub(crate) fn sigs(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { + const SIG_SIZE: usize = 96 * 4; + + // The signatures data follows the descriptor. + let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::(); + let size = desc.signature_count as usize * SIG_SIZE; + + // Make sure the data is within bounds. + if sigs_data_offset + size > self.base.data.len() { + dev_err!( + dev, + "fwsec signatures data not contained within BIOS bounds\n" + ); + return Err(ERANGE); + } + + Ok(&self.base.data[sigs_data_offset..sigs_data_offset + size]) + } +} From 80213934d00fe09d9dcef3d6f17250be131435aa Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:04 +0900 Subject: [PATCH 035/358] gpu: nova-core: compute layout of the FRTS region FWSEC-FRTS is run with the desired address of the FRTS region as parameter, which we need to compute depending on some hardware parameters. Do this in a `FbLayout` structure, that will be later extended to describe more memory regions used to boot the GSP. 
Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-20-ecf41ef99252@nvidia.com [ In doc-comment of FbLayout s/bootup process/boot process/ - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/fb.rs | 70 ++++++++++++++++++++++++ drivers/gpu/nova-core/fb/hal.rs | 12 ++++- drivers/gpu/nova-core/fb/hal/ga100.rs | 12 +++++ drivers/gpu/nova-core/fb/hal/ga102.rs | 36 +++++++++++++ drivers/gpu/nova-core/fb/hal/tu102.rs | 16 ++++++ drivers/gpu/nova-core/gpu.rs | 4 ++ drivers/gpu/nova-core/regs.rs | 76 +++++++++++++++++++++++++++ 7 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/nova-core/fb/hal/ga102.rs diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index bfe3ed063572..5719de5c4759 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 +use core::ops::Range; + use kernel::prelude::*; +use kernel::sizes::*; use kernel::types::ARef; use kernel::{dev_warn, device}; use crate::dma::DmaObject; use crate::driver::Bar0; use crate::gpu::Chipset; +use crate::regs; mod hal; @@ -65,3 +69,69 @@ pub(crate) fn unregister(&self, bar: &Bar0) { } } } + +/// Layout of the GPU framebuffer memory. +/// +/// Contains ranges of GPU memory reserved for a given purpose during the GSP boot process. +#[derive(Debug)] +#[expect(dead_code)] +pub(crate) struct FbLayout { + pub(crate) fb: Range, + pub(crate) vga_workspace: Range, + pub(crate) frts: Range, +} + +impl FbLayout { + /// Computes the FB layout. 
+ pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result { + let hal = hal::fb_hal(chipset); + + let fb = { + let fb_size = hal.vidmem_size(bar); + + 0..fb_size + }; + + let vga_workspace = { + let vga_base = { + const NV_PRAMIN_SIZE: u64 = SZ_1M as u64; + let base = fb.end - NV_PRAMIN_SIZE; + + if hal.supports_display(bar) { + match regs::NV_PDISP_VGA_WORKSPACE_BASE::read(bar).vga_workspace_addr() { + Some(addr) => { + if addr < base { + const VBIOS_WORKSPACE_SIZE: u64 = SZ_128K as u64; + + // Point workspace address to end of framebuffer. + fb.end - VBIOS_WORKSPACE_SIZE + } else { + addr + } + } + None => base, + } + } else { + base + } + }; + + vga_base..fb.end + }; + + let frts = { + const FRTS_DOWN_ALIGN: u64 = SZ_128K as u64; + const FRTS_SIZE: u64 = SZ_1M as u64; + // TODO: replace with `align_down` once it lands. + let frts_base = (vga_workspace.start & !(FRTS_DOWN_ALIGN - 1)) - FRTS_SIZE; + + frts_base..frts_base + FRTS_SIZE + }; + + Ok(Self { + fb, + vga_workspace, + frts, + }) + } +} diff --git a/drivers/gpu/nova-core/fb/hal.rs b/drivers/gpu/nova-core/fb/hal.rs index 23eab57eec9f..2f914948bb9a 100644 --- a/drivers/gpu/nova-core/fb/hal.rs +++ b/drivers/gpu/nova-core/fb/hal.rs @@ -6,6 +6,7 @@ use crate::gpu::Chipset; mod ga100; +mod ga102; mod tu102; pub(crate) trait FbHal { @@ -16,6 +17,12 @@ pub(crate) trait FbHal { /// /// This might fail if the address is too large for the receiving register. fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result; + + /// Returns `true` if display is supported. + fn supports_display(&self, bar: &Bar0) -> bool; + + /// Returns the VRAM size, in bytes. + fn vidmem_size(&self, bar: &Bar0) -> u64; } /// Returns the HAL corresponding to `chipset`.
@@ -24,8 +31,9 @@ pub(super) fn fb_hal(chipset: Chipset) -> &'static dyn FbHal { match chipset { TU102 | TU104 | TU106 | TU117 | TU116 => tu102::TU102_HAL, - GA100 | GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { - ga100::GA100_HAL + GA100 => ga100::GA100_HAL, + GA102 | GA103 | GA104 | GA106 | GA107 | AD102 | AD103 | AD104 | AD106 | AD107 => { + ga102::GA102_HAL } } } diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index 7c10436c1c59..4827721c9860 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -25,6 +25,10 @@ pub(super) fn write_sysmem_flush_page_ga100(bar: &Bar0, addr: u64) { .write(bar); } +pub(super) fn display_enabled_ga100(bar: &Bar0) -> bool { + !regs::ga100::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + /// Shift applied to the sysmem address before it is written into /// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI`, const FLUSH_SYSMEM_ADDR_SHIFT_HI: u32 = 40; @@ -39,6 +43,14 @@ fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { Ok(()) } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + super::tu102::vidmem_size_gp102(bar) + } } const GA100: Ga100 = Ga100; diff --git a/drivers/gpu/nova-core/fb/hal/ga102.rs b/drivers/gpu/nova-core/fb/hal/ga102.rs new file mode 100644 index 000000000000..a73b77e39715 --- /dev/null +++ b/drivers/gpu/nova-core/fb/hal/ga102.rs @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +use kernel::prelude::*; + +use crate::driver::Bar0; +use crate::fb::hal::FbHal; +use crate::regs; + +fn vidmem_size_ga102(bar: &Bar0) -> u64 { + regs::NV_USABLE_FB_SIZE_IN_MB::read(bar).usable_fb_size() +} + +struct Ga102; + +impl FbHal for Ga102 { + fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { + super::ga100::read_sysmem_flush_page_ga100(bar) + } + + fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) 
-> Result { + super::ga100::write_sysmem_flush_page_ga100(bar, addr); + + Ok(()) + } + + fn supports_display(&self, bar: &Bar0) -> bool { + super::ga100::display_enabled_ga100(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_ga102(bar) + } +} + +const GA102: Ga102 = Ga102; +pub(super) const GA102_HAL: &dyn FbHal = &GA102; diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index 048859f9fd9d..6f8ae58e9481 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -26,6 +26,14 @@ pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { } } +pub(super) fn display_enabled_gm107(bar: &Bar0) -> bool { + !regs::gm107::NV_FUSE_STATUS_OPT_DISPLAY::read(bar).display_disabled() +} + +pub(super) fn vidmem_size_gp102(bar: &Bar0) -> u64 { + regs::NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE::read(bar).usable_fb_size() +} + struct Tu102; impl FbHal for Tu102 { @@ -36,6 +44,14 @@ fn read_sysmem_flush_page(&self, bar: &Bar0) -> u64 { fn write_sysmem_flush_page(&self, bar: &Bar0, addr: u64) -> Result { write_sysmem_flush_page_gm107(bar, addr) } + + fn supports_display(&self, bar: &Bar0) -> bool { + display_enabled_gm107(bar) + } + + fn vidmem_size(&self, bar: &Bar0) -> u64 { + vidmem_size_gp102(bar) + } } const TU102: Tu102 = Tu102; diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 3c64d2b43883..a748ce689e61 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -4,6 +4,7 @@ use crate::driver::Bar0; use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; +use crate::fb::FbLayout; use crate::fb::SysmemFlush; use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; @@ -215,6 +216,9 @@ pub(crate) fn new( let _sec2_falcon = Falcon::::new(pdev.as_ref(), spec.chipset, bar, true)?; + let fb_layout = FbLayout::new(spec.chipset, bar)?; + dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); + // Will be used in a later patch when 
fwsec firmware is needed. let _bios = Vbios::new(pdev, bar)?; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index a2f449eb08b5..8ca7bcb5a93f 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -52,6 +52,27 @@ pub(crate) fn chipset(self) -> Result { 23:0 adr_63_40 as u32; }); +register!(NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE @ 0x00100ce0 { + 3:0 lower_scale as u8; + 9:4 lower_mag as u8; + 30:30 ecc_mode_enabled as bool; +}); + +impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + let size = ((self.lower_mag() as u64) << (self.lower_scale() as u64)) + * kernel::sizes::SZ_1M as u64; + + if self.ecc_mode_enabled() { + // Remove the amount of memory reserved for ECC (one per 16 units). + size / 16 * 15 + } else { + size + } + } +} + /* PGC6 */ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128 { @@ -77,6 +98,42 @@ pub(crate) fn completed(self) -> bool { } } +register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_42 @ 0x001183a4 { + 31:0 value as u32; +}); + +register!( + NV_USABLE_FB_SIZE_IN_MB => NV_PGC6_AON_SECURE_SCRATCH_GROUP_42, + "Scratch group 42 register used as framebuffer size" { + 31:0 value as u32, "Usable framebuffer size, in megabytes"; + } +); + +impl NV_USABLE_FB_SIZE_IN_MB { + /// Returns the usable framebuffer size, in bytes. + pub(crate) fn usable_fb_size(self) -> u64 { + u64::from(self.value()) * kernel::sizes::SZ_1M as u64 + } +} + +/* PDISP */ + +register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { + 3:3 status_valid as bool, "Set if the `addr` field is valid"; + 31:8 addr as u32, "VGA workspace base address divided by 0x10000"; +}); + +impl NV_PDISP_VGA_WORKSPACE_BASE { + /// Returns the base address of the VGA workspace, or `None` if none exists. 
+ pub(crate) fn vga_workspace_addr(self) -> Option { + if self.status_valid() { + Some((self.addr() as u64) << 16) + } else { + None + } + } +} + /* FUSE */ register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { @@ -218,3 +275,22 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 4:4 core_select as bool => PeregrineCoreSelect; 8:8 br_fetch as bool; }); + +// The modules below provide registers that are not identical on all supported chips. They should +// only be used in HAL modules. + +pub(crate) mod gm107 { + /* FUSE */ + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { + 0:0 display_disabled as bool; + }); +} + +pub(crate) mod ga100 { + /* FUSE */ + + register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { + 0:0 display_disabled as bool; + }); +} From 436884511d54b8a090fe2886d48f72143ce95b67 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:05 +0900 Subject: [PATCH 036/358] gpu: nova-core: add types for patching firmware binaries Some of the firmwares need to be patched at load-time with a signature. Add a couple of types and traits that sub-modules can use to implement this behavior, while ensuring that the correct kind of signature is applied to the firmware. Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-21-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/firmware.rs | 64 +++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index e5583925cb3b..09e6bd104a6c 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -3,11 +3,15 @@ //! Contains structures and functions dedicated to the parsing, building and patching of firmwares //! to be loaded into a given execution unit. 
+use core::marker::PhantomData; + use kernel::device; use kernel::firmware; use kernel::prelude::*; use kernel::str::CString; +use crate::dma::DmaObject; +use crate::falcon::FalconFirmware; use crate::gpu; use crate::gpu::Chipset; @@ -84,6 +88,66 @@ pub(crate) fn size(&self) -> usize { } } +/// Trait implemented by types defining the signed state of a firmware. +trait SignedState {} + +/// Type indicating that the firmware must be signed before it can be used. +struct Unsigned; +impl SignedState for Unsigned {} + +/// Type indicating that the firmware is signed and ready to be loaded. +struct Signed; +impl SignedState for Signed {} + +/// A [`DmaObject`] containing a specific microcode ready to be loaded into a falcon. +/// +/// This is module-local and meant for sub-modules to use internally. +/// +/// After construction, a firmware is [`Unsigned`], and must generally be patched with a signature +/// before it can be loaded (with an exception for development hardware). The +/// [`Self::patch_signature`] and [`Self::no_patch_signature`] methods are used to transition the +/// firmware to its [`Signed`] state. +struct FirmwareDmaObject(DmaObject, PhantomData<(F, S)>); + +/// Trait for signatures to be patched directly into a given firmware. +/// +/// This is module-local and meant for sub-modules to use internally. +trait FirmwareSignature: AsRef<[u8]> {} + +#[expect(unused)] +impl FirmwareDmaObject { + /// Patches the firmware at offset `sig_base_img` with `signature`. + fn patch_signature>( + mut self, + signature: &S, + sig_base_img: usize, + ) -> Result> { + let signature_bytes = signature.as_ref(); + if sig_base_img + signature_bytes.len() > self.0.size() { + return Err(EINVAL); + } + + // SAFETY: We are the only user of this object, so there cannot be any race. + let dst = unsafe { self.0.start_ptr_mut().add(sig_base_img) }; + + // SAFETY: `signature` and `dst` are valid, properly aligned, and do not overlap. 
+ unsafe { + core::ptr::copy_nonoverlapping(signature_bytes.as_ptr(), dst, signature_bytes.len()) + }; + + Ok(FirmwareDmaObject(self.0, PhantomData)) + } + + /// Mark the firmware as signed without patching it. + /// + /// This method is used to explicitly confirm that we do not need to sign the firmware, while + /// allowing us to continue as if it was. This is typically only needed for development + /// hardware. + fn no_patch_signature(self) -> FirmwareDmaObject { + FirmwareDmaObject(self.0, PhantomData) + } +} + pub(crate) struct ModInfoBuilder(firmware::ModInfoBuilder); impl ModInfoBuilder { From 31f0feefb49a219687a1233e68146db834417d75 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:06 +0900 Subject: [PATCH 037/358] gpu: nova-core: extract FWSEC from BIOS and patch it to run FWSEC-FRTS The FWSEC firmware needs to be extracted from the VBIOS and patched with the desired command, as well as the right signature. Do this so we are ready to load and run this firmware into the GSP falcon and create the FRTS region. [joelagnelf@nvidia.com: give better names to FalconAppifHdrV1's fields] Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-22-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/dma.rs | 3 - drivers/gpu/nova-core/firmware.rs | 3 +- drivers/gpu/nova-core/firmware/fwsec.rs | 398 ++++++++++++++++++++++++ drivers/gpu/nova-core/gpu.rs | 15 +- drivers/gpu/nova-core/vbios.rs | 30 +- 5 files changed, 434 insertions(+), 15 deletions(-) create mode 100644 drivers/gpu/nova-core/firmware/fwsec.rs diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs index 4b063aaef65e..1f1f8c378d8e 100644 --- a/drivers/gpu/nova-core/dma.rs +++ b/drivers/gpu/nova-core/dma.rs @@ -2,9 +2,6 @@ //! Simple DMA object wrapper. -// To be removed when all code is used. 
-#![expect(dead_code)] - use core::ops::{Deref, DerefMut}; use kernel::device; diff --git a/drivers/gpu/nova-core/firmware.rs b/drivers/gpu/nova-core/firmware.rs index 09e6bd104a6c..0fdece652587 100644 --- a/drivers/gpu/nova-core/firmware.rs +++ b/drivers/gpu/nova-core/firmware.rs @@ -15,6 +15,8 @@ use crate::gpu; use crate::gpu::Chipset; +pub(crate) mod fwsec; + pub(crate) const FIRMWARE_VERSION: &str = "535.113.01"; /// Structure encapsulating the firmware blobs required for the GPU to operate. @@ -114,7 +116,6 @@ impl SignedState for Signed {} /// This is module-local and meant for sub-modules to use internally. trait FirmwareSignature: AsRef<[u8]> {} -#[expect(unused)] impl FirmwareDmaObject { /// Patches the firmware at offset `sig_base_img` with `signature`. fn patch_signature>( diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs new file mode 100644 index 000000000000..de179c201139 --- /dev/null +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! FWSEC is a High Secure firmware that is extracted from the BIOS and performs the first step of +//! the GSP startup by creating the WPR2 memory region and copying critical areas of the VBIOS into +//! it after authenticating them, ensuring they haven't been tampered with. It runs on the GSP +//! falcon. +//! +//! Before being run, it needs to be patched in two areas: +//! +//! - The command to be run, as this firmware can perform several tasks ; +//! - The ucode signature, so the GSP falcon can run FWSEC in HS mode. 
+ +use core::marker::PhantomData; +use core::mem::{align_of, size_of}; +use core::ops::Deref; + +use kernel::device::{self, Device}; +use kernel::prelude::*; +use kernel::transmute::FromBytes; + +use crate::dma::DmaObject; +use crate::driver::Bar0; +use crate::falcon::gsp::Gsp; +use crate::falcon::{Falcon, FalconBromParams, FalconFirmware, FalconLoadParams, FalconLoadTarget}; +use crate::firmware::{FalconUCodeDescV3, FirmwareDmaObject, FirmwareSignature, Signed, Unsigned}; +use crate::vbios::Vbios; + +const NVFW_FALCON_APPIF_ID_DMEMMAPPER: u32 = 0x4; + +#[repr(C)] +#[derive(Debug)] +struct FalconAppifHdrV1 { + version: u8, + header_size: u8, + entry_size: u8, + entry_count: u8, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifHdrV1 {} + +#[repr(C, packed)] +#[derive(Debug)] +struct FalconAppifV1 { + id: u32, + dmem_base: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifV1 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FalconAppifDmemmapperV3 { + signature: u32, + version: u16, + size: u16, + cmd_in_buffer_offset: u32, + cmd_in_buffer_size: u32, + cmd_out_buffer_offset: u32, + cmd_out_buffer_size: u32, + nvf_img_data_buffer_offset: u32, + nvf_img_data_buffer_size: u32, + printf_buffer_hdr: u32, + ucode_build_time_stamp: u32, + ucode_signature: u32, + init_cmd: u32, + ucode_feature: u32, + ucode_cmd_mask0: u32, + ucode_cmd_mask1: u32, + multi_tgt_tbl: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FalconAppifDmemmapperV3 {} + +#[derive(Debug)] +#[repr(C, packed)] +struct ReadVbios { + ver: u32, + hdr: u32, + addr: u64, + size: u32, + flags: u32, +} +// SAFETY: any byte sequence is valid for this struct. 
+unsafe impl FromBytes for ReadVbios {} + +#[derive(Debug)] +#[repr(C, packed)] +struct FrtsRegion { + ver: u32, + hdr: u32, + addr: u32, + size: u32, + ftype: u32, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsRegion {} + +const NVFW_FRTS_CMD_REGION_TYPE_FB: u32 = 2; + +#[repr(C, packed)] +struct FrtsCmd { + read_vbios: ReadVbios, + frts_region: FrtsRegion, +} +// SAFETY: any byte sequence is valid for this struct. +unsafe impl FromBytes for FrtsCmd {} + +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS: u32 = 0x15; +const NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB: u32 = 0x19; + +/// Command for the [`FwsecFirmware`] to execute. +pub(crate) enum FwsecCommand { + /// Asks [`FwsecFirmware`] to carve out the WPR2 area and place a verified copy of the VBIOS + /// image into it. + Frts { frts_addr: u64, frts_size: u64 }, + /// Asks [`FwsecFirmware`] to load pre-OS apps on the PMU. + #[expect(dead_code)] + Sb, +} + +/// Size of the signatures used in FWSEC. +const BCRT30_RSA3K_SIG_SIZE: usize = 384; + +/// A single signature that can be patched into a FWSEC image. +#[repr(transparent)] +pub(crate) struct Bcrt30Rsa3kSignature([u8; BCRT30_RSA3K_SIG_SIZE]); + +/// SAFETY: A signature is just an array of bytes. +unsafe impl FromBytes for Bcrt30Rsa3kSignature {} + +impl From<[u8; BCRT30_RSA3K_SIG_SIZE]> for Bcrt30Rsa3kSignature { + fn from(sig: [u8; BCRT30_RSA3K_SIG_SIZE]) -> Self { + Self(sig) + } +} + +impl AsRef<[u8]> for Bcrt30Rsa3kSignature { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl FirmwareSignature for Bcrt30Rsa3kSignature {} + +/// Reinterpret the area starting from `offset` in `fw` as an instance of `T` (which must implement +/// [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not written for as long as the +/// returned reference is alive. 
+/// +/// TODO: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is available and we +/// have a way to transmute objects implementing FromBytes, e.g.: +/// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ +unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( + fw: &'a DmaObject, + offset: usize, +) -> Result<&'b T> { + if offset + size_of::() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr() as usize + offset) % align_of::() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &*(fw.start_ptr().add(offset).cast::()) }) +} + +/// Reinterpret the area starting from `offset` in `fw` as a mutable instance of `T` (which must +/// implement [`FromBytes`]) and return a reference to it. +/// +/// # Safety +/// +/// Callers must ensure that the region of memory returned is not read or written for as long as +/// the returned reference is alive. +unsafe fn transmute_mut<'a, 'b, T: Sized + FromBytes>( + fw: &'a mut DmaObject, + offset: usize, +) -> Result<&'b mut T> { + if offset + size_of::() > fw.size() { + return Err(EINVAL); + } + if (fw.start_ptr_mut() as usize + offset) % align_of::() != 0 { + return Err(EINVAL); + } + + // SAFETY: we have checked that the pointer is properly aligned that its pointed memory is + // large enough the contains an instance of `T`, which implements `FromBytes`. + Ok(unsafe { &mut *(fw.start_ptr_mut().add(offset).cast::()) }) +} + +/// The FWSEC microcode, extracted from the BIOS and to be run on the GSP falcon. +/// +/// It is responsible for e.g. carving out the WPR2 region as the first step of the GSP bootflow. +pub(crate) struct FwsecFirmware { + /// Descriptor of the firmware. + desc: FalconUCodeDescV3, + /// GPU-accessible DMA object containing the firmware. 
+ ucode: FirmwareDmaObject, +} + +// We need to load full DMEM pages. +const DMEM_LOAD_SIZE_ALIGN: u32 = 256; + +impl FalconLoadParams for FwsecFirmware { + fn imem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: 0, + dst_start: self.desc.imem_phys_base, + len: self.desc.imem_load_size, + } + } + + fn dmem_load_params(&self) -> FalconLoadTarget { + FalconLoadTarget { + src_start: self.desc.imem_load_size, + dst_start: self.desc.dmem_phys_base, + // TODO: replace with `align_up` once it lands. + len: self + .desc + .dmem_load_size + .next_multiple_of(DMEM_LOAD_SIZE_ALIGN), + } + } + + fn brom_params(&self) -> FalconBromParams { + FalconBromParams { + pkc_data_offset: self.desc.pkc_data_offset, + engine_id_mask: self.desc.engine_id_mask, + ucode_id: self.desc.ucode_id, + } + } + + fn boot_addr(&self) -> u32 { + 0 + } +} + +impl Deref for FwsecFirmware { + type Target = DmaObject; + + fn deref(&self) -> &Self::Target { + &self.ucode.0 + } +} + +impl FalconFirmware for FwsecFirmware { + type Target = Gsp; +} + +impl FirmwareDmaObject { + fn new_fwsec(dev: &Device, bios: &Vbios, cmd: FwsecCommand) -> Result { + let desc = bios.fwsec_image().header(dev)?; + let ucode = bios.fwsec_image().ucode(dev, desc)?; + let mut dma_object = DmaObject::from_data(dev, ucode)?; + + let hdr_offset = (desc.imem_load_size + desc.interface_offset) as usize; + // SAFETY: we have exclusive access to `dma_object`. + let hdr: &FalconAppifHdrV1 = unsafe { transmute(&dma_object, hdr_offset) }?; + + if hdr.version != 1 { + return Err(EINVAL); + } + + // Find the DMEM mapper section in the firmware. + for i in 0..hdr.entry_count as usize { + let app: &FalconAppifV1 = + // SAFETY: we have exclusive access to `dma_object`. + unsafe { + transmute( + &dma_object, + hdr_offset + hdr.header_size as usize + i * hdr.entry_size as usize + ) + }?; + + if app.id != NVFW_FALCON_APPIF_ID_DMEMMAPPER { + continue; + } + + // SAFETY: we have exclusive access to `dma_object`. 
+ let dmem_mapper: &mut FalconAppifDmemmapperV3 = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + app.dmem_base) as usize, + ) + }?; + + // SAFETY: we have exclusive access to `dma_object`. + let frts_cmd: &mut FrtsCmd = unsafe { + transmute_mut( + &mut dma_object, + (desc.imem_load_size + dmem_mapper.cmd_in_buffer_offset) as usize, + ) + }?; + + frts_cmd.read_vbios = ReadVbios { + ver: 1, + hdr: size_of::() as u32, + addr: 0, + size: 0, + flags: 2, + }; + + dmem_mapper.init_cmd = match cmd { + FwsecCommand::Frts { + frts_addr, + frts_size, + } => { + frts_cmd.frts_region = FrtsRegion { + ver: 1, + hdr: size_of::() as u32, + addr: (frts_addr >> 12) as u32, + size: (frts_size >> 12) as u32, + ftype: NVFW_FRTS_CMD_REGION_TYPE_FB, + }; + + NVFW_FALCON_APPIF_DMEMMAPPER_CMD_FRTS + } + FwsecCommand::Sb => NVFW_FALCON_APPIF_DMEMMAPPER_CMD_SB, + }; + + // Return early as we found and patched the DMEMMAPPER region. + return Ok(Self(dma_object, PhantomData)); + } + + Err(ENOTSUPP) + } +} + +impl FwsecFirmware { + /// Extract the Fwsec firmware from `bios` and patch it to run on `falcon` with the `cmd` + /// command. + pub(crate) fn new( + dev: &Device, + falcon: &Falcon, + bar: &Bar0, + bios: &Vbios, + cmd: FwsecCommand, + ) -> Result { + let ucode_dma = FirmwareDmaObject::::new_fwsec(dev, bios, cmd)?; + + // Patch signature if needed. + let desc = bios.fwsec_image().header(dev)?; + let ucode_signed = if desc.signature_count != 0 { + let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; + let desc_sig_versions = desc.signature_versions as u32; + let reg_fuse_version = + falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; + dev_dbg!( + dev, + "desc_sig_versions: {:#x}, reg_fuse_version: {}\n", + desc_sig_versions, + reg_fuse_version + ); + let signature_idx = { + let reg_fuse_version_bit = 1 << reg_fuse_version; + + // Check if the fuse version is supported by the firmware. 
+ if desc_sig_versions & reg_fuse_version_bit == 0 { + dev_err!( + dev, + "no matching signature: {:#x} {:#x}\n", + reg_fuse_version_bit, + desc_sig_versions, + ); + return Err(EINVAL); + } + + // `desc_sig_versions` has one bit set per included signature. Thus, the index of + // the signature to patch is the number of bits in `desc_sig_versions` set to `1` + // before `reg_fuse_version_bit`. + + // Mask of the bits of `desc_sig_versions` to preserve. + let reg_fuse_version_mask = reg_fuse_version_bit.wrapping_sub(1); + + (desc_sig_versions & reg_fuse_version_mask).count_ones() as usize + }; + + dev_dbg!(dev, "patching signature with index {}\n", signature_idx); + let signature = bios + .fwsec_image() + .sigs(dev, desc) + .and_then(|sigs| sigs.get(signature_idx).ok_or(EINVAL))?; + + ucode_dma.patch_signature(signature, sig_base_img)? + } else { + ucode_dma.no_patch_signature() + }; + + Ok(FwsecFirmware { + desc: desc.clone(), + ucode: ucode_signed, + }) + } +} diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index a748ce689e61..c53e6135ca7d 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -6,6 +6,7 @@ use crate::falcon::{gsp::Gsp, sec2::Sec2, Falcon}; use crate::fb::FbLayout; use crate::fb::SysmemFlush; +use crate::firmware::fwsec::{FwsecCommand, FwsecFirmware}; use crate::firmware::{Firmware, FIRMWARE_VERSION}; use crate::gfw; use crate::regs; @@ -219,8 +220,18 @@ pub(crate) fn new( let fb_layout = FbLayout::new(spec.chipset, bar)?; dev_dbg!(pdev.as_ref(), "{:#x?}\n", fb_layout); - // Will be used in a later patch when fwsec firmware is needed. 
- let _bios = Vbios::new(pdev, bar)?; + let bios = Vbios::new(pdev, bar)?; + + let _fwsec_frts = FwsecFirmware::new( + pdev.as_ref(), + &gsp_falcon, + bar, + &bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; Ok(pin_init!(Self { spec, diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 15b7ce5c663e..0b47ddb05744 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -2,10 +2,8 @@ //! VBIOS extraction and parsing. -// To be removed when all code is used. -#![expect(dead_code)] - use crate::driver::Bar0; +use crate::firmware::fwsec::Bcrt30Rsa3kSignature; use crate::firmware::FalconUCodeDescV3; use core::convert::TryFrom; use kernel::device; @@ -1133,15 +1131,18 @@ pub(crate) fn ucode(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Re } /// Get the signatures as a byte slice - pub(crate) fn sigs(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Result<&[u8]> { - const SIG_SIZE: usize = 96 * 4; - + pub(crate) fn sigs( + &self, + dev: &device::Device, + desc: &FalconUCodeDescV3, + ) -> Result<&[Bcrt30Rsa3kSignature]> { // The signatures data follows the descriptor. let sigs_data_offset = self.falcon_ucode_offset + core::mem::size_of::(); - let size = desc.signature_count as usize * SIG_SIZE; + let sigs_size = + desc.signature_count as usize * core::mem::size_of::(); // Make sure the data is within bounds. - if sigs_data_offset + size > self.base.data.len() { + if sigs_data_offset + sigs_size > self.base.data.len() { dev_err!( dev, "fwsec signatures data not contained within BIOS bounds\n" @@ -1149,6 +1150,17 @@ pub(crate) fn sigs(&self, dev: &device::Device, desc: &FalconUCodeDescV3) -> Res return Err(ERANGE); } - Ok(&self.base.data[sigs_data_offset..sigs_data_offset + size]) + // SAFETY: we checked that `data + sigs_data_offset + (signature_count * + // sizeof::()` is within the bounds of `data`. 
+ Ok(unsafe { + core::slice::from_raw_parts( + self.base + .data + .as_ptr() + .add(sigs_data_offset) + .cast::(), + desc.signature_count as usize, + ) + }) } } From 859aa3d940585db3f3513964020c2a42614804dc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:07 +0900 Subject: [PATCH 038/358] gpu: nova-core: load and run FWSEC-FRTS With all the required pieces in place, load FWSEC-FRTS onto the GSP falcon, run it, and check that it successfully carved out the WPR2 region out of framebuffer memory. Reviewed-by: Lyude Paul Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-23-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 3 - drivers/gpu/nova-core/firmware/fwsec.rs | 25 +++++++ drivers/gpu/nova-core/gpu.rs | 90 ++++++++++++++++++++++--- drivers/gpu/nova-core/regs.rs | 31 +++++++++ 4 files changed, 136 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index ba14cb24b80d..fe4d3d458a6b 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -2,9 +2,6 @@ //! Falcon microprocessor base support -// To be removed when all code is used. -#![expect(dead_code)] - use core::ops::Deref; use core::time::Duration; use hal::FalconHal; diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index de179c201139..6058598ce76e 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -395,4 +395,29 @@ pub(crate) fn new( ucode: ucode_signed, }) } + + /// Loads the FWSEC firmware into `falcon` and execute it. + pub(crate) fn run( + &self, + dev: &Device, + falcon: &Falcon, + bar: &Bar0, + ) -> Result<()> { + // Reset falcon, load the firmware, and run it. 
+ falcon + .reset(bar) + .inspect_err(|e| dev_err!(dev, "Failed to reset GSP falcon: {:?}\n", e))?; + falcon + .dma_load(bar, self) + .inspect_err(|e| dev_err!(dev, "Failed to load FWSEC firmware: {:?}\n", e))?; + let (mbox0, _) = falcon + .boot(bar, Some(0), None) + .inspect_err(|e| dev_err!(dev, "Failed to boot FWSEC firmware: {:?}\n", e))?; + if mbox0 != 0 { + dev_err!(dev, "FWSEC firmware returned error {}\n", mbox0); + Err(EIO) + } else { + Ok(()) + } + } } diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index c53e6135ca7d..43c8120559a7 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -184,6 +184,85 @@ fn drop(self: Pin<&mut Self>) { } impl Gpu { + /// Helper function to load and run the FWSEC-FRTS firmware and confirm that it has properly + /// created the WPR2 region. + /// + /// TODO: this needs to be moved into a larger type responsible for booting the whole GSP + /// (`GspBooter`?). + fn run_fwsec_frts( + dev: &device::Device, + falcon: &Falcon, + bar: &Bar0, + bios: &Vbios, + fb_layout: &FbLayout, + ) -> Result<()> { + // Check that the WPR2 region does not already exists - if it does, we cannot run + // FWSEC-FRTS until the GPU is reset. + if regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound() != 0 { + dev_err!( + dev, + "WPR2 region already exists - GPU needs to be reset to proceed\n" + ); + return Err(EBUSY); + } + + let fwsec_frts = FwsecFirmware::new( + dev, + falcon, + bar, + bios, + FwsecCommand::Frts { + frts_addr: fb_layout.frts.start, + frts_size: fb_layout.frts.end - fb_layout.frts.start, + }, + )?; + + // Run FWSEC-FRTS to create the WPR2 region. + fwsec_frts.run(dev, falcon, bar)?; + + // SCRATCH_E contains the error code for FWSEC-FRTS. 
+ let frts_status = regs::NV_PBUS_SW_SCRATCH_0E::read(bar).frts_err_code(); + if frts_status != 0 { + dev_err!( + dev, + "FWSEC-FRTS returned with error code {:#x}", + frts_status + ); + + return Err(EIO); + } + + // Check that the WPR2 region has been created as we requested. + let (wpr2_lo, wpr2_hi) = ( + regs::NV_PFB_PRI_MMU_WPR2_ADDR_LO::read(bar).lower_bound(), + regs::NV_PFB_PRI_MMU_WPR2_ADDR_HI::read(bar).higher_bound(), + ); + + match (wpr2_lo, wpr2_hi) { + (_, 0) => { + dev_err!(dev, "WPR2 region not created after running FWSEC-FRTS\n"); + + Err(EIO) + } + (wpr2_lo, _) if wpr2_lo != fb_layout.frts.start => { + dev_err!( + dev, + "WPR2 region created at unexpected address {:#x}; expected {:#x}\n", + wpr2_lo, + fb_layout.frts.start, + ); + + Err(EIO) + } + (wpr2_lo, wpr2_hi) => { + dev_dbg!(dev, "WPR2: {:#x}-{:#x}\n", wpr2_lo, wpr2_hi); + dev_dbg!(dev, "GPU instance built\n"); + + Ok(()) + } + } + } + pub(crate) fn new( pdev: &pci::Device, devres_bar: Devres, @@ -222,16 +301,7 @@ pub(crate) fn new( let bios = Vbios::new(pdev, bar)?; - let _fwsec_frts = FwsecFirmware::new( - pdev.as_ref(), - &gsp_falcon, - bar, - &bios, - FwsecCommand::Frts { - frts_addr: fb_layout.frts.start, - frts_size: fb_layout.frts.end - fb_layout.frts.start, - }, - )?; + Self::run_fwsec_frts(pdev.as_ref(), &gsp_falcon, bar, &bios, &fb_layout)?; Ok(pin_init!(Self { spec, diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 8ca7bcb5a93f..ccfaeed55cff 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -42,6 +42,13 @@ pub(crate) fn chipset(self) -> Result { } } +/* PBUS */ + +// TODO: this is an array of registers. 
+register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { + 31:16 frts_err_code as u16; +}); + /* PFB */ register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { @@ -73,6 +80,30 @@ pub(crate) fn usable_fb_size(self) -> u64 { } } +register!(NV_PFB_PRI_MMU_WPR2_ADDR_LO@0x001fa824 { + 31:4 lo_val as u32, "Bits 12..40 of the lower (inclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { + /// Returns the lower (inclusive) bound of the WPR2 region. + pub(crate) fn lower_bound(self) -> u64 { + (self.lo_val() as u64) << 12 + } +} + +register!(NV_PFB_PRI_MMU_WPR2_ADDR_HI@0x001fa828 { + 31:4 hi_val as u32, "Bits 12..40 of the higher (exclusive) bound of the WPR2 region"; +}); + +impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { + /// Returns the higher (exclusive) bound of the WPR2 region. + /// + /// A value of zero means the WPR2 region is not set. + pub(crate) fn higher_bound(self) -> u64 { + (self.hi_val() as u64) << 12 + } +} + /* PGC6 */ register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128 { From 3606620b316c29e3de8ff87b40828c722086a9c9 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jun 2025 22:24:08 +0900 Subject: [PATCH 039/358] gpu: nova-core: update and annotate TODO list A few new dependencies are required to remove some of the TODO items: - A way to safely convert from byte slices to types implementing `FromBytes`, - A way to obtain slices and write into a `CoherentAllocation`, - Several improvements to the `register!()` macro, - Alignment operations to powers of two, and an equivalent to the C `fls`, - Support for `xa_alloc` in the XAlloc bindings. Some items have also become obsolete: - The auxiliary bus abstractions have been implemented and are in use, - The ELF utilities are not considered for being part of the core kernel bindings anymore. - VBIOS, falcon and GPU timer have been completed. 
We now have quite a few TODO entries in the code, so annotate them with a 4 letter code representing the corresponding task in `todo.rst`. This allows to easily find which part of the code corresponds to a given entry (and conversely). Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250619-nova-frts-v6-24-ecf41ef99252@nvidia.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/todo.rst | 107 ++++++++++++---------- drivers/gpu/nova-core/dma.rs | 2 +- drivers/gpu/nova-core/driver.rs | 2 +- drivers/gpu/nova-core/falcon.rs | 8 +- drivers/gpu/nova-core/falcon/hal/ga102.rs | 10 +- drivers/gpu/nova-core/fb.rs | 2 +- drivers/gpu/nova-core/firmware/fwsec.rs | 6 +- drivers/gpu/nova-core/gfw.rs | 2 +- drivers/gpu/nova-core/gpu.rs | 2 +- drivers/gpu/nova-core/regs.rs | 8 +- drivers/gpu/nova-core/regs/macros.rs | 2 +- drivers/gpu/nova-core/util.rs | 2 +- drivers/gpu/nova-core/vbios.rs | 2 +- 13 files changed, 84 insertions(+), 71 deletions(-) diff --git a/Documentation/gpu/nova/core/todo.rst b/Documentation/gpu/nova/core/todo.rst index 8a459fc08812..894a1e9c3741 100644 --- a/Documentation/gpu/nova/core/todo.rst +++ b/Documentation/gpu/nova/core/todo.rst @@ -14,14 +14,17 @@ Tasks may have the following fields: - ``Contact``: The person that can be contacted for further information about the task. +A task might have `[ABCD]` code after its name. This code can be used to grep +into the code for `TODO` entries related to it. + Enablement (Rust) ================= Tasks that are not directly related to nova-core, but are preconditions in terms of required APIs. -FromPrimitive API ------------------ +FromPrimitive API [FPRI] +------------------------ Sometimes the need arises to convert a number to a value of an enum or a structure. @@ -41,8 +44,27 @@ automatically generates the corresponding mappings between a value and a number. 
| Complexity: Beginner | Link: https://docs.rs/num/latest/num/trait.FromPrimitive.html -Generic register abstraction ----------------------------- +Conversion from byte slices for types implementing FromBytes [TRSM] +------------------------------------------------------------------- + +We retrieve several structures from byte streams coming from the BIOS or loaded +firmware. At the moment converting the byte slice into the proper type requires +an inelegant `unsafe` operation; this will go away once `FromBytes` implements +a proper `from_bytes` method. + +| Complexity: Beginner + +CoherentAllocation improvements [COHA] +-------------------------------------- + +`CoherentAllocation` needs a safe way to write into the allocation, and to +obtain slices within the allocation. + +| Complexity: Beginner +| Contact: Abdiel Janulgue + +Generic register abstraction [REGA] +----------------------------------- Work out how register constants and structures can be automatically generated through generalized macros. @@ -102,16 +124,40 @@ Usage: let boot0 = Boot0::read(&bar); pr_info!("Revision: {}\n", boot0.revision()); -Note: a work-in-progress implementation currently resides in +A work-in-progress implementation currently resides in `drivers/gpu/nova-core/regs/macros.rs` and is used in nova-core. It would be nice to improve it (possibly using proc macros) and move it to the `kernel` crate so it can be used by other components as well. +Features desired before this happens: + +* Relative registers with build-time base address validation, +* Arrays of registers with build-time index validation, +* Make I/O optional (for field values that are not registers), +* Support other sizes than `u32`, +* Allow visibility control for registers and individual fields, +* Use Rust slice syntax to express field ranges.
+ | Complexity: Advanced | Contact: Alexandre Courbot -Delay / Sleep abstractions --------------------------- +Numerical operations [NUMM] +--------------------------- + +Nova uses integer operations that are not part of the standard library (or not +implemented in an optimized way for the kernel). These include: + +- Aligning up and down to a power of two, +- The "Find Last Set Bit" (`fls` function of the C part of the kernel) + operation. + +A `num` core kernel module is being designed to provide these operations. + +| Complexity: Intermediate +| Contact: Alexandre Courbot + +Delay / Sleep abstractions [DLAY] +--------------------------------- Rust abstractions for the kernel's delay() and sleep() functions. @@ -159,18 +205,6 @@ mailing list yet. | Complexity: Intermediate | Contact: Abdiel Janulgue -ELF utils ---------- - -Rust implementation of ELF header representation to retrieve section header -tables, names, and data from an ELF-formatted images. - -There is preceding work from Abdiel Janulgue, which hasn't made it to the -mailing list yet. - -| Complexity: Beginner -| Contact: Abdiel Janulgue - PCI MISC APIs ------------- @@ -179,12 +213,11 @@ capability, MSI API abstractions. | Complexity: Beginner -Auxiliary bus abstractions --------------------------- +XArray bindings [XARR] +---------------------- -Rust abstraction for the auxiliary bus APIs. - -This is needed to connect nova-core to the nova-drm driver. +We need bindings for `xa_alloc`/`xa_alloc_cyclic` in order to generate the +auxiliary device IDs. | Complexity: Intermediate @@ -216,15 +249,6 @@ Build the radix3 page table to map the firmware. | Complexity: Intermediate | Contact: Abdiel Janulgue -vBIOS support -------------- - -Parse the vBIOS and probe the structures required for driver initialization. - -| Contact: Dave Airlie -| Reference: Vec extensions -| Complexity: Intermediate - Initial Devinit support ----------------------- @@ -234,23 +258,6 @@ configuration. 
| Contact: Dave Airlie | Complexity: Beginner -Boot Falcon controller ----------------------- - -Infrastructure to load and execute falcon (sec2) firmware images; handle the -GSP falcon processor and fwsec loading. - -| Complexity: Advanced -| Contact: Dave Airlie - -GPU Timer support ------------------ - -Support for the GPU's internal timer peripheral. - -| Complexity: Beginner -| Contact: Dave Airlie - MMU / PT management ------------------- diff --git a/drivers/gpu/nova-core/dma.rs b/drivers/gpu/nova-core/dma.rs index 1f1f8c378d8e..94f44bcfd748 100644 --- a/drivers/gpu/nova-core/dma.rs +++ b/drivers/gpu/nova-core/dma.rs @@ -26,7 +26,7 @@ pub(crate) fn new(dev: &device::Device, len: usize) -> Result, data: &[u8]) -> Result { Self::new(dev, data.len()).map(|mut dma_obj| { - // TODO: replace with `CoherentAllocation::write()` once available. + // TODO[COHA]: replace with `CoherentAllocation::write()` once available. // SAFETY: // - `dma_obj`'s size is at least `data.len()`. // - We have just created this object and there is no other user at this stage. diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs index ffe25c7a2fda..518ef8739550 100644 --- a/drivers/gpu/nova-core/driver.rs +++ b/drivers/gpu/nova-core/driver.rs @@ -42,7 +42,7 @@ fn probe(pdev: &pci::Device, _info: &Self::IdInfo) -> Result for FalconCoreRev { type Error = Error; @@ -68,6 +69,7 @@ pub(crate) enum FalconCoreRevSubversion { Subversion3 = 3, } +// TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconCoreRevSubversion { type Error = Error; @@ -101,6 +103,7 @@ pub(crate) enum FalconSecurityModel { Heavy = 3, } +// TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconSecurityModel { type Error = Error; @@ -128,6 +131,7 @@ pub(crate) enum FalconModSelAlgo { Rsa3k = 1, } +// TODO[FPRI]: replace with `FromPrimitive`. 
impl TryFrom for FalconModSelAlgo { type Error = Error; @@ -148,6 +152,7 @@ pub(crate) enum DmaTrfCmdSize { Size256B = 0x6, } +// TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for DmaTrfCmdSize { type Error = Error; @@ -199,6 +204,7 @@ pub(crate) enum FalconFbifTarget { NoncoherentSysmem = 2, } +// TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconFbifTarget { type Error = Error; @@ -354,7 +360,7 @@ fn reset_eng(&self, bar: &Bar0) -> Result { regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(true)); - // TODO: replace with udelay() or equivalent once available. + // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. let _: Result = util::wait_on(Duration::from_micros(10), || None); diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 0a4e5e7adf8c..664327f75cf4 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -42,10 +42,10 @@ fn signature_reg_fuse_version_ga102( engine_id_mask: u16, ucode_id: u8, ) -> Result { - // TODO: The ucode fuse versions are contained in the FUSE_OPT_FPF__UCODE_VERSION - // registers, which are an array. Our register definition macros do not allow us to manage them - // properly, so we need to hardcode their addresses for now. Clean this up once we support - // register arrays. + // TODO[REGA]: The ucode fuse versions are contained in the + // FUSE_OPT_FPF__UCODE_VERSION registers, which are an array. Our register + // definition macros do not allow us to manage them properly, so we need to hardcode their + // addresses for now. Clean this up once we support register arrays. // Each engine has 16 ucode version registers numbered from 1 to 16. 
if ucode_id == 0 || ucode_id > 16 { @@ -69,7 +69,7 @@ fn signature_reg_fuse_version_ga102( let reg_fuse_version = bar.read32(reg_fuse_base + ((ucode_id - 1) as usize * core::mem::size_of::())); - // TODO: replace with `last_set_bit` once it lands. + // TODO[NUMM]: replace with `last_set_bit` once it lands. Ok(u32::BITS - reg_fuse_version.leading_zeros()) } diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 5719de5c4759..172b4a12ba2a 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -122,7 +122,7 @@ pub(crate) fn new(chipset: Chipset, bar: &Bar0) -> Result { let frts = { const FRTS_DOWN_ALIGN: u64 = SZ_128K as u64; const FRTS_SIZE: u64 = SZ_1M as u64; - // TODO: replace with `align_down` once it lands. + // TODO[NUMM]: replace with `align_down` once it lands. let frts_base = (vga_workspace.start & !(FRTS_DOWN_ALIGN - 1)) - FRTS_SIZE; frts_base..frts_base + FRTS_SIZE diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 6058598ce76e..047aab76470e 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -150,8 +150,8 @@ impl FirmwareSignature for Bcrt30Rsa3kSignature {} /// Callers must ensure that the region of memory returned is not written for as long as the /// returned reference is alive. 
/// -/// TODO: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is available and we -/// have a way to transmute objects implementing FromBytes, e.g.: +/// TODO[TRSM][COHA]: Remove this and `transmute_mut` once `CoherentAllocation::as_slice` is +/// available and we have a way to transmute objects implementing FromBytes, e.g.: /// https://lore.kernel.org/lkml/20250330234039.29814-1-christiansantoslima21@gmail.com/ unsafe fn transmute<'a, 'b, T: Sized + FromBytes>( fw: &'a DmaObject, @@ -218,7 +218,7 @@ fn dmem_load_params(&self) -> FalconLoadTarget { FalconLoadTarget { src_start: self.desc.imem_load_size, dst_start: self.desc.dmem_phys_base, - // TODO: replace with `align_up` once it lands. + // TODO[NUMM]: replace with `align_up` once it lands. len: self .desc .dmem_load_size diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index 937e820e00fc..ce03ac9f4d9d 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -29,7 +29,7 @@ pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { if gfw_booted { Some(()) } else { - // TODO: replace with [1] once merged. + // TODO[DLAY]: replace with [1] once it merges. // [1] https://lore.kernel.org/rust-for-linux/20250423192857.199712-6-fujita.tomonori@gmail.com/ // // SAFETY: `msleep()` is safe to call with any parameter. 
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 43c8120559a7..8e32af16b669 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -37,7 +37,7 @@ impl Chipset { ]; } - // TODO replace with something like derive(FromPrimitive) + // TODO[FPRI]: replace with something like derive(FromPrimitive) impl TryFrom for Chipset { type Error = kernel::error::Error; diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index ccfaeed55cff..707f87d6828d 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -44,7 +44,7 @@ pub(crate) fn chipset(self) -> Result { /* PBUS */ -// TODO: this is an array of registers. +// TODO[REGA]: this is an array of registers. register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { 31:16 frts_err_code as u16; }); @@ -110,7 +110,7 @@ pub(crate) fn higher_bound(self) -> u64 { 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; }); -// TODO: This is an array of registers. +// TODO[REGA]: This is an array of registers. register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05 @ 0x00118234 { 31:0 value as u32; }); @@ -272,7 +272,7 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 0:0 reset as bool; }); -// TODO: this is an array of registers. +// TODO[REGA]: this is an array of registers. register!(NV_PFALCON_FBIF_TRANSCFG @ +0x00000600 { 1:0 target as u8 ?=> FalconFbifTarget; 2:2 mem_type as bool => FalconFbifMemType; @@ -294,7 +294,7 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 31:0 value as u32; }); -// TODO: this is an array of registers. +// TODO[REGA]: this is an array of registers. 
register!(NV_PFALCON2_FALCON_BROM_PARAADDR @ +0x00001210 { 31:0 value as u32; }); diff --git a/drivers/gpu/nova-core/regs/macros.rs b/drivers/gpu/nova-core/regs/macros.rs index e0e6fef3796f..cdf668073480 100644 --- a/drivers/gpu/nova-core/regs/macros.rs +++ b/drivers/gpu/nova-core/regs/macros.rs @@ -147,7 +147,7 @@ impl $name { pub(crate) const OFFSET: usize = $offset; } - // TODO: display the raw hex value, then the value of all the fields. This requires + // TODO[REGA]: display the raw hex value, then the value of all the fields. This requires // matching the fields, which will complexify the syntax considerably... impl ::core::fmt::Debug for $name { fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 69f29238b25e..5cafe0797cd6 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -32,7 +32,7 @@ pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { /// `Err(ETIMEDOUT)` is returned if `timeout` has been reached without `cond` evaluating to /// `Some`. /// -/// TODO: replace with `read_poll_timeout` once it is available. +/// TODO[DLAY]: replace with `read_poll_timeout` once it is available. /// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) pub(crate) fn wait_on Option>(timeout: Duration, cond: F) -> Result { let start_time = Instant::now(); diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 0b47ddb05744..feb80c847077 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -177,7 +177,7 @@ fn next(&mut self) -> Option { // Advance to next image (aligned to 512 bytes). self.current_offset += image_size; - // TODO: replace with `align_up` once it lands. + // TODO[NUMM]: replace with `align_up` once it lands. 
self.current_offset = self.current_offset.next_multiple_of(512); Some(Ok(full_image)) From 1beae9aa2b88d3a02eb666e7b777eb2d7bc645f4 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:49:58 -0700 Subject: [PATCH 040/358] drm/xe/guc_pc: Add _locked variant for min/max freq There are places in which the getters/setters are called one after the other causing a multiple lock()/unlock(). These are not currently a problem since they are all happening from the same thread, but there's a race possibility as calls are added outside of the early init when the max/min and stashed values need to be correlated. Add the _locked() variants to prepare for that. Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-1-b888388477f2@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 123 ++++++++++++++++++--------------- 1 file changed, 69 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 9fab5f5b10fa..624e3a1a7812 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -5,6 +5,7 @@ #include "xe_guc_pc.h" +#include #include #include @@ -554,6 +555,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) return pc->rpn_freq; } +static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq) +{ + int ret; + + lockdep_assert_held(&pc->freq_lock); + + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); + if (ret) + return ret; + + *freq = pc_get_min_freq(pc); + + return 0; +} + /** * xe_guc_pc_get_min_freq - Get the min operational frequency * @pc: The GuC PC @@ -563,27 +583,29 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc) * -EAGAIN if GuC PC not ready (likely in middle of a reset). 
*/ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq) { int ret; - xe_device_assert_mem_access(pc_to_xe(pc)); + lockdep_assert_held(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; - ret = pc_action_query_task_state(pc); + ret = pc_set_min_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_min_freq(pc); + pc->user_requested_min = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -596,25 +618,29 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq) * -EINVAL if value out of bounds. */ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_set_min_freq_locked(pc, freq); +} + +static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq) { int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_set_min_freq(pc, freq); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_action_query_task_state(pc); if (ret) - goto out; + return ret; - pc->user_requested_min = freq; + *freq = pc_get_max_freq(pc); -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -626,25 +652,29 @@ int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq) * -EAGAIN if GuC PC not ready (likely in middle of a reset). 
*/ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) +{ + guard(mutex)(&pc->freq_lock); + + return xe_guc_pc_get_max_freq_locked(pc, freq); +} + +static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) { int ret; - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } + lockdep_assert_held(&pc->freq_lock); - ret = pc_action_query_task_state(pc); + /* Might be in the middle of a gt reset */ + if (!pc->freq_ready) + return -EAGAIN; + + ret = pc_set_max_freq(pc, freq); if (ret) - goto out; + return ret; - *freq = pc_get_max_freq(pc); + pc->user_requested_max = freq; -out: - mutex_unlock(&pc->freq_lock); - return ret; + return 0; } /** @@ -658,24 +688,9 @@ int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq) */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { - int ret; + guard(mutex)(&pc->freq_lock); - mutex_lock(&pc->freq_lock); - if (!pc->freq_ready) { - /* Might be in the middle of a gt reset */ - ret = -EAGAIN; - goto out; - } - - ret = pc_set_max_freq(pc, freq); - if (ret) - goto out; - - pc->user_requested_max = freq; - -out: - mutex_unlock(&pc->freq_lock); - return ret; + return xe_guc_pc_set_max_freq_locked(pc, freq); } /** From d878c97daa603573e5af01fd8beec2fffdb42ad1 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:49:59 -0700 Subject: [PATCH 041/358] drm/xe/xe_guc_pc: Lock once to update stashed frequencies pc_set_mert_freq_cap() currently lock()/unlock() the mutex multiple times to stash the current frequencies. It's not a problem since xe_guc_pc_restore_stashed_freq() is guaranteed to be called only later in the init sequence. However, now that we have _locked() variants for these functions, use them and avoid potential issues when called from other places or using the same pattern. While at it, prefer an early return for the WA check to reduce indentation.
Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-2-b888388477f2@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_pc.c | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 624e3a1a7812..6224cc4fefb9 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -890,27 +890,28 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) { - int ret = 0; + int ret; - if (XE_WA(pc_to_gt(pc), 22019338487)) { - /* - * Get updated min/max and stash them. - */ - ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq); - if (!ret) - ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq); - if (ret) - return ret; + if (!XE_WA(pc_to_gt(pc), 22019338487)) + return 0; - /* - * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. - */ - mutex_lock(&pc->freq_lock); - ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); - if (!ret) - ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); - mutex_unlock(&pc->freq_lock); - } + guard(mutex)(&pc->freq_lock); + + /* + * Get updated min/max and stash them. + */ + ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq); + if (!ret) + ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq); + if (ret) + return ret; + + /* + * Ensure min and max are bound by MERT_FREQ_CAP until driver loads. 
+ */ + ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc))); + if (!ret) + ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc))); return ret; } From 5e300ed8a545bdffc26b579c526b5fef7b2d5365 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 18 Jun 2025 11:50:00 -0700 Subject: [PATCH 042/358] drm/xe: Split xe_device_td_flush() xe_device_td_flush() has 2 possible implementations: an entire L2 flush or a transient flush, depending on WA 16023588340. Make this clear by splitting the function so it calls each of them. Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-3-b888388477f2@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 68 ++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index e160e7be84f0..f95ae0922461 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -986,38 +986,15 @@ void xe_device_wmb(struct xe_device *xe) xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0); } -/** - * xe_device_td_flush() - Flush transient L3 cache entries - * @xe: The device - * - * Display engine has direct access to memory and is never coherent with L3/L4 - * caches (or CPU caches), however KMD is responsible for specifically flushing - * transient L3 GPU cache entries prior to the flip sequence to ensure scanout - * can happen from such a surface without seeing corruption. - * - * Display surfaces can be tagged as transient by mapping it using one of the - * various L3:XD PAT index modes on Xe2. - * - * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed - * at the end of each submission via PIPE_CONTROL for compute/render, since SA - * Media is not coherent with L3 and we want to support render-vs-media - * usescases. 
For other engines like copy/blt the HW internally forces uncached - * behaviour, hence why we can skip the TDF on such platforms. +/* + * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt. */ -void xe_device_td_flush(struct xe_device *xe) +static void tdf_request_sync(struct xe_device *xe) { - struct xe_gt *gt; unsigned int fw_ref; + struct xe_gt *gt; u8 id; - if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) - return; - - if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { - xe_device_l2_flush(xe); - return; - } - for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -1027,6 +1004,7 @@ void xe_device_td_flush(struct xe_device *xe) return; xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST); + /* * FIXME: We can likely do better here with our choice of * timeout. Currently we just assume the worst case, i.e. 150us, @@ -1057,15 +1035,49 @@ void xe_device_l2_flush(struct xe_device *xe) return; spin_lock(&gt->global_invl_lock); - xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); + xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1); if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true)) xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(&gt->global_invl_lock); xe_force_wake_put(gt_to_fw(gt), fw_ref); } +/** + * xe_device_td_flush() - Flush transient L3 cache entries + * @xe: The device + * + * Display engine has direct access to memory and is never coherent with L3/L4 + * caches (or CPU caches), however KMD is responsible for specifically flushing + * transient L3 GPU cache entries prior to the flip sequence to ensure scanout + * can happen from such a surface without seeing corruption. + * + * Display surfaces can be tagged as transient by mapping it using one of the + * various L3:XD PAT index modes on Xe2.
+ * + * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed + * at the end of each submission via PIPE_CONTROL for compute/render, since SA + * Media is not coherent with L3 and we want to support render-vs-media + * usescases. For other engines like copy/blt the HW internally forces uncached + * behaviour, hence why we can skip the TDF on such platforms. + */ +void xe_device_td_flush(struct xe_device *xe) +{ + struct xe_gt *root_gt; + + if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) + return; + + root_gt = xe_root_mmio_gt(xe); + if (XE_WA(root_gt, 16023588340)) + /* A transient flush is not sufficient: flush the L2 */ + xe_device_l2_flush(xe); + else + tdf_request_sync(xe); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? From deea6a7d6d803d6bb874a3e6f1b312e560e6c6df Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Wed, 18 Jun 2025 11:50:01 -0700 Subject: [PATCH 043/358] drm/xe/bmg: Update Wa_22019338487 Limit GT max frequency to 2600MHz and wait for frequency to reduce before proceeding with a transient flush. This is really only needed for the transient flush: if L2 flush is needed due to 16023588340 then there's no need to do this additional wait since we are already using the bigger hammer. 
v2: Use generic names, ensure user set max frequency requests wait for flush to complete (Rodrigo) v3: - User requests wait via wait_var_event_timeout (Lucas) - Close races on flush + user requests (Lucas) - Fix xe_guc_pc_remove_flush_freq_limit() being called on last gt rather than root gt (Lucas) v4: - Only apply the freq reducing part if a TDF is needed: L2 flush trumps the need for waiting a lower frequency Fixes: aaa08078e725 ("drm/xe/bmg: Apply Wa_22019338487") Reviewed-by: Rodrigo Vivi Signed-off-by: Vinay Belgaumkar Link: https://lore.kernel.org/r/20250618-wa-22019338487-v5-4-b888388477f2@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 8 +- drivers/gpu/drm/xe/xe_guc_pc.c | 125 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_pc.h | 2 + drivers/gpu/drm/xe/xe_guc_pc_types.h | 2 + 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f95ae0922461..cd17c1354ab3 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -40,6 +40,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_vf.h" #include "xe_guc.h" +#include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" @@ -1071,11 +1072,14 @@ void xe_device_td_flush(struct xe_device *xe) return; root_gt = xe_root_mmio_gt(xe); - if (XE_WA(root_gt, 16023588340)) + if (XE_WA(root_gt, 16023588340)) { /* A transient flush is not sufficient: flush the L2 */ xe_device_l2_flush(xe); - else + } else { + xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc); tdf_request_sync(xe); + xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc); + } } u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c index 6224cc4fefb9..68b192fe3b32 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.c +++ b/drivers/gpu/drm/xe/xe_guc_pc.c @@ -7,7 +7,9 @@ #include #include +#include #include +#include 
#include #include @@ -53,9 +55,11 @@ #define LNL_MERT_FREQ_CAP 800 #define BMG_MERT_FREQ_CAP 2133 #define BMG_MIN_FREQ 1200 +#define BMG_MERT_FLUSH_FREQ_CAP 2600 #define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */ #define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */ +#define SLPC_ACT_FREQ_TIMEOUT_MS 100 /** * DOC: GuC Power Conservation (PC) @@ -143,6 +147,36 @@ static int wait_for_pc_state(struct xe_guc_pc *pc, return -ETIMEDOUT; } +static int wait_for_flush_complete(struct xe_guc_pc *pc) +{ + const unsigned long timeout = msecs_to_jiffies(30); + + if (!wait_var_event_timeout(&pc->flush_freq_limit, + !atomic_read(&pc->flush_freq_limit), + timeout)) + return -ETIMEDOUT; + + return 0; +} + +static int wait_for_act_freq_limit(struct xe_guc_pc *pc, u32 freq) +{ + int timeout_us = SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC; + int slept, wait = 10; + + for (slept = 0; slept < timeout_us;) { + if (xe_guc_pc_get_act_freq(pc) <= freq) + return 0; + + usleep_range(wait, wait << 1); + slept += wait; + wait <<= 1; + if (slept + wait > timeout_us) + wait = timeout_us - slept; + } + + return -ETIMEDOUT; +} static int pc_action_reset(struct xe_guc_pc *pc) { struct xe_guc_ct *ct = pc_to_ct(pc); @@ -688,6 +722,11 @@ static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq) */ int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq) { + if (XE_WA(pc_to_gt(pc), 22019338487)) { + if (wait_for_flush_complete(pc) != 0) + return -EAGAIN; + } + guard(mutex)(&pc->freq_lock); return xe_guc_pc_set_max_freq_locked(pc, freq); @@ -888,6 +927,92 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc) return ret; } +static bool needs_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + + return XE_WA(gt, 22019338487) && + pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP; +} + +/** + * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush + * @pc: the xe_guc_pc object + * + * As per the WA, 
reduce max GT frequency during L2 cache flush + */ +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + u32 max_freq; + int ret; + + if (!needs_flush_freq_limit(pc)) + return; + + guard(mutex)(&pc->freq_lock); + + ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq); + if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) { + ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) { + xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); + return; + } + + atomic_set(&pc->flush_freq_limit, 1); + + /* + * If user has previously changed max freq, stash that value to + * restore later, otherwise use the current max. New user + * requests wait on flush. + */ + if (pc->user_requested_max != 0) + pc->stashed_max_freq = pc->user_requested_max; + else + pc->stashed_max_freq = max_freq; + } + + /* + * Wait for actual freq to go below the flush cap: even if the previous + * max was below cap, the current one might still be above it + */ + ret = wait_for_act_freq_limit(pc, BMG_MERT_FLUSH_FREQ_CAP); + if (ret) + xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n", + BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret)); +} + +/** + * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes. + * @pc: the xe_guc_pc object + * + * Retrieve the previous GT max frequency value. 
+ */ +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc) +{ + struct xe_gt *gt = pc_to_gt(pc); + int ret = 0; + + if (!needs_flush_freq_limit(pc)) + return; + + if (!atomic_read(&pc->flush_freq_limit)) + return; + + mutex_lock(&pc->freq_lock); + + ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq); + if (ret) + xe_gt_err_once(gt, "Failed to restore max freq %u:%d", + pc->stashed_max_freq, ret); + + atomic_set(&pc->flush_freq_limit, 0); + mutex_unlock(&pc->freq_lock); + wake_up_var(&pc->flush_freq_limit); +} + static int pc_set_mert_freq_cap(struct xe_guc_pc *pc) { int ret; diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h index 0a2664d5c811..52ecdd5ddbff 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc.h +++ b/drivers/gpu/drm/xe/xe_guc_pc.h @@ -38,5 +38,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc); void xe_guc_pc_init_early(struct xe_guc_pc *pc); int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc); void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc); +void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc); +void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc); #endif /* _XE_GUC_PC_H_ */ diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h index 2978ac9a249b..c02053948a57 100644 --- a/drivers/gpu/drm/xe/xe_guc_pc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h @@ -15,6 +15,8 @@ struct xe_guc_pc { /** @bo: GGTT buffer object that is shared with GuC PC */ struct xe_bo *bo; + /** @flush_freq_limit: 1 when max freq changes are limited by driver */ + atomic_t flush_freq_limit; /** @rp0_freq: HW RP0 frequency - The Maximum one */ u32 rp0_freq; /** @rpa_freq: HW RPa frequency - The Achievable one */ From 0dd2dd0182bc444a62652e89d08c7f0e4fde15ba Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 6 Jun 2025 11:45:47 +0100 Subject: [PATCH 044/358] drm/xe: Move DSB l2 flush to a more sensible place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit Flushing l2 is only needed after all data has been written. Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Signed-off-by: Maarten Lankhorst Cc: Matthew Auld Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Reviewed-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Link: https://lore.kernel.org/r/20250606104546.1996818-3-matthew.auld@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_dsb_buffer.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index f95375451e2f..9f941fc2e36b 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); - xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { - struct xe_device *xe = dsb_buf->vma->bo->tile->xe; - WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); - xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) @@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + /* * The memory barrier here is to ensure coherency of DSB vs MMIO, * both for weak ordering archs and 
discrete cards. */ - xe_device_wmb(dsb_buf->vma->bo->tile->xe); + xe_device_wmb(xe); + xe_device_l2_flush(xe); } From 35db1da40c8cfd7511dc42f342a133601eb45449 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Jun 2025 11:45:48 +0100 Subject: [PATCH 045/358] drm/xe: move DPT l2 flush to a more sensible place MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only need the flush for DPT host updates here. Normal GGTT updates don't need special flush. Fixes: 01570b446939 ("drm/xe/bmg: implement Wa_16023588340") Signed-off-by: Matthew Auld Cc: Maarten Lankhorst Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Ville Syrjälä Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250606104546.1996818-4-matthew.auld@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 6b362695d6b6..5e846f0bec21 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -163,6 +163,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, vma->dpt = dpt; vma->node = dpt->ggtt_node[tile0->id]; + + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return 0; } @@ -326,8 +329,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; - /* Ensure DPT writes are flushed */ - xe_device_l2_flush(xe); return vma; err_unpin: From 89d2835c3680ab1938e22ad81b1c9f8c686bd391 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 13 Jun 2025 00:09:36 +0200 Subject: [PATCH 046/358] drm/xe: Process deferred GGTT node removals on device unwind While we are indirectly draining our dedicated workqueue ggtt->wq that we use to complete asynchronous removal of some GGTT nodes, this happens as part of the managed-drm unwinding (ggtt_fini_early), which could be
later than managed-device unwinding, where we could already unmap our MMIO/GSM mapping (mmio_fini). This was recently observed during unsuccessful VF initialization: [ ] xe 0000:00:02.1: probe with driver xe failed with error -62 [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747340 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747540 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747240 __xe_bo_unpin_map_no_vm (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747040 tiles_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746840 mmio_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e747f40 xe_bo_pinned_fini (16 bytes) [ ] xe 0000:00:02.1: DEVRES REL ffff88811e746b40 devm_drm_dev_init_release (16 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] drmres release begin [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef81640 __fini_relay (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80d40 guc_ct_fini (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80040 __drmm_mutex_release (8 bytes) [ ] xe 0000:00:02.1: [drm:drm_managed_release] REL ffff88810ef80140 ggtt_fini_early (8 bytes) and this was leading to: [ ] BUG: unable to handle page fault for address: ffffc900058162a0 [ ] #PF: supervisor write access in kernel mode [ ] #PF: error_code(0x0002) - not-present page [ ] Oops: Oops: 0002 [#1] SMP NOPTI [ ] Tainted: [W]=WARN [ ] Workqueue: xe-ggtt-wq ggtt_node_remove_work_func [xe] [ ] RIP: 0010:xe_ggtt_set_pte+0x6d/0x350 [xe] [ ] Call Trace: [ ] [ ] xe_ggtt_clear+0xb0/0x270 [xe] [ ] ggtt_node_remove+0xbb/0x120 [xe] [ ] ggtt_node_remove_work_func+0x30/0x50 [xe] [ ] process_one_work+0x22b/0x6f0 [ ] worker_thread+0x1e8/0x3d Add managed-device action that will explicitly drain the workqueue with all pending node removals prior to releasing MMIO/GSM mapping.
Fixes: 919bb54e989c ("drm/xe: Fix missing runtime outer protection for ggtt_remove_node") Signed-off-by: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250612220937.857-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_ggtt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7b11fa1356f0..a8830cdb185f 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -238,6 +238,13 @@ int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size) } EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit); +static void dev_fini_ggtt(void *arg) +{ + struct xe_ggtt *ggtt = arg; + + drain_workqueue(ggtt->wq); +} + /** * xe_ggtt_init_early - Early GGTT initialization * @ggtt: the &xe_ggtt to be initialized @@ -290,6 +297,10 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (err) return err; + err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt); + if (err) + return err; + if (IS_SRIOV_VF(xe)) { err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile); if (err) From 2ddbb73ec20b98e70a5200cb85deade22ccea2ec Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 13 Jun 2025 00:09:37 +0200 Subject: [PATCH 047/358] drm/xe/guc: Explicitly exit CT safe mode on unwind During driver probe we might be briefly using CT safe mode, which is based on a delayed work, but usually we are able to stop this once we have IRQ fully operational. However, if we abort the probe quite early then during unwind we might try to destroy the workqueue while there is still a pending delayed work that attempts to restart itself which triggers a WARN. 
This was recently observed during unsuccessful VF initialization: [ ] xe 0000:00:02.1: probe with driver xe failed with error -62 [ ] ------------[ cut here ]------------ [ ] workqueue: cannot queue safe_mode_worker_func [xe] on wq xe-g2h-wq [ ] WARNING: CPU: 9 PID: 0 at kernel/workqueue.c:2257 __queue_work+0x287/0x710 [ ] RIP: 0010:__queue_work+0x287/0x710 [ ] Call Trace: [ ] delayed_work_timer_fn+0x19/0x30 [ ] call_timer_fn+0xa1/0x2a0 Exit the CT safe mode on unwind to avoid that warning. Fixes: 09b286950f29 ("drm/xe/guc: Allow CTB G2H processing without G2H IRQ") Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250612220937.857-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 37509f619503..bc4646964c40 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -35,6 +35,11 @@ #include "xe_pm.h" #include "xe_trace_guc.h" +static void receive_g2h(struct xe_guc_ct *ct); +static void g2h_worker_func(struct work_struct *w); +static void safe_mode_worker_func(struct work_struct *w); +static void ct_exit_safe_mode(struct xe_guc_ct *ct); + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) enum { /* Internal states, not error conditions */ @@ -189,14 +194,11 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; + ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); } -static void receive_g2h(struct xe_guc_ct *ct); -static void g2h_worker_func(struct work_struct *w); -static void safe_mode_worker_func(struct work_struct *w); - static void primelockdep(struct xe_guc_ct *ct) { if (!IS_ENABLED(CONFIG_LOCKDEP)) From 1b8233bb24fc703cf3246da850e8f8e6a1cdc5b9 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Jun 2025 15:23:22 +0200 Subject: [PATCH 048/358] gpu: 
nova-core: impl From for u32 for enums used from register! Implement From for u32 for all enum types used within the register!() macro. This avoids a conflict with [1] as reported in [2]. Cc: Alexandre Courbot Cc: Miguel Ojeda Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-5-f43b024581e8@gmail.com [1] Link: https://lore.kernel.org/all/20250624173114.3be38990@canb.auug.org.au/ [2] Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250624132337.2242-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 07be1c30668c..5dac395b139f 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -20,6 +20,17 @@ mod hal; pub(crate) mod sec2; +// TODO[FPRI]: Replace with `ToPrimitive`. +macro_rules! impl_from_enum_to_u32 { + ($enum_type:ty) => { + impl From<$enum_type> for u32 { + fn from(value: $enum_type) -> Self { + value as u32 + } + } + }; +} + /// Revision number of a falcon core, used in the [`crate::regs::NV_PFALCON_FALCON_HWCFG1`] /// register. #[repr(u8)] @@ -34,6 +45,7 @@ pub(crate) enum FalconCoreRev { Rev6 = 6, Rev7 = 7, } +impl_from_enum_to_u32!(FalconCoreRev); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconCoreRev { @@ -68,6 +80,7 @@ pub(crate) enum FalconCoreRevSubversion { Subversion2 = 2, Subversion3 = 3, } +impl_from_enum_to_u32!(FalconCoreRevSubversion); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconCoreRevSubversion { @@ -102,6 +115,7 @@ pub(crate) enum FalconSecurityModel { /// High-Secure: runs signed code with full privileges. Signature is validated by boot ROM. Heavy = 3, } +impl_from_enum_to_u32!(FalconSecurityModel); // TODO[FPRI]: replace with `FromPrimitive`. 
impl TryFrom for FalconSecurityModel { @@ -130,6 +144,7 @@ pub(crate) enum FalconModSelAlgo { #[default] Rsa3k = 1, } +impl_from_enum_to_u32!(FalconModSelAlgo); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconModSelAlgo { @@ -151,6 +166,7 @@ pub(crate) enum DmaTrfCmdSize { #[default] Size256B = 0x6, } +impl_from_enum_to_u32!(DmaTrfCmdSize); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for DmaTrfCmdSize { @@ -173,6 +189,7 @@ pub(crate) enum PeregrineCoreSelect { /// RISC-V core is active. Riscv = 1, } +impl_from_enum_to_u32!(PeregrineCoreSelect); impl From for PeregrineCoreSelect { fn from(value: bool) -> Self { @@ -203,6 +220,7 @@ pub(crate) enum FalconFbifTarget { /// Non-coherent system memory. NoncoherentSysmem = 2, } +impl_from_enum_to_u32!(FalconFbifTarget); // TODO[FPRI]: replace with `FromPrimitive`. impl TryFrom for FalconFbifTarget { @@ -229,6 +247,7 @@ pub(crate) enum FalconFbifMemType { /// Physical memory addresses. Physical = 1, } +impl_from_enum_to_u32!(FalconFbifMemType); /// Conversion from a single-bit register field. impl From for FalconFbifMemType { From 43ad65eca2a165dd94faf7761cbcfc4877a31958 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Jun 2025 15:23:23 +0200 Subject: [PATCH 049/358] gpu: nova-core: consider `clippy::cast_lossless` Fix all warnings caused by `clippy::cast_lossless`, which is going to be enabled by [1]. 
Cc: Alexandre Courbot Cc: Miguel Ojeda Link: https://lore.kernel.org/r/20250615-ptr-as-ptr-v12-5-f43b024581e8@gmail.com [1] Reviewed-by: Alexandre Courbot Tested-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250624132337.2242-2-dakr@kernel.org Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 2 +- drivers/gpu/nova-core/falcon/hal/ga102.rs | 2 +- drivers/gpu/nova-core/fb/hal/ga100.rs | 4 ++-- drivers/gpu/nova-core/fb/hal/tu102.rs | 2 +- drivers/gpu/nova-core/firmware/fwsec.rs | 2 +- drivers/gpu/nova-core/regs.rs | 8 ++++---- drivers/gpu/nova-core/vbios.rs | 8 ++++---- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index 5dac395b139f..be4bf59422c6 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -428,7 +428,7 @@ fn dma_wr>( fw.dma_handle_with_offset(load_offsets.src_start as usize)?, ), }; - if dma_start % DMA_LEN as bindings::dma_addr_t > 0 { + if dma_start % bindings::dma_addr_t::from(DMA_LEN) > 0 { dev_err!( self.dev, "DMA transfer start addresses must be a multiple of {}", diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 664327f75cf4..0344cd33e6ea 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -78,7 +78,7 @@ fn program_brom_ga102(bar: &Bar0, params: &FalconBromParams) -> .set_value(params.pkc_data_offset) .write(bar, E::BASE); regs::NV_PFALCON2_FALCON_BROM_ENGIDMASK::default() - .set_value(params.engine_id_mask as u32) + .set_value(u32::from(params.engine_id_mask)) .write(bar, E::BASE); regs::NV_PFALCON2_FALCON_BROM_CURR_UCODE_ID::default() .set_ucode_id(params.ucode_id) diff --git a/drivers/gpu/nova-core/fb/hal/ga100.rs b/drivers/gpu/nova-core/fb/hal/ga100.rs index 4827721c9860..871c42bf033a 100644 --- a/drivers/gpu/nova-core/fb/hal/ga100.rs +++ b/drivers/gpu/nova-core/fb/hal/ga100.rs @@ -11,8 +11,8 @@ 
use super::tu102::FLUSH_SYSMEM_ADDR_SHIFT; pub(super) fn read_sysmem_flush_page_ga100(bar: &Bar0) -> u64 { - (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08() as u64) << FLUSH_SYSMEM_ADDR_SHIFT - | (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40() as u64) + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT + | u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR_HI::read(bar).adr_63_40()) << FLUSH_SYSMEM_ADDR_SHIFT_HI } diff --git a/drivers/gpu/nova-core/fb/hal/tu102.rs b/drivers/gpu/nova-core/fb/hal/tu102.rs index 6f8ae58e9481..b022c781caf4 100644 --- a/drivers/gpu/nova-core/fb/hal/tu102.rs +++ b/drivers/gpu/nova-core/fb/hal/tu102.rs @@ -10,7 +10,7 @@ pub(super) const FLUSH_SYSMEM_ADDR_SHIFT: u32 = 8; pub(super) fn read_sysmem_flush_page_gm107(bar: &Bar0) -> u64 { - (regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08() as u64) << FLUSH_SYSMEM_ADDR_SHIFT + u64::from(regs::NV_PFB_NISO_FLUSH_SYSMEM_ADDR::read(bar).adr_39_08()) << FLUSH_SYSMEM_ADDR_SHIFT } pub(super) fn write_sysmem_flush_page_gm107(bar: &Bar0, addr: u64) -> Result { diff --git a/drivers/gpu/nova-core/firmware/fwsec.rs b/drivers/gpu/nova-core/firmware/fwsec.rs index 047aab76470e..0dff3cfa90af 100644 --- a/drivers/gpu/nova-core/firmware/fwsec.rs +++ b/drivers/gpu/nova-core/firmware/fwsec.rs @@ -346,7 +346,7 @@ pub(crate) fn new( let desc = bios.fwsec_image().header(dev)?; let ucode_signed = if desc.signature_count != 0 { let sig_base_img = (desc.imem_load_size + desc.pkc_data_offset) as usize; - let desc_sig_versions = desc.signature_versions as u32; + let desc_sig_versions = u32::from(desc.signature_versions); let reg_fuse_version = falcon.signature_reg_fuse_version(bar, desc.engine_id_mask, desc.ucode_id)?; dev_dbg!( diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 707f87d6828d..e8b8aabce3f3 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -68,7 +68,7 @@ pub(crate) fn 
chipset(self) -> Result { impl NV_PFB_PRI_MMU_LOCAL_MEMORY_RANGE { /// Returns the usable framebuffer size, in bytes. pub(crate) fn usable_fb_size(self) -> u64 { - let size = ((self.lower_mag() as u64) << (self.lower_scale() as u64)) + let size = (u64::from(self.lower_mag()) << u64::from(self.lower_scale())) * kernel::sizes::SZ_1M as u64; if self.ecc_mode_enabled() { @@ -87,7 +87,7 @@ pub(crate) fn usable_fb_size(self) -> u64 { impl NV_PFB_PRI_MMU_WPR2_ADDR_LO { /// Returns the lower (inclusive) bound of the WPR2 region. pub(crate) fn lower_bound(self) -> u64 { - (self.lo_val() as u64) << 12 + u64::from(self.lo_val()) << 12 } } @@ -100,7 +100,7 @@ impl NV_PFB_PRI_MMU_WPR2_ADDR_HI { /// /// A value of zero means the WPR2 region is not set. pub(crate) fn higher_bound(self) -> u64 { - (self.hi_val() as u64) << 12 + u64::from(self.hi_val()) << 12 } } @@ -158,7 +158,7 @@ impl NV_PDISP_VGA_WORKSPACE_BASE { /// Returns the base address of the VGA workspace, or `None` if none exists. pub(crate) fn vga_workspace_addr(self) -> Option { if self.status_valid() { - Some((self.addr() as u64) << 16) + Some(u64::from(self.addr()) << 16) } else { None } diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index feb80c847077..663fc50e8b66 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -494,10 +494,10 @@ fn new(pdev: &pci::Device, data: &[u8]) -> Result { if data.len() >= 30 { // Read size_of_block at offset 0x1A. 
size_of_block = Some( - (data[29] as u32) << 24 - | (data[28] as u32) << 16 - | (data[27] as u32) << 8 - | (data[26] as u32), + u32::from(data[29]) << 24 + | u32::from(data[28]) << 16 + | u32::from(data[27]) << 8 + | u32::from(data[26]), ); } From 771f002ef1d6f6c2b9bddf779abd31da6b9ccd25 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Sun, 8 Jun 2025 23:01:33 +0000 Subject: [PATCH 050/358] drm/xe/uapi: Correct sync type definition in comments Commit 37d078e51b4c ("drm/xe/uapi: Split xe_sync types from flags") renamed some DRM_XE_SYNC_* defines but later commits kept using the old names. Correct them with the new definition. v2: correct fixes tag and update commit message to explain why (Lucas) Fixes: 9329f0667215 ("drm/xe/uapi: Use LR abbrev for long-running vms") Fixes: 4b437893a826 ("drm/xe/uapi: More uAPI documentation additions and cosmetic updates") Reviewed-by: Lucas De Marchi Cc: Rodrigo Vivi Cc: Francois Dugast Cc: Zongyao Bai Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250608230133.1250849-1-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 8e8bbdec8c5c..e2426413488f 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -925,9 +925,9 @@ struct drm_xe_gem_mmap_offset { * - %DRM_XE_VM_CREATE_FLAG_LR_MODE - An LR, or Long Running VM accepts * exec submissions to its exec_queues that don't have an upper time * limit on the job execution time. But exec submissions to these - * don't allow any of the flags DRM_XE_SYNC_FLAG_SYNCOBJ, - * DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ, DRM_XE_SYNC_FLAG_DMA_BUF, - * used as out-syncobjs, that is, together with DRM_XE_SYNC_FLAG_SIGNAL. + * don't allow any of the sync types DRM_XE_SYNC_TYPE_SYNCOBJ, + * DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ, used as out-syncobjs, that is, + * together with sync flag DRM_XE_SYNC_FLAG_SIGNAL. 
* LR VMs can be created in recoverable page-fault mode using * DRM_XE_VM_CREATE_FLAG_FAULT_MODE, if the device supports it. * If that flag is omitted, the UMD can not rely on the slightly @@ -1394,7 +1394,7 @@ struct drm_xe_sync { /** * @timeline_value: Input for the timeline sync object. Needs to be - * different than 0 when used with %DRM_XE_SYNC_FLAG_TIMELINE_SYNCOBJ. + * different than 0 when used with %DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ. */ __u64 timeline_value; From dfe6c281323fea9f091f0d30a02579c072eb963c Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 24 Jun 2025 17:12:03 -0700 Subject: [PATCH 051/358] Revert "drm/xe/ptl: Apply Wa_16026007364" This reverts commit 3972872e459d812ab5e481a231a6066cf4f4d0f4. There are several things wrong with the way this WA was implemented: - The KLV is only supported on GuC 70.47.0 or newer, so we shouldn't apply it unconditionally. - The KLV requires 2 DWs of data, which are not currently provided. The GuC currently ignores any unknown KLVs, so on versions older than 70.47.0 nothing happens. However, starting on 70.47.0 the GuC attempts to parse the KLV and fails due to the missing data, causing a GuC load abort. Given that 70.47.0 is the first GuC version approved for public release for PTL, let's revert this patch so it doesn't cause the GuC load to fail with that blob. We can then re-apply it properly fixed after the GuC definition is merged, which will also have the added benefit of running the KLV addition through CI with the right GuC version.
Fixes: 3972872e459d ("drm/xe/ptl: Apply Wa_16026007364") Signed-off-by: Daniele Ceraolo Spurio Cc: sanirban Cc: Badal Nilawar Cc: Rodrigo Vivi Link: https://lore.kernel.org/r/20250625001202.1616606-2-daniele.ceraolospurio@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 1 - drivers/gpu/drm/xe/xe_guc_ads.c | 5 ----- drivers/gpu/drm/xe/xe_wa_oob.rules | 1 - 3 files changed, 7 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index e2583761a70d..7de8f827281f 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -368,7 +368,6 @@ enum xe_guc_klv_ids { GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009, GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a, GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b, - GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c, }; #endif diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index f3c74f9a40c6..07a027755627 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -382,11 +382,6 @@ static void guc_waklv_init(struct xe_guc_ads *ads) GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH, &offset, &remain); - if (XE_WA(gt, 16026007364)) - guc_waklv_enable_simple(ads, - GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG, - &offset, &remain); - size = guc_ads_waklv_size(ads) - remain; if (!size) return; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 8c2aa48cb33a..96cc33da0fb5 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -70,4 +70,3 @@ no_media_l3 MEDIA_VERSION(3000) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID 14022085890 GRAPHICS_VERSION(2001) -16026007364 MEDIA_VERSION(3000) From 5a2f117a80c207372513ca8964eeb178874f4990 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 24 Jun 2025 10:41:03 -0700 
Subject: [PATCH 052/358] drm/xe: Do not wedge device on killed exec queues When a user closes an exec queue or interrupts an app with Ctrl-C, this does not warrant wedging the device in mode 2. Avoid this by skipping the wedge check for killed exec queues in the TDR and LR exec queue cleanup worker. Signed-off-by: Matthew Brost Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250624174103.2707941-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_guc_submit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index df7a5a4eec74..72477ccc5c5e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -908,12 +908,13 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); struct xe_gpu_scheduler *sched = &ge->sched; - bool wedged; + bool wedged = false; xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Kill the run_job / process_msg entry points */ xe_sched_submission_stop(sched); @@ -1084,7 +1085,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) int err = -ETIME; pid_t pid = -1; int i = 0; - bool wedged, skip_timeout_check; + bool wedged = false, skip_timeout_check; /* * TDR has fired before free job worker. Common if exec queue @@ -1130,7 +1131,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * doesn't work for SRIOV. For now assuming timeouts in wedged mode are * genuine timeouts. 
*/ - wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); + if (!exec_queue_killed(q)) + wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); /* Engine state now stable, disable scheduling to check timestamp */ if (!wedged && exec_queue_registered(q)) { From d612799de75b0cddbc00e2261355377493d74d45 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Tue, 24 Jun 2025 19:05:16 +0200 Subject: [PATCH 053/358] MAINTAINERS: Add Alexandre Courbot as co-maintainer to nova-core Alex has extensive experience with NVIDIA GPU support, is closely involved in nova-core development, and brings valuable Rust expertise. In addition to his technical contributions, Alex is actively engaged with the broader community. He works closely with contributors and stakeholders to identify and solve issues, fosters collaboration, and helps ensure that the Nova driver evolves in alignment with the community. I trust him to take a key role in making the Nova driver a successful upstream solution for NVIDIA GPUs in the Linux kernel. Cc: Alexandre Courbot Acked-by: Alexandre Courbot Acked-by: John Hubbard Link: https://lore.kernel.org/r/20250624170517.105148-1-dakr@kernel.org Signed-off-by: Danilo Krummrich --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0c1d245bf7b8..e694d06dc69f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7718,6 +7718,7 @@ F: include/uapi/drm/nouveau_drm.h CORE DRIVER FOR NVIDIA GPUS [RUST] M: Danilo Krummrich +M: Alexandre Courbot L: nouveau@lists.freedesktop.org S: Supported Q: https://patchwork.freedesktop.org/project/nouveau/ From 4092e1b41202ff39aad75a40a03ac1d318443670 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 24 Jun 2025 21:59:19 +0900 Subject: [PATCH 054/358] gpu: nova-core: replace `Duration` with `Delta` The kernel's `Delta` type was not available when the `wait_on` function was introduced. Now that it is, switch to it as it is more compact than `Duration` and cannot panic. 
Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250624-nova-delta-v1-1-b37d75a593ac@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 12 ++++++------ drivers/gpu/nova-core/falcon/hal/ga102.rs | 4 ++-- drivers/gpu/nova-core/gfw.rs | 5 ++--- drivers/gpu/nova-core/util.rs | 8 +++----- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index be4bf59422c6..c2c6f9eb380a 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -3,11 +3,11 @@ //! Falcon microprocessor base support use core::ops::Deref; -use core::time::Duration; use hal::FalconHal; use kernel::bindings; use kernel::device; use kernel::prelude::*; +use kernel::time::Delta; use kernel::types::ARef; use crate::dma::DmaObject; @@ -353,7 +353,7 @@ pub(crate) fn new( /// Wait for memory scrubbing to complete. fn reset_wait_mem_scrubbing(&self, bar: &Bar0) -> Result { // TIMEOUT: memory scrubbing should complete in less than 20ms. - util::wait_on(Duration::from_millis(20), || { + util::wait_on(Delta::from_millis(20), || { if regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE).mem_scrubbing_done() { Some(()) } else { @@ -368,7 +368,7 @@ fn reset_eng(&self, bar: &Bar0) -> Result { // According to OpenRM's `kflcnPreResetWait_GA102` documentation, HW sometimes does not set // RESET_READY so a non-failing timeout is used. - let _ = util::wait_on(Duration::from_micros(150), || { + let _ = util::wait_on(Delta::from_micros(150), || { let r = regs::NV_PFALCON_FALCON_HWCFG2::read(bar, E::BASE); if r.reset_ready() { Some(()) @@ -381,7 +381,7 @@ fn reset_eng(&self, bar: &Bar0) -> Result { // TODO[DLAY]: replace with udelay() or equivalent once available. // TIMEOUT: falcon engine should not take more than 10us to reset. 
- let _: Result = util::wait_on(Duration::from_micros(10), || None); + let _: Result = util::wait_on(Delta::from_micros(10), || None); regs::NV_PFALCON_FALCON_ENGINE::alter(bar, E::BASE, |v| v.set_reset(false)); @@ -472,7 +472,7 @@ fn dma_wr>( // Wait for the transfer to complete. // TIMEOUT: arbitrarily large value, no DMA transfer to the falcon's small memories // should ever take that long. - util::wait_on(Duration::from_secs(2), || { + util::wait_on(Delta::from_secs(2), || { let r = regs::NV_PFALCON_FALCON_DMATRFCMD::read(bar, E::BASE); if r.idle() { Some(()) @@ -542,7 +542,7 @@ pub(crate) fn boot( } // TIMEOUT: arbitrarily large value, firmwares should complete in less than 2 seconds. - util::wait_on(Duration::from_secs(2), || { + util::wait_on(Delta::from_secs(2), || { let r = regs::NV_PFALCON_FALCON_CPUCTL::read(bar, E::BASE); if r.halted() { Some(()) diff --git a/drivers/gpu/nova-core/falcon/hal/ga102.rs b/drivers/gpu/nova-core/falcon/hal/ga102.rs index 0344cd33e6ea..52c33d3f22a8 100644 --- a/drivers/gpu/nova-core/falcon/hal/ga102.rs +++ b/drivers/gpu/nova-core/falcon/hal/ga102.rs @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 use core::marker::PhantomData; -use core::time::Duration; use kernel::device; use kernel::prelude::*; +use kernel::time::Delta; use crate::driver::Bar0; use crate::falcon::{ @@ -23,7 +23,7 @@ fn select_core_ga102(bar: &Bar0) -> Result { .write(bar, E::BASE); // TIMEOUT: falcon core should take less than 10ms to report being enabled. - util::wait_on(Duration::from_millis(10), || { + util::wait_on(Delta::from_millis(10), || { let r = regs::NV_PRISCV_RISCV_BCR_CTRL::read(bar, E::BASE); if r.valid() { Some(()) diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index ce03ac9f4d9d..d5b68e02d405 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -6,10 +6,9 @@ //! the GPU is considered unusable until this step is completed, so we must wait on it before //! 
performing driver initialization. -use core::time::Duration; - use kernel::bindings; use kernel::prelude::*; +use kernel::time::Delta; use crate::driver::Bar0; use crate::regs; @@ -19,7 +18,7 @@ pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of // reset, and should complete in less time than that. - util::wait_on(Duration::from_secs(4), || { + util::wait_on(Delta::from_secs(4), || { // Check that FWSEC has lowered its protection level before reading the GFW_BOOT // status. let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) diff --git a/drivers/gpu/nova-core/util.rs b/drivers/gpu/nova-core/util.rs index 5cafe0797cd6..64fb13760764 100644 --- a/drivers/gpu/nova-core/util.rs +++ b/drivers/gpu/nova-core/util.rs @@ -1,9 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -use core::time::Duration; - use kernel::prelude::*; -use kernel::time::Instant; +use kernel::time::{Delta, Instant}; pub(crate) const fn to_lowercase_bytes(s: &str) -> [u8; N] { let src = s.as_bytes(); @@ -34,7 +32,7 @@ pub(crate) const fn const_bytes_to_str(bytes: &[u8]) -> &str { /// /// TODO[DLAY]: replace with `read_poll_timeout` once it is available. 
/// (https://lore.kernel.org/lkml/20250220070611.214262-8-fujita.tomonori@gmail.com/) -pub(crate) fn wait_on Option>(timeout: Duration, cond: F) -> Result { +pub(crate) fn wait_on Option>(timeout: Delta, cond: F) -> Result { let start_time = Instant::now(); loop { @@ -42,7 +40,7 @@ pub(crate) fn wait_on Option>(timeout: Duration, cond: F) -> Re return Ok(ret); } - if start_time.elapsed().as_nanos() > timeout.as_nanos() as i64 { + if start_time.elapsed().as_nanos() > timeout.as_nanos() { return Err(ETIMEDOUT); } } From f86ad0ed620cb3c91ec7d5468e93ac68d727539d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 19 Jun 2025 15:40:33 +0200 Subject: [PATCH 055/358] drm/gpusvm, drm/pagemap: Move migration functionality to drm_pagemap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migration functionality and track-keeping of per-pagemap VRAM mapped to the CPU mm is not per GPU_vm, but rather per pagemap. This is also reflected by the functions not needing the drm_gpusvm structures. So move to drm_pagemap. With this, drm_gpusvm shouldn't really access the page zone-device-data since its meaning is internal to drm_pagemap. Currently it's used to reject mapping ranges backed by multiple drm_pagemap allocations. For now, make the zone-device-data a void pointer. Alter the interface of drm_gpusvm_migrate_to_devmem() to ensure we don't pass a gpusvm pointer. Rename CONFIG_DRM_XE_DEVMEM_MIRROR to CONFIG_DRM_XE_PAGEMAP. Matt is listed as author of this commit since he wrote most of the code, and it makes sense to retain his git authorship. Thomas mostly moved the code around. v3: - Kerneldoc fixes (CI) - Don't update documentation about how the drm_pagemap migration should be interpreted until upcoming patches where the functionality is implemented. 
(Matt Brost) v4: - More kerneldoc fixes around timeslice_ms (Himal Ghimiray, Matt Brost) v6: - Fix an uninitialized pagemap pointer (CI) Co-developed-by: Thomas Hellström Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250619134035.170086-2-thomas.hellstrom@linux.intel.com --- Documentation/gpu/rfc/gpusvm.rst | 12 +- drivers/gpu/drm/Makefile | 6 +- drivers/gpu/drm/drm_gpusvm.c | 761 +------------------------ drivers/gpu/drm/drm_pagemap.c | 794 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/Kconfig | 10 +- drivers/gpu/drm/xe/xe_bo_types.h | 2 +- drivers/gpu/drm/xe/xe_device_types.h | 2 +- drivers/gpu/drm/xe/xe_svm.c | 47 +- include/drm/drm_gpusvm.h | 96 ---- include/drm/drm_pagemap.h | 101 ++++ 10 files changed, 958 insertions(+), 873 deletions(-) create mode 100644 drivers/gpu/drm/drm_pagemap.c diff --git a/Documentation/gpu/rfc/gpusvm.rst b/Documentation/gpu/rfc/gpusvm.rst index bcf66a8137a6..469db1372f16 100644 --- a/Documentation/gpu/rfc/gpusvm.rst +++ b/Documentation/gpu/rfc/gpusvm.rst @@ -73,15 +73,21 @@ Overview of baseline design .. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Locking -.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c - :doc: Migration - .. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Partial Unmapping of Ranges .. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Examples +Overview of drm_pagemap design +============================== + +.. kernel-doc:: drivers/gpu/drm/drm_pagemap.c + :doc: Overview + +.. 
kernel-doc:: drivers/gpu/drm/drm_pagemap.c + :doc: Migration + Possible future design features =============================== diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5050ac32bba2..4dafbdc8f86a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -104,7 +104,11 @@ obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o # obj-$(CONFIG_DRM_EXEC) += drm_exec.o obj-$(CONFIG_DRM_GPUVM) += drm_gpuvm.o -obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm.o + +drm_gpusvm_helper-y := \ + drm_gpusvm.o\ + drm_pagemap.o +obj-$(CONFIG_DRM_GPUSVM) += drm_gpusvm_helper.o obj-$(CONFIG_DRM_BUDDY) += drm_buddy.o diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index ab198645d90f..e454bb806c72 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -9,10 +9,9 @@ #include #include #include +#include #include -#include #include -#include #include #include @@ -108,21 +107,6 @@ * to add annotations to GPU SVM. */ -/** - * DOC: Migration - * - * The migration support is quite simple, allowing migration between RAM and - * device memory at the range granularity. For example, GPU SVM currently does - * not support mixing RAM and device memory pages within a range. This means - * that upon GPU fault, the entire range can be migrated to device memory, and - * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device - * memory storage within a range could be added in the future if required. - * - * The reasoning for only supporting range granularity is as follows: it - * simplifies the implementation, and range sizes are driver-defined and should - * be relatively small. 
- */ - /** * DOC: Partial Unmapping of Ranges * @@ -194,10 +178,10 @@ * if (driver_migration_policy(range)) { * mmap_read_lock(mm); * devmem = driver_alloc_devmem(); - * err = drm_gpusvm_migrate_to_devmem(gpusvm, range, - * devmem_allocation, - * &ctx); - * mmap_read_unlock(mm); + * err = drm_pagemap_migrate_to_devmem(devmem, gpusvm->mm, gpuva_start, + * gpuva_end, ctx->timeslice_ms, + * driver_pgmap_owner()); + * mmap_read_unlock(mm); * if (err) // CPU mappings may have changed * goto retry; * } @@ -289,97 +273,6 @@ npages_in_range(unsigned long start, unsigned long end) return (end - start) >> PAGE_SHIFT; } -/** - * struct drm_gpusvm_zdd - GPU SVM zone device data - * - * @refcount: Reference count for the zdd - * @devmem_allocation: device memory allocation - * @device_private_page_owner: Device private pages owner - * - * This structure serves as a generic wrapper installed in - * page->zone_device_data. It provides infrastructure for looking up a device - * memory allocation upon CPU page fault and asynchronously releasing device - * memory once the CPU has no page references. Asynchronous release is useful - * because CPU page references can be dropped in IRQ contexts, while releasing - * device memory likely requires sleeping locks. - */ -struct drm_gpusvm_zdd { - struct kref refcount; - struct drm_gpusvm_devmem *devmem_allocation; - void *device_private_page_owner; -}; - -/** - * drm_gpusvm_zdd_alloc() - Allocate a zdd structure. - * @device_private_page_owner: Device private pages owner - * - * This function allocates and initializes a new zdd structure. It sets up the - * reference count and initializes the destroy work. - * - * Return: Pointer to the allocated zdd on success, ERR_PTR() on failure. 
- */ -static struct drm_gpusvm_zdd * -drm_gpusvm_zdd_alloc(void *device_private_page_owner) -{ - struct drm_gpusvm_zdd *zdd; - - zdd = kmalloc(sizeof(*zdd), GFP_KERNEL); - if (!zdd) - return NULL; - - kref_init(&zdd->refcount); - zdd->devmem_allocation = NULL; - zdd->device_private_page_owner = device_private_page_owner; - - return zdd; -} - -/** - * drm_gpusvm_zdd_get() - Get a reference to a zdd structure. - * @zdd: Pointer to the zdd structure. - * - * This function increments the reference count of the provided zdd structure. - * - * Return: Pointer to the zdd structure. - */ -static struct drm_gpusvm_zdd *drm_gpusvm_zdd_get(struct drm_gpusvm_zdd *zdd) -{ - kref_get(&zdd->refcount); - return zdd; -} - -/** - * drm_gpusvm_zdd_destroy() - Destroy a zdd structure. - * @ref: Pointer to the reference count structure. - * - * This function queues the destroy_work of the zdd for asynchronous destruction. - */ -static void drm_gpusvm_zdd_destroy(struct kref *ref) -{ - struct drm_gpusvm_zdd *zdd = - container_of(ref, struct drm_gpusvm_zdd, refcount); - struct drm_gpusvm_devmem *devmem = zdd->devmem_allocation; - - if (devmem) { - complete_all(&devmem->detached); - if (devmem->ops->devmem_release) - devmem->ops->devmem_release(devmem); - } - kfree(zdd); -} - -/** - * drm_gpusvm_zdd_put() - Put a zdd reference. - * @zdd: Pointer to the zdd structure. - * - * This function decrements the reference count of the provided zdd structure - * and schedules its destruction if the count drops to zero. - */ -static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd) -{ - kref_put(&zdd->refcount, drm_gpusvm_zdd_destroy); -} - /** * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier * @notifier: Pointer to the GPU SVM notifier structure. @@ -946,7 +839,7 @@ drm_gpusvm_range_chunk_size(struct drm_gpusvm *gpusvm, * process-many-malloc' fails. In the failure case, each process * mallocs 16k but the CPU VMA is ~128k which results in 64k SVM * ranges. 
When migrating the SVM ranges, some processes fail in - * drm_gpusvm_migrate_to_devmem with 'migrate.cpages != npages' + * drm_pagemap_migrate_to_devmem with 'migrate.cpages != npages' * and then upon drm_gpusvm_range_get_pages device pages from * other processes are collected + faulted in which creates all * sorts of problems. Unsure exactly how this happening, also @@ -1364,7 +1257,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, .dev_private_owner = gpusvm->device_private_page_owner, }; struct mm_struct *mm = gpusvm->mm; - struct drm_gpusvm_zdd *zdd; + void *zdd; unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); unsigned long i, j; @@ -1447,6 +1340,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } zdd = NULL; + pagemap = NULL; num_dma_mapped = 0; for (i = 0, j = 0; i < npages; ++j) { struct page *page = hmm_pfn_to_page(pfns[i]); @@ -1466,7 +1360,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } pagemap = page_pgmap(page); - dpagemap = zdd->devmem_allocation->dpagemap; + dpagemap = drm_pagemap_page_to_dpagemap(page); if (drm_WARN_ON(gpusvm->drm, !dpagemap)) { /* * Raced. 
This is not supposed to happen @@ -1490,7 +1384,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, } else { dma_addr_t addr; - if (is_zone_device_page(page) || zdd) { + if (is_zone_device_page(page) || pagemap) { err = -EOPNOTSUPP; goto err_unmap; } @@ -1518,7 +1412,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm, flags.has_dma_mapping = true; } - if (zdd) { + if (pagemap) { flags.has_devmem_pages = true; range->dpagemap = dpagemap; } @@ -1546,6 +1440,7 @@ EXPORT_SYMBOL_GPL(drm_gpusvm_range_get_pages); /** * drm_gpusvm_range_unmap_pages() - Unmap pages associated with a GPU SVM range + * drm_gpusvm_range_evict() - Evict GPU SVM range * @gpusvm: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range structure * @ctx: GPU SVM context @@ -1576,562 +1471,11 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, EXPORT_SYMBOL_GPL(drm_gpusvm_range_unmap_pages); /** - * drm_gpusvm_migration_unlock_put_page() - Put a migration page - * @page: Pointer to the page to put - * - * This function unlocks and puts a page. - */ -static void drm_gpusvm_migration_unlock_put_page(struct page *page) -{ - unlock_page(page); - put_page(page); -} - -/** - * drm_gpusvm_migration_unlock_put_pages() - Put migration pages - * @npages: Number of pages - * @migrate_pfn: Array of migrate page frame numbers - * - * This function unlocks and puts an array of pages. 
- */ -static void drm_gpusvm_migration_unlock_put_pages(unsigned long npages, - unsigned long *migrate_pfn) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) { - struct page *page; - - if (!migrate_pfn[i]) - continue; - - page = migrate_pfn_to_page(migrate_pfn[i]); - drm_gpusvm_migration_unlock_put_page(page); - migrate_pfn[i] = 0; - } -} - -/** - * drm_gpusvm_get_devmem_page() - Get a reference to a device memory page - * @page: Pointer to the page - * @zdd: Pointer to the GPU SVM zone device data - * - * This function associates the given page with the specified GPU SVM zone - * device data and initializes it for zone device usage. - */ -static void drm_gpusvm_get_devmem_page(struct page *page, - struct drm_gpusvm_zdd *zdd) -{ - page->zone_device_data = drm_gpusvm_zdd_get(zdd); - zone_device_page_init(page); -} - -/** - * drm_gpusvm_migrate_map_pages() - Map migration pages for GPU SVM migration - * @dev: The device for which the pages are being mapped - * @dma_addr: Array to store DMA addresses corresponding to mapped pages - * @migrate_pfn: Array of migrate page frame numbers to map - * @npages: Number of pages to map - * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) - * - * This function maps pages of memory for migration usage in GPU SVM. It - * iterates over each page frame number provided in @migrate_pfn, maps the - * corresponding page, and stores the DMA address in the provided @dma_addr - * array. - * - * Return: 0 on success, -EFAULT if an error occurs during mapping. 
- */ -static int drm_gpusvm_migrate_map_pages(struct device *dev, - dma_addr_t *dma_addr, - unsigned long *migrate_pfn, - unsigned long npages, - enum dma_data_direction dir) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(migrate_pfn[i]); - - if (!page) - continue; - - if (WARN_ON_ONCE(is_zone_device_page(page))) - return -EFAULT; - - dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); - if (dma_mapping_error(dev, dma_addr[i])) - return -EFAULT; - } - - return 0; -} - -/** - * drm_gpusvm_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration - * @dev: The device for which the pages were mapped - * @dma_addr: Array of DMA addresses corresponding to mapped pages - * @npages: Number of pages to unmap - * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL) - * - * This function unmaps previously mapped pages of memory for GPU Shared Virtual - * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks - * if it's valid and not already unmapped, and unmaps the corresponding page. - */ -static void drm_gpusvm_migrate_unmap_pages(struct device *dev, - dma_addr_t *dma_addr, - unsigned long npages, - enum dma_data_direction dir) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) { - if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) - continue; - - dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); - } -} - -/** - * drm_gpusvm_migrate_to_devmem() - Migrate GPU SVM range to device memory + * drm_gpusvm_range_evict() - Evict GPU SVM range * @gpusvm: Pointer to the GPU SVM structure - * @range: Pointer to the GPU SVM range structure - * @devmem_allocation: Pointer to the device memory allocation. The caller - * should hold a reference to the device memory allocation, - * which should be dropped via ops->devmem_release or upon - * the failure of this function. - * @ctx: GPU SVM context - * - * This function migrates the specified GPU SVM range to device memory. 
It - * performs the necessary setup and invokes the driver-specific operations for - * migration to device memory. Upon successful return, @devmem_allocation can - * safely reference @range until ops->devmem_release is called which only upon - * successful return. Expected to be called while holding the mmap lock in read - * mode. - * - * Return: 0 on success, negative error code on failure. - */ -int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - struct drm_gpusvm_devmem *devmem_allocation, - const struct drm_gpusvm_ctx *ctx) -{ - const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; - unsigned long start = drm_gpusvm_range_start(range), - end = drm_gpusvm_range_end(range); - struct migrate_vma migrate = { - .start = start, - .end = end, - .pgmap_owner = gpusvm->device_private_page_owner, - .flags = MIGRATE_VMA_SELECT_SYSTEM, - }; - struct mm_struct *mm = gpusvm->mm; - unsigned long i, npages = npages_in_range(start, end); - struct vm_area_struct *vas; - struct drm_gpusvm_zdd *zdd = NULL; - struct page **pages; - dma_addr_t *dma_addr; - void *buf; - int err; - - mmap_assert_locked(gpusvm->mm); - - if (!range->flags.migrate_devmem) - return -EINVAL; - - if (!ops->populate_devmem_pfn || !ops->copy_to_devmem || - !ops->copy_to_ram) - return -EOPNOTSUPP; - - vas = vma_lookup(mm, start); - if (!vas) { - err = -ENOENT; - goto err_out; - } - - if (end > vas->vm_end || start < vas->vm_start) { - err = -EINVAL; - goto err_out; - } - - if (!vma_is_anonymous(vas)) { - err = -EBUSY; - goto err_out; - } - - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; - - zdd = drm_gpusvm_zdd_alloc(gpusvm->device_private_page_owner); - if (!zdd) { - err = -ENOMEM; - goto err_free; - } - - migrate.vma = 
vas; - migrate.src = buf; - migrate.dst = migrate.src + npages; - - err = migrate_vma_setup(&migrate); - if (err) - goto err_free; - - if (!migrate.cpages) { - err = -EFAULT; - goto err_free; - } - - if (migrate.cpages != npages) { - err = -EBUSY; - goto err_finalize; - } - - err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst); - if (err) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, - migrate.src, npages, DMA_TO_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) { - struct page *page = pfn_to_page(migrate.dst[i]); - - pages[i] = page; - migrate.dst[i] = migrate_pfn(migrate.dst[i]); - drm_gpusvm_get_devmem_page(page, zdd); - } - - err = ops->copy_to_devmem(pages, dma_addr, npages); - if (err) - goto err_finalize; - - /* Upon success bind devmem allocation to range and zdd */ - devmem_allocation->timeslice_expiration = get_jiffies_64() + - msecs_to_jiffies(ctx->timeslice_ms); - zdd->devmem_allocation = devmem_allocation; /* Owns ref */ - -err_finalize: - if (err) - drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); - migrate_vma_pages(&migrate); - migrate_vma_finalize(&migrate); - drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, - DMA_TO_DEVICE); -err_free: - if (zdd) - drm_gpusvm_zdd_put(zdd); - kvfree(buf); -err_out: - return err; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_migrate_to_devmem); - -/** - * drm_gpusvm_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area - * @vas: Pointer to the VM area structure, can be NULL - * @fault_page: Fault page - * @npages: Number of pages to populate - * @mpages: Number of pages to migrate - * @src_mpfn: Source array of migrate PFNs - * @mpfn: Array of migrate PFNs to populate - * @addr: Start address for PFN allocation - * - * This function populates the RAM migrate page frame numbers (PFNs) for the - * specified VM area structure. It allocates and locks pages in the VM area for - * RAM usage. 
If vas is non-NULL use alloc_page_vma for allocation, if NULL use - * alloc_page for allocation. - * - * Return: 0 on success, negative error code on failure. - */ -static int drm_gpusvm_migrate_populate_ram_pfn(struct vm_area_struct *vas, - struct page *fault_page, - unsigned long npages, - unsigned long *mpages, - unsigned long *src_mpfn, - unsigned long *mpfn, - unsigned long addr) -{ - unsigned long i; - - for (i = 0; i < npages; ++i, addr += PAGE_SIZE) { - struct page *page, *src_page; - - if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE)) - continue; - - src_page = migrate_pfn_to_page(src_mpfn[i]); - if (!src_page) - continue; - - if (fault_page) { - if (src_page->zone_device_data != - fault_page->zone_device_data) - continue; - } - - if (vas) - page = alloc_page_vma(GFP_HIGHUSER, vas, addr); - else - page = alloc_page(GFP_HIGHUSER); - - if (!page) - goto free_pages; - - mpfn[i] = migrate_pfn(page_to_pfn(page)); - } - - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(mpfn[i]); - - if (!page) - continue; - - WARN_ON_ONCE(!trylock_page(page)); - ++*mpages; - } - - return 0; - -free_pages: - for (i = 0; i < npages; ++i) { - struct page *page = migrate_pfn_to_page(mpfn[i]); - - if (!page) - continue; - - put_page(page); - mpfn[i] = 0; - } - return -ENOMEM; -} - -/** - * drm_gpusvm_evict_to_ram() - Evict GPU SVM range to RAM - * @devmem_allocation: Pointer to the device memory allocation - * - * Similar to __drm_gpusvm_migrate_to_ram but does not require mmap lock and - * migration done via migrate_device_* functions. - * - * Return: 0 on success, negative error code on failure. 
- */ -int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation) -{ - const struct drm_gpusvm_devmem_ops *ops = devmem_allocation->ops; - unsigned long npages, mpages = 0; - struct page **pages; - unsigned long *src, *dst; - dma_addr_t *dma_addr; - void *buf; - int i, err = 0; - unsigned int retry_count = 2; - - npages = devmem_allocation->size >> PAGE_SHIFT; - -retry: - if (!mmget_not_zero(devmem_allocation->mm)) - return -EFAULT; - - buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - src = buf; - dst = buf + (sizeof(*src) * npages); - dma_addr = buf + (2 * sizeof(*src) * npages); - pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages; - - err = ops->populate_devmem_pfn(devmem_allocation, npages, src); - if (err) - goto err_free; - - err = migrate_device_pfns(src, npages); - if (err) - goto err_free; - - err = drm_gpusvm_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages, - src, dst, 0); - if (err || !mpages) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(devmem_allocation->dev, dma_addr, - dst, npages, DMA_FROM_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) - pages[i] = migrate_pfn_to_page(src[i]); - - err = ops->copy_to_ram(pages, dma_addr, npages); - if (err) - goto err_finalize; - -err_finalize: - if (err) - drm_gpusvm_migration_unlock_put_pages(npages, dst); - migrate_device_pages(src, dst, npages); - migrate_device_finalize(src, dst, npages); - drm_gpusvm_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages, - DMA_FROM_DEVICE); -err_free: - kvfree(buf); -err_out: - mmput_async(devmem_allocation->mm); - - if (completion_done(&devmem_allocation->detached)) - return 0; - - if (retry_count--) { - cond_resched(); - goto retry; - } - - return err ?: -EBUSY; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_evict_to_ram); - -/** - * __drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (internal) - * @vas: 
Pointer to the VM area structure - * @device_private_page_owner: Device private pages owner - * @page: Pointer to the page for fault handling (can be NULL) - * @fault_addr: Fault address - * @size: Size of migration - * - * This internal function performs the migration of the specified GPU SVM range - * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and - * invokes the driver-specific operations for migration to RAM. - * - * Return: 0 on success, negative error code on failure. - */ -static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, - void *device_private_page_owner, - struct page *page, - unsigned long fault_addr, - unsigned long size) -{ - struct migrate_vma migrate = { - .vma = vas, - .pgmap_owner = device_private_page_owner, - .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | - MIGRATE_VMA_SELECT_DEVICE_COHERENT, - .fault_page = page, - }; - struct drm_gpusvm_zdd *zdd; - const struct drm_gpusvm_devmem_ops *ops; - struct device *dev = NULL; - unsigned long npages, mpages = 0; - struct page **pages; - dma_addr_t *dma_addr; - unsigned long start, end; - void *buf; - int i, err = 0; - - if (page) { - zdd = page->zone_device_data; - if (time_before64(get_jiffies_64(), - zdd->devmem_allocation->timeslice_expiration)) - return 0; - } - - start = ALIGN_DOWN(fault_addr, size); - end = ALIGN(fault_addr + 1, size); - - /* Corner where VMA area struct has been partially unmapped */ - if (start < vas->vm_start) - start = vas->vm_start; - if (end > vas->vm_end) - end = vas->vm_end; - - migrate.start = start; - migrate.end = end; - npages = npages_in_range(start, end); - - buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + - sizeof(*pages), GFP_KERNEL); - if (!buf) { - err = -ENOMEM; - goto err_out; - } - dma_addr = buf + (2 * sizeof(*migrate.src) * npages); - pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; - - migrate.vma = vas; - migrate.src = buf; - migrate.dst = migrate.src + npages; - - err = 
migrate_vma_setup(&migrate); - if (err) - goto err_free; - - /* Raced with another CPU fault, nothing to do */ - if (!migrate.cpages) - goto err_free; - - if (!page) { - for (i = 0; i < npages; ++i) { - if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) - continue; - - page = migrate_pfn_to_page(migrate.src[i]); - break; - } - - if (!page) - goto err_finalize; - } - zdd = page->zone_device_data; - ops = zdd->devmem_allocation->ops; - dev = zdd->devmem_allocation->dev; - - err = drm_gpusvm_migrate_populate_ram_pfn(vas, page, npages, &mpages, - migrate.src, migrate.dst, - start); - if (err) - goto err_finalize; - - err = drm_gpusvm_migrate_map_pages(dev, dma_addr, migrate.dst, npages, - DMA_FROM_DEVICE); - if (err) - goto err_finalize; - - for (i = 0; i < npages; ++i) - pages[i] = migrate_pfn_to_page(migrate.src[i]); - - err = ops->copy_to_ram(pages, dma_addr, npages); - if (err) - goto err_finalize; - -err_finalize: - if (err) - drm_gpusvm_migration_unlock_put_pages(npages, migrate.dst); - migrate_vma_pages(&migrate); - migrate_vma_finalize(&migrate); - if (dev) - drm_gpusvm_migrate_unmap_pages(dev, dma_addr, npages, - DMA_FROM_DEVICE); -err_free: - kvfree(buf); -err_out: - - return err; -} - -/** - * drm_gpusvm_range_evict - Evict GPU SVM range * @range: Pointer to the GPU SVM range to be removed * - * This function evicts the specified GPU SVM range. This function will not - * evict coherent pages. + * This function evicts the specified GPU SVM range. * * Return: 0 on success, a negative error code on failure. */ @@ -2183,60 +1527,6 @@ int drm_gpusvm_range_evict(struct drm_gpusvm *gpusvm, } EXPORT_SYMBOL_GPL(drm_gpusvm_range_evict); -/** - * drm_gpusvm_page_free() - Put GPU SVM zone device data associated with a page - * @page: Pointer to the page - * - * This function is a callback used to put the GPU SVM zone device data - * associated with a page when it is being released. 
- */ -static void drm_gpusvm_page_free(struct page *page) -{ - drm_gpusvm_zdd_put(page->zone_device_data); -} - -/** - * drm_gpusvm_migrate_to_ram() - Migrate GPU SVM range to RAM (page fault handler) - * @vmf: Pointer to the fault information structure - * - * This function is a page fault handler used to migrate a GPU SVM range to RAM. - * It retrieves the GPU SVM range information from the faulting page and invokes - * the internal migration function to migrate the range back to RAM. - * - * Return: VM_FAULT_SIGBUS on failure, 0 on success. - */ -static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf) -{ - struct drm_gpusvm_zdd *zdd = vmf->page->zone_device_data; - int err; - - err = __drm_gpusvm_migrate_to_ram(vmf->vma, - zdd->device_private_page_owner, - vmf->page, vmf->address, - zdd->devmem_allocation->size); - - return err ? VM_FAULT_SIGBUS : 0; -} - -/* - * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM - */ -static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = { - .page_free = drm_gpusvm_page_free, - .migrate_to_ram = drm_gpusvm_migrate_to_ram, -}; - -/** - * drm_gpusvm_pagemap_ops_get() - Retrieve GPU SVM device page map operations - * - * Return: Pointer to the GPU SVM device page map operations structure. - */ -const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void) -{ - return &drm_gpusvm_pagemap_ops; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_pagemap_ops_get); - /** * drm_gpusvm_has_mapping() - Check if GPU SVM has mapping for the given address range * @gpusvm: Pointer to the GPU SVM structure. 
@@ -2281,28 +1571,5 @@ void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, } EXPORT_SYMBOL_GPL(drm_gpusvm_range_set_unmapped); -/** - * drm_gpusvm_devmem_init() - Initialize a GPU SVM device memory allocation - * - * @dev: Pointer to the device structure which device memory allocation belongs to - * @mm: Pointer to the mm_struct for the address space - * @ops: Pointer to the operations structure for GPU SVM device memory - * @dpagemap: The struct drm_pagemap we're allocating from. - * @size: Size of device memory allocation - */ -void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation, - struct device *dev, struct mm_struct *mm, - const struct drm_gpusvm_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size) -{ - init_completion(&devmem_allocation->detached); - devmem_allocation->dev = dev; - devmem_allocation->mm = mm; - devmem_allocation->ops = ops; - devmem_allocation->dpagemap = dpagemap; - devmem_allocation->size = size; -} -EXPORT_SYMBOL_GPL(drm_gpusvm_devmem_init); - MODULE_DESCRIPTION("DRM GPUSVM"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c new file mode 100644 index 000000000000..cef4657b6e8a --- /dev/null +++ b/drivers/gpu/drm/drm_pagemap.c @@ -0,0 +1,794 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include +#include +#include +#include + +/** + * DOC: Overview + * + * The DRM pagemap layer is intended to augment the dev_pagemap functionality by + * providing a way to populate a struct mm_struct virtual range with device + * private pages and to provide helpers to abstract device memory allocations, + * to migrate memory back and forth between device memory and system RAM and + * to handle access (and in the future migration) between devices implementing + * a fast interconnect that is not necessarily visible to the rest of the + * system. 
+ *
+ * Typically the DRM pagemap receives requests from one or more DRM GPU SVM
+ * instances to populate struct mm_struct virtual ranges with memory.
+ */
+
+/**
+ * DOC: Migration
+ *
+ * The migration support is quite simple, allowing migration between RAM and
+ * device memory at the range granularity. For example, GPU SVM currently does
+ * not support mixing RAM and device memory pages within a range. This means
+ * that upon GPU fault, the entire range can be migrated to device memory, and
+ * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
+ * memory storage within a range could be added in the future if required.
+ *
+ * The reasoning for only supporting range granularity is as follows: it
+ * simplifies the implementation, and range sizes are driver-defined and should
+ * be relatively small.
+ *
+ *
+ * Key DRM pagemap components:
+ *
+ * - Device Memory Allocations:
+ *   Embedded structure containing enough information for the drm_pagemap to
+ *   migrate to / from device memory.
+ *
+ * - Device Memory Operations:
+ *   Define the interface for driver-specific device memory operations:
+ *   release memory, populate pfns, and copy to / from device memory.
+ */
+
+/**
+ * struct drm_pagemap_zdd - GPU SVM zone device data
+ *
+ * @refcount: Reference count for the zdd
+ * @devmem_allocation: device memory allocation
+ * @device_private_page_owner: Device private pages owner
+ *
+ * This structure serves as a generic wrapper installed in
+ * page->zone_device_data. It provides infrastructure for looking up a device
+ * memory allocation upon CPU page fault and asynchronously releasing device
+ * memory once the CPU has no page references. Asynchronous release is useful
+ * because CPU page references can be dropped in IRQ contexts, while releasing
+ * device memory likely requires sleeping locks.
+ */
+struct drm_pagemap_zdd {
+	struct kref refcount;
+	struct drm_pagemap_devmem *devmem_allocation;
+	void *device_private_page_owner;
+};
+
+/**
+ * drm_pagemap_zdd_alloc() - Allocate a zdd structure.
+ * @device_private_page_owner: Device private pages owner
+ *
+ * This function allocates and initializes a new zdd structure. It initializes
+ * the reference count, leaves the zdd without a device memory allocation
+ * (@devmem_allocation is NULL until a migration succeeds) and records
+ * @device_private_page_owner.
+ *
+ * Return: Pointer to the allocated zdd on success, NULL if the allocation
+ * fails. Callers must check for NULL, not IS_ERR().
+ */
+static struct drm_pagemap_zdd *
+drm_pagemap_zdd_alloc(void *device_private_page_owner)
+{
+	struct drm_pagemap_zdd *zdd;
+
+	zdd = kmalloc(sizeof(*zdd), GFP_KERNEL);
+	if (!zdd)
+		return NULL;
+
+	kref_init(&zdd->refcount);
+	zdd->devmem_allocation = NULL;
+	zdd->device_private_page_owner = device_private_page_owner;
+
+	return zdd;
+}
+
+/**
+ * drm_pagemap_zdd_get() - Get a reference to a zdd structure.
+ * @zdd: Pointer to the zdd structure.
+ *
+ * This function increments the reference count of the provided zdd structure.
+ *
+ * Return: Pointer to the zdd structure, so the call can be used directly in an
+ * assignment.
+ */
+static struct drm_pagemap_zdd *drm_pagemap_zdd_get(struct drm_pagemap_zdd *zdd)
+{
+	kref_get(&zdd->refcount);
+	return zdd;
+}
+
+/**
+ * drm_pagemap_zdd_destroy() - Destroy a zdd structure.
+ * @ref: Pointer to the reference count structure.
+ *
+ * kref release callback, invoked when the last reference to the zdd is
+ * dropped. If a device memory allocation is bound to the zdd, this function
+ * signals the allocation's detached completion and then calls the driver's
+ * devmem_release op when one is provided. Finally the zdd itself is freed.
+ * All of this happens directly in the caller's context; nothing is queued.
+ */
+static void drm_pagemap_zdd_destroy(struct kref *ref)
+{
+	struct drm_pagemap_zdd *zdd =
+		container_of(ref, struct drm_pagemap_zdd, refcount);
+	struct drm_pagemap_devmem *devmem = zdd->devmem_allocation;
+
+	if (devmem) {
+		complete_all(&devmem->detached);
+		if (devmem->ops->devmem_release)
+			devmem->ops->devmem_release(devmem);
+	}
+	kfree(zdd);
+}
+
+/**
+ * drm_pagemap_zdd_put() - Put a zdd reference.
+ * @zdd: Pointer to the zdd structure.
+ *
+ * This function decrements the reference count of the provided zdd structure
+ * and, if the count drops to zero, destroys it via drm_pagemap_zdd_destroy().
+ */
+static void drm_pagemap_zdd_put(struct drm_pagemap_zdd *zdd)
+{
+	kref_put(&zdd->refcount, drm_pagemap_zdd_destroy);
+}
+
+/**
+ * drm_pagemap_migration_unlock_put_page() - Put a migration page
+ * @page: Pointer to the page to put
+ *
+ * This function unlocks the page and then drops one reference to it.
+ */
+static void drm_pagemap_migration_unlock_put_page(struct page *page)
+{
+	unlock_page(page);
+	put_page(page);
+}
+
+/**
+ * drm_pagemap_migration_unlock_put_pages() - Put migration pages
+ * @npages: Number of pages
+ * @migrate_pfn: Array of migrate page frame numbers
+ *
+ * This function unlocks and puts an array of pages. Entries that are zero are
+ * skipped, and every entry that is processed is reset to zero afterwards.
+ */
+static void drm_pagemap_migration_unlock_put_pages(unsigned long npages,
+						   unsigned long *migrate_pfn)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i) {
+		struct page *page;
+
+		if (!migrate_pfn[i])
+			continue;
+
+		page = migrate_pfn_to_page(migrate_pfn[i]);
+		drm_pagemap_migration_unlock_put_page(page);
+		migrate_pfn[i] = 0;
+	}
+}
+
+/**
+ * drm_pagemap_get_devmem_page() - Get a reference to a device memory page
+ * @page: Pointer to the page
+ * @zdd: Pointer to the GPU SVM zone device data
+ *
+ * This function takes a reference on @zdd, stores it in the page's
+ * zone_device_data and initializes the page for zone device usage.
+ */
+static void drm_pagemap_get_devmem_page(struct page *page,
+					struct drm_pagemap_zdd *zdd)
+{
+	page->zone_device_data = drm_pagemap_zdd_get(zdd);
+	zone_device_page_init(page);
+}
+
+/**
+ * drm_pagemap_migrate_map_pages() - Map migration pages for GPU SVM migration
+ * @dev: The device for which the pages are being mapped
+ * @dma_addr: Array to store DMA addresses corresponding to mapped pages
+ * @migrate_pfn: Array of migrate page frame numbers to map
+ * @npages: Number of pages to map
+ * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
+ *
+ * This function maps pages of memory for migration usage in GPU SVM. It
+ * iterates over each page frame number provided in @migrate_pfn, maps the
+ * corresponding page, and stores the DMA address in the provided @dma_addr
+ * array. Entries of @migrate_pfn that do not resolve to a page are skipped and
+ * their @dma_addr slot is left untouched. Zone device pages are not expected
+ * here and trigger a one-time warning.
+ *
+ * The function stops at the first failure and does not undo the mappings it
+ * already created; the callers in this file clean up with
+ * drm_pagemap_migrate_unmap_pages().
+ *
+ * Return: 0 on success, -EFAULT if an error occurs during mapping.
+ */
+static int drm_pagemap_migrate_map_pages(struct device *dev,
+					 dma_addr_t *dma_addr,
+					 unsigned long *migrate_pfn,
+					 unsigned long npages,
+					 enum dma_data_direction dir)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i) {
+		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
+
+		if (!page)
+			continue;
+
+		if (WARN_ON_ONCE(is_zone_device_page(page)))
+			return -EFAULT;
+
+		dma_addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+		if (dma_mapping_error(dev, dma_addr[i]))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_pagemap_migrate_unmap_pages() - Unmap pages previously mapped for GPU SVM migration
+ * @dev: The device for which the pages were mapped
+ * @dma_addr: Array of DMA addresses corresponding to mapped pages
+ * @npages: Number of pages to unmap
+ * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
+ *
+ * This function unmaps previously mapped pages of memory for GPU Shared Virtual
+ * Memory (SVM). It iterates over each DMA address provided in @dma_addr, skips
+ * entries that are zero or a DMA mapping error, and unmaps the remaining pages.
+ */
+static void drm_pagemap_migrate_unmap_pages(struct device *dev,
+					    dma_addr_t *dma_addr,
+					    unsigned long npages,
+					    enum dma_data_direction dir)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i) {
+		if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+			continue;
+
+		dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+	}
+}
+
+static unsigned long
+npages_in_range(unsigned long start, unsigned long end)
+{
+	return (end - start) >> PAGE_SHIFT;	/* byte span converted to a page count */
+}
+
+/**
+ * drm_pagemap_migrate_to_devmem() - Migrate a struct mm_struct range to device memory
+ * @devmem_allocation: The device memory allocation to migrate to.
+ * The caller should hold a reference to the device memory allocation,
+ * and the reference is consumed by this function unless it returns with
+ * an error.
+ * @mm: Pointer to the struct mm_struct.
+ * @start: Start of the virtual address range to migrate.
+ * @end: End of the virtual address range to migrate.
+ * @timeslice_ms: The time requested for the migrated pagemap pages to
+ * be present in @mm before being allowed to be migrated back.
+ * @pgmap_owner: Device private pages owner. It is handed to migrate_vma and
+ * recorded in the zone device data of the migrated pages. It does not
+ * influence which source pages are selected, since only system memory is
+ * considered.
+ *
+ * This function migrates the specified virtual address range to device memory.
+ * It performs the necessary setup and invokes the driver-specific operations for
+ * migration to device memory. Expected to be called while holding the mmap lock in
+ * at least read mode.
+ *
+ * The range must lie entirely within a single anonymous VMA, and every page of
+ * the range must be collectable for migration; partial migration is refused.
+ *
+ * Note: The @timeslice_ms parameter can typically be used to force data to
+ * remain in pagemap pages long enough for a GPU to perform a task and to prevent
+ * a migration livelock. One alternative would be for the GPU driver to block
+ * in a mmu_notifier for the specified amount of time, but adding the
+ * functionality to the pagemap is likely nicer to the system as a whole.
+ *
+ * Return: %0 on success, negative error code on failure. In particular
+ * -EOPNOTSUPP if a required driver op is missing, -ENOENT if no VMA contains
+ * @start, -EINVAL if the range is not contained in that VMA, -EBUSY if the VMA
+ * is not anonymous or not every page could be collected, -EFAULT if no page
+ * could be collected or DMA mapping failed, -ENOMEM on allocation failure, or
+ * the error returned by migrate_vma_setup() or a driver op.
+ */
+int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation,
+				  struct mm_struct *mm,
+				  unsigned long start, unsigned long end,
+				  unsigned long timeslice_ms,
+				  void *pgmap_owner)
+{
+	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
+	struct migrate_vma migrate = {
+		.start		= start,
+		.end		= end,
+		.pgmap_owner	= pgmap_owner,
+		.flags		= MIGRATE_VMA_SELECT_SYSTEM,
+	};
+	unsigned long i, npages = npages_in_range(start, end);
+	struct vm_area_struct *vas;
+	struct drm_pagemap_zdd *zdd = NULL;
+	struct page **pages;
+	dma_addr_t *dma_addr;
+	void *buf;
+	int err;
+
+	mmap_assert_locked(mm);
+
+	if (!ops->populate_devmem_pfn || !ops->copy_to_devmem ||
+	    !ops->copy_to_ram)
+		return -EOPNOTSUPP;
+
+	vas = vma_lookup(mm, start);
+	if (!vas) {
+		err = -ENOENT;
+		goto err_out;
+	}
+
+	if (end > vas->vm_end || start < vas->vm_start) {
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if (!vma_is_anonymous(vas)) {
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) +
+		       sizeof(*pages), GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	dma_addr = buf + (2 * sizeof(*migrate.src) * npages);	/* buf layout: src | dst | dma_addr | pages */
+	pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages;
+
+	zdd = drm_pagemap_zdd_alloc(pgmap_owner);
+	if (!zdd) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	migrate.vma = vas;
+	migrate.src = buf;
+	migrate.dst = migrate.src + npages;
+
+	err = migrate_vma_setup(&migrate);
+	if (err)
+		goto err_free;
+
+	if (!migrate.cpages) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	if (migrate.cpages != npages) {	/* all-or-nothing: refuse a partial range */
+		err = -EBUSY;
+		goto err_finalize;
+	}
+
+	err = ops->populate_devmem_pfn(devmem_allocation, npages, migrate.dst);
+	if (err)
+		goto err_finalize;
+
+	err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr,
+					   migrate.src, npages, DMA_TO_DEVICE);
+	if (err)
+		goto err_finalize;
+
+	for (i = 0; i < npages; ++i) {
+		struct page *page = pfn_to_page(migrate.dst[i]);
+
+		pages[i] = page;
+		migrate.dst[i] = migrate_pfn(migrate.dst[i]);	/* raw pfn -> migrate pfn encoding */
+		drm_pagemap_get_devmem_page(page, zdd);	/* each page takes its own zdd reference */
+	}
+
+	err = ops->copy_to_devmem(pages, dma_addr, npages);
+	if (err)
+		goto err_finalize;
+
+	/* Upon success start the timeslice and bind the devmem allocation to the zdd */
+	devmem_allocation->timeslice_expiration = get_jiffies_64() +
+		msecs_to_jiffies(timeslice_ms);
+	zdd->devmem_allocation = devmem_allocation;	/* Owns ref */
+
+err_finalize:
+	if (err)
+		drm_pagemap_migration_unlock_put_pages(npages, migrate.dst);
+	migrate_vma_pages(&migrate);
+	migrate_vma_finalize(&migrate);
+	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
+					DMA_TO_DEVICE);
+err_free:
+	if (zdd)
+		drm_pagemap_zdd_put(zdd);	/* drop the allocation-time reference */
+	kvfree(buf);
+err_out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(drm_pagemap_migrate_to_devmem);
+
+/**
+ * drm_pagemap_migrate_populate_ram_pfn() - Populate RAM PFNs for a VM area
+ * @vas: Pointer to the VM area structure, can be NULL
+ * @fault_page: Fault page, can be NULL. When set, only source pages that share
+ * its zone_device_data are populated.
+ * @npages: Number of pages to populate
+ * @mpages: Number of pages to migrate. Incremented once for every page that
+ * was allocated and locked; the caller provides the initial value.
+ * @src_mpfn: Source array of migrate PFNs
+ * @mpfn: Array of migrate PFNs to populate
+ * @addr: Start address for PFN allocation
+ *
+ * This function populates the RAM migrate page frame numbers (PFNs) for the
+ * specified VM area structure. It allocates and locks pages in the VM area for
+ * RAM usage. If vas is non-NULL use alloc_page_vma for allocation, if NULL use
+ * alloc_page for allocation. Source entries without MIGRATE_PFN_MIGRATE set,
+ * or without a page, are skipped.
+ *
+ * Return: 0 on success, -ENOMEM if a page allocation fails, in which case all
+ * pages referenced by @mpfn are put and their entries cleared.
+ */
+static int drm_pagemap_migrate_populate_ram_pfn(struct vm_area_struct *vas,
+						struct page *fault_page,
+						unsigned long npages,
+						unsigned long *mpages,
+						unsigned long *src_mpfn,
+						unsigned long *mpfn,
+						unsigned long addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i, addr += PAGE_SIZE) {
+		struct page *page, *src_page;
+
+		if (!(src_mpfn[i] & MIGRATE_PFN_MIGRATE))
+			continue;
+
+		src_page = migrate_pfn_to_page(src_mpfn[i]);
+		if (!src_page)
+			continue;
+
+		if (fault_page) {
+			if (src_page->zone_device_data !=
+			    fault_page->zone_device_data)
+				continue;	/* different zone device data than the faulting page */
+		}
+
+		if (vas)
+			page = alloc_page_vma(GFP_HIGHUSER, vas, addr);
+		else
+			page = alloc_page(GFP_HIGHUSER);
+
+		if (!page)
+			goto free_pages;
+
+		mpfn[i] = migrate_pfn(page_to_pfn(page));
+	}
+
+	for (i = 0; i < npages; ++i) {
+		struct page *page = migrate_pfn_to_page(mpfn[i]);
+
+		if (!page)
+			continue;
+
+		WARN_ON_ONCE(!trylock_page(page));	/* freshly allocated, so the lock is expected to succeed */
+		++*mpages;
+	}
+
+	return 0;
+
+free_pages:
+	for (i = 0; i < npages; ++i) {
+		struct page *page = migrate_pfn_to_page(mpfn[i]);
+
+		if (!page)
+			continue;
+
+		put_page(page);	/* pages are not locked yet on this path */
+		mpfn[i] = 0;
+	}
+	return -ENOMEM;
+}
+
+/**
+ * drm_pagemap_evict_to_ram() - Evict a device memory allocation to RAM
+ * @devmem_allocation: Pointer to the device memory allocation
+ *
+ * Similar to __drm_pagemap_migrate_to_ram but does not require mmap lock and
+ * migration done via migrate_device_* functions. The eviction is attempted
+ * again, a bounded number of times, for as long as the allocation's detached
+ * completion has not been signalled.
+ *
+ * Return: 0 once the allocation is detached, -EFAULT if no reference to the
+ * allocation's mm could be taken, otherwise the error of the last attempt, or
+ * -EBUSY if the last attempt reported no error but the allocation is still
+ * attached.
+ */
+int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
+{
+	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
+	unsigned long npages, mpages, i;
+	struct page **pages;
+	unsigned long *src, *dst;
+	dma_addr_t *dma_addr;
+	void *buf;
+	int err = 0;
+	unsigned int retry_count = 2;
+
+	npages = devmem_allocation->size >> PAGE_SHIFT;
+
+retry:
+	/*
+	 * Count only the pages collected by this attempt. A stale count from
+	 * an earlier attempt would defeat the "nothing to migrate" check
+	 * below and issue a pointless copy.
+	 */
+	mpages = 0;
+
+	if (!mmget_not_zero(devmem_allocation->mm))
+		return -EFAULT;
+
+	/* One zeroed buffer, carved into: src | dst | dma_addr | pages */
+	buf = kvcalloc(npages, 2 * sizeof(*src) + sizeof(*dma_addr) +
+		       sizeof(*pages), GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	src = buf;
+	dst = buf + (sizeof(*src) * npages);
+	dma_addr = buf + (2 * sizeof(*src) * npages);
+	pages = buf + (2 * sizeof(*src) + sizeof(*dma_addr)) * npages;
+
+	err = ops->populate_devmem_pfn(devmem_allocation, npages, src);
+	if (err)
+		goto err_free;
+
+	err = migrate_device_pfns(src, npages);
+	if (err)
+		goto err_free;
+
+	err = drm_pagemap_migrate_populate_ram_pfn(NULL, NULL, npages, &mpages,
+						   src, dst, 0);
+	if (err || !mpages)
+		goto err_finalize;
+
+	err = drm_pagemap_migrate_map_pages(devmem_allocation->dev, dma_addr,
+					   dst, npages, DMA_FROM_DEVICE);
+	if (err)
+		goto err_finalize;
+
+	for (i = 0; i < npages; ++i)
+		pages[i] = migrate_pfn_to_page(src[i]);
+
+	err = ops->copy_to_ram(pages, dma_addr, npages);
+	if (err)
+		goto err_finalize;
+
+err_finalize:
+	if (err)
+		drm_pagemap_migration_unlock_put_pages(npages, dst);
+	migrate_device_pages(src, dst, npages);
+	migrate_device_finalize(src, dst, npages);
+	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, dma_addr, npages,
+					DMA_FROM_DEVICE);
+err_free:
+	kvfree(buf);
+err_out:
+	mmput_async(devmem_allocation->mm);
+
+	/* Success is defined by the allocation being detached, not by err. */
+	if (completion_done(&devmem_allocation->detached))
+		return 0;
+
+	if (retry_count--) {
+		cond_resched();
+		goto retry;
+	}
+
+	return err ?: -EBUSY;
+}
+EXPORT_SYMBOL_GPL(drm_pagemap_evict_to_ram);
+
+/**
+ * __drm_pagemap_migrate_to_ram() - Migrate GPU SVM range to RAM (internal)
+ *
@vas: Pointer to the VM area structure + * @device_private_page_owner: Device private pages owner + * @page: Pointer to the page for fault handling (can be NULL) + * @fault_addr: Fault address + * @size: Size of migration + * + * This internal function performs the migration of the specified GPU SVM range + * to RAM. It sets up the migration, populates + dma maps RAM PFNs, and + * invokes the driver-specific operations for migration to RAM. + * + * Return: 0 on success, negative error code on failure. + */ +static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas, + void *device_private_page_owner, + struct page *page, + unsigned long fault_addr, + unsigned long size) +{ + struct migrate_vma migrate = { + .vma = vas, + .pgmap_owner = device_private_page_owner, + .flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE | + MIGRATE_VMA_SELECT_DEVICE_COHERENT, + .fault_page = page, + }; + struct drm_pagemap_zdd *zdd; + const struct drm_pagemap_devmem_ops *ops; + struct device *dev = NULL; + unsigned long npages, mpages = 0; + struct page **pages; + dma_addr_t *dma_addr; + unsigned long start, end; + void *buf; + int i, err = 0; + + if (page) { + zdd = page->zone_device_data; + if (time_before64(get_jiffies_64(), + zdd->devmem_allocation->timeslice_expiration)) + return 0; + } + + start = ALIGN_DOWN(fault_addr, size); + end = ALIGN(fault_addr + 1, size); + + /* Corner where VMA area struct has been partially unmapped */ + if (start < vas->vm_start) + start = vas->vm_start; + if (end > vas->vm_end) + end = vas->vm_end; + + migrate.start = start; + migrate.end = end; + npages = npages_in_range(start, end); + + buf = kvcalloc(npages, 2 * sizeof(*migrate.src) + sizeof(*dma_addr) + + sizeof(*pages), GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto err_out; + } + dma_addr = buf + (2 * sizeof(*migrate.src) * npages); + pages = buf + (2 * sizeof(*migrate.src) + sizeof(*dma_addr)) * npages; + + migrate.vma = vas; + migrate.src = buf; + migrate.dst = migrate.src + npages; + + err 
= migrate_vma_setup(&migrate); + if (err) + goto err_free; + + /* Raced with another CPU fault, nothing to do */ + if (!migrate.cpages) + goto err_free; + + if (!page) { + for (i = 0; i < npages; ++i) { + if (!(migrate.src[i] & MIGRATE_PFN_MIGRATE)) + continue; + + page = migrate_pfn_to_page(migrate.src[i]); + break; + } + + if (!page) + goto err_finalize; + } + zdd = page->zone_device_data; + ops = zdd->devmem_allocation->ops; + dev = zdd->devmem_allocation->dev; + + err = drm_pagemap_migrate_populate_ram_pfn(vas, page, npages, &mpages, + migrate.src, migrate.dst, + start); + if (err) + goto err_finalize; + + err = drm_pagemap_migrate_map_pages(dev, dma_addr, migrate.dst, npages, + DMA_FROM_DEVICE); + if (err) + goto err_finalize; + + for (i = 0; i < npages; ++i) + pages[i] = migrate_pfn_to_page(migrate.src[i]); + + err = ops->copy_to_ram(pages, dma_addr, npages); + if (err) + goto err_finalize; + +err_finalize: + if (err) + drm_pagemap_migration_unlock_put_pages(npages, migrate.dst); + migrate_vma_pages(&migrate); + migrate_vma_finalize(&migrate); + if (dev) + drm_pagemap_migrate_unmap_pages(dev, dma_addr, npages, + DMA_FROM_DEVICE); +err_free: + kvfree(buf); +err_out: + + return err; +} + +/** + * drm_pagemap_page_free() - Put GPU SVM zone device data associated with a page + * @page: Pointer to the page + * + * This function is a callback used to put the GPU SVM zone device data + * associated with a page when it is being released. + */ +static void drm_pagemap_page_free(struct page *page) +{ + drm_pagemap_zdd_put(page->zone_device_data); +} + +/** + * drm_pagemap_migrate_to_ram() - Migrate a virtual range to RAM (page fault handler) + * @vmf: Pointer to the fault information structure + * + * This function is a page fault handler used to migrate a virtual range + * to ram. The device memory allocation in which the device page is found is + * migrated in its entirety. + * + * Returns: + * VM_FAULT_SIGBUS on failure, 0 on success. 
+ */ +static vm_fault_t drm_pagemap_migrate_to_ram(struct vm_fault *vmf) +{ + struct drm_pagemap_zdd *zdd = vmf->page->zone_device_data; + int err; + + err = __drm_pagemap_migrate_to_ram(vmf->vma, + zdd->device_private_page_owner, + vmf->page, vmf->address, + zdd->devmem_allocation->size); + + return err ? VM_FAULT_SIGBUS : 0; +} + +static const struct dev_pagemap_ops drm_pagemap_pagemap_ops = { + .page_free = drm_pagemap_page_free, + .migrate_to_ram = drm_pagemap_migrate_to_ram, +}; + +/** + * drm_pagemap_pagemap_ops_get() - Retrieve GPU SVM device page map operations + * + * Returns: + * Pointer to the GPU SVM device page map operations structure. + */ +const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void) +{ + return &drm_pagemap_pagemap_ops; +} +EXPORT_SYMBOL_GPL(drm_pagemap_pagemap_ops_get); + +/** + * drm_pagemap_devmem_init() - Initialize a drm_pagemap device memory allocation + * + * @devmem_allocation: The struct drm_pagemap_devmem to initialize. + * @dev: Pointer to the device structure which device memory allocation belongs to + * @mm: Pointer to the mm_struct for the address space + * @ops: Pointer to the operations structure for GPU SVM device memory + * @dpagemap: The struct drm_pagemap we're allocating from. + * @size: Size of device memory allocation + */ +void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, + struct device *dev, struct mm_struct *mm, + const struct drm_pagemap_devmem_ops *ops, + struct drm_pagemap *dpagemap, size_t size) +{ + init_completion(&devmem_allocation->detached); + devmem_allocation->dev = dev; + devmem_allocation->mm = mm; + devmem_allocation->ops = ops; + devmem_allocation->dpagemap = dpagemap; + devmem_allocation->size = size; +} +EXPORT_SYMBOL_GPL(drm_pagemap_devmem_init); + +/** + * drm_pagemap_page_to_dpagemap() - Return a pointer to the drm_pagemap of a page + * @page: The struct page.
+ * + * Return: A pointer to the struct drm_pagemap of a device private page that + * was populated from the struct drm_pagemap. If the page was *not* populated + * from a struct drm_pagemap, the result is undefined and the function call + * may result in dereferencing an invalid address. + */ +struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) +{ + struct drm_pagemap_zdd *zdd = page->zone_device_data; + + return zdd->devmem_allocation->dpagemap; +} +EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap); diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 30ed74ad29ab..553c29e1030b 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -87,14 +87,16 @@ config DRM_XE_GPUSVM If in doubut say "Y". -config DRM_XE_DEVMEM_MIRROR - bool "Enable device memory mirror" +config DRM_XE_PAGEMAP + bool "Enable device memory pool for SVM" depends on DRM_XE_GPUSVM select GET_FREE_REGION default y help - Disable this option only if you want to compile out without device - memory mirror. Will reduce KMD memory footprint when disabled. + Disable this option only if you don't want to expose local device + memory for SVM. Will reduce KMD memory footprint when disabled. + + If in doubt say "Y".
config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index eb5e83c5f233..e0efaf23d051 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -86,7 +86,7 @@ struct xe_bo { u16 cpu_caching; /** @devmem_allocation: SVM device memory allocation */ - struct drm_gpusvm_devmem devmem_allocation; + struct drm_pagemap_devmem devmem_allocation; /** @vram_userfault_link: Link into @mem_access.vram_userfault.list */ struct list_head vram_userfault_link; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 6aca4b1a2824..7e4f6d846af6 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -106,7 +106,7 @@ struct xe_vram_region { void __iomem *mapping; /** @ttm: VRAM TTM manager */ struct xe_ttm_vram_mgr ttm; -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) /** @pagemap: Used to remap device memory as ZONE_DEVICE */ struct dev_pagemap pagemap; /** diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index 26418e9bdff0..a4bb219b2407 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -295,7 +295,7 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w) up_write(&vm->lock); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *page_to_vr(struct page *page) { @@ -483,12 +483,12 @@ static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr, return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM); } -static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation) +static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) { return container_of(devmem_allocation, struct xe_bo, devmem_allocation); } -static void xe_svm_devmem_release(struct drm_gpusvm_devmem 
*devmem_allocation) +static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -505,7 +505,7 @@ static struct drm_buddy *tile_to_buddy(struct xe_tile *tile) return &tile->mem.vram.ttm.mm; } -static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation, +static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation, unsigned long npages, unsigned long *pfn) { struct xe_bo *bo = to_xe_bo(devmem_allocation); @@ -528,7 +528,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio return 0; } -static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = { +static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = { .devmem_release = xe_svm_devmem_release, .populate_devmem_pfn = xe_svm_populate_devmem_pfn, .copy_to_devmem = xe_svm_copy_to_devmem, @@ -676,7 +676,7 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *v min(end, xe_vma_end(vma))); } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) { return &tile->mem.vram; @@ -704,6 +704,9 @@ int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, ktime_t end = 0; int err; + if (!range->base.flags.migrate_devmem) + return -EINVAL; + range_debug(range, "ALLOCATE VRAM"); if (!mmget_not_zero(mm)) @@ -723,19 +726,23 @@ int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, goto unlock; } - drm_gpusvm_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, - &gpusvm_devmem_ops, - &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + drm_pagemap_devmem_init(&bo->devmem_allocation, + vm->xe->drm.dev, mm, + &dpagemap_devmem_ops, + &tile->mem.vram.dpagemap, + xe_svm_range_size(range)); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) block->private = vr; xe_bo_get(bo); - err = 
drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base, - &bo->devmem_allocation, ctx); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, + mm, + xe_svm_range_start(range), + xe_svm_range_end(range), + ctx->timeslice_ms, + xe_svm_devm_owner(vm->xe)); if (err) xe_svm_devmem_release(&bo->devmem_allocation); @@ -810,13 +817,13 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, struct drm_gpusvm_ctx ctx = { .read_only = xe_vma_read_only(vma), .devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .check_pages_threshold = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0, + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? SZ_64K : 0, .devmem_only = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR), + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP), .timeslice_ms = atomic && IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) ? vm->xe->atomic_svm_timeslice_ms : 0, }; struct xe_svm_range *range; @@ -944,7 +951,7 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end) */ int xe_svm_bo_evict(struct xe_bo *bo) { - return drm_gpusvm_evict_to_ram(&bo->devmem_allocation); + return drm_pagemap_evict_to_ram(&bo->devmem_allocation); } /** @@ -997,7 +1004,7 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, return err; } -#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, @@ -1054,7 +1061,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) vr->pagemap.range.start = res->start; vr->pagemap.range.end = res->end; vr->pagemap.nr_range = 1; - vr->pagemap.ops = drm_gpusvm_pagemap_ops_get(); + vr->pagemap.ops = drm_pagemap_pagemap_ops_get(); vr->pagemap.owner = xe_svm_devm_owner(xe); addr = devm_memremap_pages(dev, &vr->pagemap); diff --git 
a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h index 6a5156476bf4..4aedc5423aff 100644 --- a/include/drm/drm_gpusvm.h +++ b/include/drm/drm_gpusvm.h @@ -16,91 +16,9 @@ struct drm_gpusvm; struct drm_gpusvm_notifier; struct drm_gpusvm_ops; struct drm_gpusvm_range; -struct drm_gpusvm_devmem; struct drm_pagemap; struct drm_pagemap_device_addr; -/** - * struct drm_gpusvm_devmem_ops - Operations structure for GPU SVM device memory - * - * This structure defines the operations for GPU Shared Virtual Memory (SVM) - * device memory. These operations are provided by the GPU driver to manage device memory - * allocations and perform operations such as migration between device memory and system - * RAM. - */ -struct drm_gpusvm_devmem_ops { - /** - * @devmem_release: Release device memory allocation (optional) - * @devmem_allocation: device memory allocation - * - * Release device memory allocation and drop a reference to device - * memory allocation. - */ - void (*devmem_release)(struct drm_gpusvm_devmem *devmem_allocation); - - /** - * @populate_devmem_pfn: Populate device memory PFN (required for migration) - * @devmem_allocation: device memory allocation - * @npages: Number of pages to populate - * @pfn: Array of page frame numbers to populate - * - * Populate device memory page frame numbers (PFN). - * - * Return: 0 on success, a negative error code on failure. - */ - int (*populate_devmem_pfn)(struct drm_gpusvm_devmem *devmem_allocation, - unsigned long npages, unsigned long *pfn); - - /** - * @copy_to_devmem: Copy to device memory (required for migration) - * @pages: Pointer to array of device memory pages (destination) - * @dma_addr: Pointer to array of DMA addresses (source) - * @npages: Number of pages to copy - * - * Copy pages to device memory. - * - * Return: 0 on success, a negative error code on failure. 
- */ - int (*copy_to_devmem)(struct page **pages, - dma_addr_t *dma_addr, - unsigned long npages); - - /** - * @copy_to_ram: Copy to system RAM (required for migration) - * @pages: Pointer to array of device memory pages (source) - * @dma_addr: Pointer to array of DMA addresses (destination) - * @npages: Number of pages to copy - * - * Copy pages to system RAM. - * - * Return: 0 on success, a negative error code on failure. - */ - int (*copy_to_ram)(struct page **pages, - dma_addr_t *dma_addr, - unsigned long npages); -}; - -/** - * struct drm_gpusvm_devmem - Structure representing a GPU SVM device memory allocation - * - * @dev: Pointer to the device structure which device memory allocation belongs to - * @mm: Pointer to the mm_struct for the address space - * @detached: device memory allocations is detached from device pages - * @ops: Pointer to the operations structure for GPU SVM device memory - * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. - * @size: Size of device memory allocation - * @timeslice_expiration: Timeslice expiration in jiffies - */ -struct drm_gpusvm_devmem { - struct device *dev; - struct mm_struct *mm; - struct completion detached; - const struct drm_gpusvm_devmem_ops *ops; - struct drm_pagemap *dpagemap; - size_t size; - u64 timeslice_expiration; -}; - /** * struct drm_gpusvm_ops - Operations structure for GPU SVM * @@ -361,15 +279,6 @@ void drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range, const struct drm_gpusvm_ctx *ctx); -int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm, - struct drm_gpusvm_range *range, - struct drm_gpusvm_devmem *devmem_allocation, - const struct drm_gpusvm_ctx *ctx); - -int drm_gpusvm_evict_to_ram(struct drm_gpusvm_devmem *devmem_allocation); - -const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void); - bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start, unsigned long end); @@ -380,11 +289,6 @@ 
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start, void drm_gpusvm_range_set_unmapped(struct drm_gpusvm_range *range, const struct mmu_notifier_range *mmu_range); -void drm_gpusvm_devmem_init(struct drm_gpusvm_devmem *devmem_allocation, - struct device *dev, struct mm_struct *mm, - const struct drm_gpusvm_devmem_ops *ops, - struct drm_pagemap *dpagemap, size_t size); - #ifdef CONFIG_LOCKDEP /** * drm_gpusvm_driver_set_lock() - Set the lock protecting accesses to GPU SVM diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index 202c157ff4d7..dabc9c365df4 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -7,6 +7,7 @@ #include struct drm_pagemap; +struct drm_pagemap_zdd; struct device; /** @@ -104,4 +105,104 @@ struct drm_pagemap { struct device *dev; }; +struct drm_pagemap_devmem; + +/** + * struct drm_pagemap_devmem_ops - Operations structure for GPU SVM device memory + * + * This structure defines the operations for GPU Shared Virtual Memory (SVM) + * device memory. These operations are provided by the GPU driver to manage device memory + * allocations and perform operations such as migration between device memory and system + * RAM. + */ +struct drm_pagemap_devmem_ops { + /** + * @devmem_release: Release device memory allocation (optional) + * @devmem_allocation: device memory allocation + * + * Release device memory allocation and drop a reference to device + * memory allocation. + */ + void (*devmem_release)(struct drm_pagemap_devmem *devmem_allocation); + + /** + * @populate_devmem_pfn: Populate device memory PFN (required for migration) + * @devmem_allocation: device memory allocation + * @npages: Number of pages to populate + * @pfn: Array of page frame numbers to populate + * + * Populate device memory page frame numbers (PFN). + * + * Return: 0 on success, a negative error code on failure. 
+ */ + int (*populate_devmem_pfn)(struct drm_pagemap_devmem *devmem_allocation, + unsigned long npages, unsigned long *pfn); + + /** + * @copy_to_devmem: Copy to device memory (required for migration) + * @pages: Pointer to array of device memory pages (destination) + * @dma_addr: Pointer to array of DMA addresses (source) + * @npages: Number of pages to copy + * + * Copy pages to device memory. + * + * Return: 0 on success, a negative error code on failure. + */ + int (*copy_to_devmem)(struct page **pages, + dma_addr_t *dma_addr, + unsigned long npages); + + /** + * @copy_to_ram: Copy to system RAM (required for migration) + * @pages: Pointer to array of device memory pages (source) + * @dma_addr: Pointer to array of DMA addresses (destination) + * @npages: Number of pages to copy + * + * Copy pages to system RAM. + * + * Return: 0 on success, a negative error code on failure. + */ + int (*copy_to_ram)(struct page **pages, + dma_addr_t *dma_addr, + unsigned long npages); +}; + +/** + * struct drm_pagemap_devmem - Structure representing a GPU SVM device memory allocation + * + * @dev: Pointer to the device structure which device memory allocation belongs to + * @mm: Pointer to the mm_struct for the address space + * @detached: device memory allocations is detached from device pages + * @ops: Pointer to the operations structure for GPU SVM device memory + * @dpagemap: The struct drm_pagemap of the pages this allocation belongs to. 
+ * @size: Size of device memory allocation + * @timeslice_expiration: Timeslice expiration in jiffies + */ +struct drm_pagemap_devmem { + struct device *dev; + struct mm_struct *mm; + struct completion detached; + const struct drm_pagemap_devmem_ops *ops; + struct drm_pagemap *dpagemap; + size_t size; + u64 timeslice_expiration; +}; + +int drm_pagemap_migrate_to_devmem(struct drm_pagemap_devmem *devmem_allocation, + struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned long timeslice_ms, + void *pgmap_owner); + +int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation); + +const struct dev_pagemap_ops *drm_pagemap_pagemap_ops_get(void); + +struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page); + +void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, + struct device *dev, struct mm_struct *mm, + const struct drm_pagemap_devmem_ops *ops, + struct drm_pagemap *dpagemap, size_t size); + #endif From 2ef19be2a545a63310c5c0fae0e173fc0c33bb6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 19 Jun 2025 15:40:34 +0200 Subject: [PATCH 056/358] drm/pagemap: Add a populate_mm op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an operation to populate a part of a drm_mm with device private memory. Clarify how migration using it is intended to work. v3: - Kerneldoc fixes and updates (Matt Brost). v4: - More kerneldoc fixes. Rebase. 
Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250619134035.170086-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_gpusvm.c | 9 ++--- drivers/gpu/drm/drm_pagemap.c | 65 +++++++++++++++++++++++++++++------ include/drm/drm_pagemap.h | 34 ++++++++++++++++++ 3 files changed, 91 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index e454bb806c72..5bb4c77db2c3 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -176,12 +176,9 @@ * } * * if (driver_migration_policy(range)) { - * mmap_read_lock(mm); - * devmem = driver_alloc_devmem(); - * err = drm_pagemap_migrate_to_devmem(devmem, gpusvm->mm, gpuva_start, - * gpuva_end, ctx->timeslice_ms, - * driver_pgmap_owner()); - * mmap_read_unlock(mm); + * err = drm_pagemap_populate_mm(driver_choose_drm_pagemap(), + * gpuva_start, gpuva_end, gpusvm->mm, + * ctx->timeslice_ms); * if (err) // CPU mappings may have changed * goto retry; * } diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index cef4657b6e8a..13e1519aa6d6 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /** @@ -20,23 +21,30 @@ * system. * * Typically the DRM pagemap receives requests from one or more DRM GPU SVM - * instances to populate struct mm_struct virtual ranges with memory. + * instances to populate struct mm_struct virtual ranges with memory, and the + * migration is best effort only and may thus fail. The implementation should + * also handle device unbinding by rejecting new population requests with an + * -ENODEV error and after that migrate all device pages to system ram. */ /** * DOC: Migration * - * The migration support is quite simple, allowing migration between RAM and - * device memory at the range granularity.
For example, GPU SVM currently does - * not support mixing RAM and device memory pages within a range. This means - * that upon GPU fault, the entire range can be migrated to device memory, and - * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device - * memory storage within a range could be added in the future if required. - * - * The reasoning for only supporting range granularity is as follows: it - * simplifies the implementation, and range sizes are driver-defined and should - * be relatively small. + * Migration granularity typically follows the GPU SVM range requests, but + * if there are clashes, due to races or due to the fact that multiple GPU + * SVM instances have different views of the ranges used, and because of that + * parts of a requested range are already present in the requested device memory, + * the implementation has a variety of options. It can fail, it can choose + * to populate only the part of the range that isn't already in device memory, + * or it can evict the range to system memory before trying to migrate. Ideally an + * implementation would just try to migrate the missing part of the range and + * allocate just enough memory to do so. * + * When migrating to system memory as a response to a CPU fault or a device + * memory eviction request, currently a full device memory allocation is + * migrated back to system memory. Moving forward this might need improvement for + * situations where a single page needs bouncing between system memory and + * device memory due to, for example, atomic operations. * * Key DRM pagemap components: * @@ -792,3 +800,38 @@ struct drm_pagemap *drm_pagemap_page_to_dpagemap(struct page *page) return zdd->devmem_allocation->dpagemap; } EXPORT_SYMBOL_GPL(drm_pagemap_page_to_dpagemap); + +/** + * drm_pagemap_populate_mm() - Populate a virtual range with device memory pages + * @dpagemap: Pointer to the drm_pagemap managing the device memory + * @start: Start of the virtual range to populate.
+ * @end: End of the virtual range to populate. + * @mm: Pointer to the virtual address space. + * @timeslice_ms: The time requested for the migrated pagemap pages to + * be present in @mm before being allowed to be migrated back. + * + * Attempt to populate a virtual range with device memory pages, + * clearing them or migrating data from the existing pages if necessary. + * The function is best effort only, and implementations may vary + * in how hard they try to satisfy the request. + * + * Return: %0 on success, negative error code on error. If the hardware + * device was removed / unbound the function will return %-ENODEV. + */ +int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) +{ + int err; + + if (!mmget_not_zero(mm)) + return -EFAULT; + mmap_read_lock(mm); + err = dpagemap->ops->populate_mm(dpagemap, start, end, mm, + timeslice_ms); + mmap_read_unlock(mm); + mmput(mm); + + return err; +} diff --git a/include/drm/drm_pagemap.h b/include/drm/drm_pagemap.h index dabc9c365df4..e5f20a1235be 100644 --- a/include/drm/drm_pagemap.h +++ b/include/drm/drm_pagemap.h @@ -92,6 +92,35 @@ struct drm_pagemap_ops { struct device *dev, struct drm_pagemap_device_addr addr); + /** + * @populate_mm: Populate part of the mm with @dpagemap memory, + * migrating existing data. + * @dpagemap: The struct drm_pagemap managing the memory. + * @start: The virtual start address in @mm + * @end: The virtual end address in @mm + * @mm: Pointer to a live mm. The caller must have an mmget() + * reference. + * + * The caller will have the mm lock at least in read mode. + * Note that there is no guarantee that the memory is resident + * after the function returns, it's best effort only. + * When the mm is not using the memory anymore, + * it will be released. The struct drm_pagemap might have a + * mechanism in place to reclaim the memory and the data will + * then be migrated. 
Typically to system memory. + * The implementation should hold sufficient runtime power- + * references while pages are used in an address space and + * should ideally guard against hardware device unbind in + * a way such that device pages are migrated back to system + * followed by device page removal. The implementation should + * return -ENODEV after device removal. + * + * Return: 0 if successful. Negative error code on error. + */ + int (*populate_mm)(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms); }; /** @@ -205,4 +234,9 @@ void drm_pagemap_devmem_init(struct drm_pagemap_devmem *devmem_allocation, const struct drm_pagemap_devmem_ops *ops, struct drm_pagemap *dpagemap, size_t size); +int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms); + #endif From b5870168783168f670bd55ebb00fd8207cb42479 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Thu, 19 Jun 2025 15:40:35 +0200 Subject: [PATCH 057/358] drm/xe: Implement and use the drm_pagemap populate_mm op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add runtime PM since we might call populate_mm on a foreign device. 
v3: - Fix a kerneldoc failure (Matt Brost) - Revert the bo type change from device to kernel (Matt Brost) v4: - Add an assert in xe_svm_alloc_vram (Matt Brost) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250619134035.170086-4-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/drm_pagemap.c | 1 + drivers/gpu/drm/xe/xe_svm.c | 102 ++++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_svm.h | 10 ++-- drivers/gpu/drm/xe/xe_tile.h | 11 ++++ drivers/gpu/drm/xe/xe_vm.c | 2 +- 5 files changed, 77 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c index 13e1519aa6d6..1da55322af12 100644 --- a/drivers/gpu/drm/drm_pagemap.c +++ b/drivers/gpu/drm/drm_pagemap.c @@ -835,3 +835,4 @@ int drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, return err; } +EXPORT_SYMBOL(drm_pagemap_populate_mm); diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index a4bb219b2407..a7ff5975873f 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -3,13 +3,17 @@ * Copyright © 2024 Intel Corporation */ +#include + #include "xe_bo.h" #include "xe_gt_stats.h" #include "xe_gt_tlb_invalidation.h" #include "xe_migrate.h" #include "xe_module.h" +#include "xe_pm.h" #include "xe_pt.h" #include "xe_svm.h" +#include "xe_tile.h" #include "xe_ttm_vram_mgr.h" #include "xe_vm.h" #include "xe_vm_types.h" @@ -491,8 +495,10 @@ static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation) static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation) { struct xe_bo *bo = to_xe_bo(devmem_allocation); + struct xe_device *xe = xe_bo_device(bo); xe_bo_put_async(bo); + xe_pm_runtime_put(xe); } static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset) @@ -682,76 +688,63 @@ static struct xe_vram_region *tile_to_vr(struct xe_tile *tile) return &tile->mem.vram; } -/** - * xe_svm_alloc_vram()- Allocate device memory pages for range, - 
* migrating existing data. - * @vm: The VM. - * @tile: tile to allocate vram from - * @range: SVM range - * @ctx: DRM GPU SVM context - * - * Return: 0 on success, error code on failure. - */ -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap, + unsigned long start, unsigned long end, + struct mm_struct *mm, + unsigned long timeslice_ms) { - struct mm_struct *mm = vm->svm.gpusvm.mm; + struct xe_tile *tile = container_of(dpagemap, typeof(*tile), mem.vram.dpagemap); + struct xe_device *xe = tile_to_xe(tile); + struct device *dev = xe->drm.dev; struct xe_vram_region *vr = tile_to_vr(tile); struct drm_buddy_block *block; struct list_head *blocks; struct xe_bo *bo; - ktime_t end = 0; - int err; + ktime_t time_end = 0; + int err, idx; - if (!range->base.flags.migrate_devmem) - return -EINVAL; + if (!drm_dev_enter(&xe->drm, &idx)) + return -ENODEV; - range_debug(range, "ALLOCATE VRAM"); + xe_pm_runtime_get(xe); - if (!mmget_not_zero(mm)) - return -EFAULT; - mmap_read_lock(mm); - -retry: - bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, - xe_svm_range_size(range), + retry: + bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL, end - start, ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_CPU_ADDR_MIRROR); if (IS_ERR(bo)) { err = PTR_ERR(bo); - if (xe_vm_validate_should_retry(NULL, err, &end)) + if (xe_vm_validate_should_retry(NULL, err, &time_end)) goto retry; - goto unlock; + goto out_pm_put; } - drm_pagemap_devmem_init(&bo->devmem_allocation, - vm->xe->drm.dev, mm, + drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm, &dpagemap_devmem_ops, &tile->mem.vram.dpagemap, - xe_svm_range_size(range)); + end - start); blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks; list_for_each_entry(block, blocks, link) block->private = vr; xe_bo_get(bo); - err = 
drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, - mm, - xe_svm_range_start(range), - xe_svm_range_end(range), - ctx->timeslice_ms, - xe_svm_devm_owner(vm->xe)); + + /* Ensure the device has a pm ref while there are device pages active. */ + xe_pm_runtime_get_noresume(xe); + err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm, + start, end, timeslice_ms, + xe_svm_devm_owner(xe)); if (err) xe_svm_devmem_release(&bo->devmem_allocation); xe_bo_unlock(bo); xe_bo_put(bo); -unlock: - mmap_read_unlock(mm); - mmput(mm); +out_pm_put: + xe_pm_runtime_put(xe); + drm_dev_exit(idx); return err; } @@ -859,7 +852,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma, if (--migrate_try_count >= 0 && xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) { - err = xe_svm_alloc_vram(vm, tile, range, &ctx); + err = xe_svm_alloc_vram(tile, range, &ctx); ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */ if (err) { if (migrate_try_count || !ctx.devmem_only) { @@ -1006,6 +999,30 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range, #if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +/** + * xe_svm_alloc_vram()- Allocate device memory pages for range, + * migrating existing data. + * @tile: tile to allocate vram from + * @range: SVM range + * @ctx: DRM GPU SVM context + * + * Return: 0 on success, error code on failure. 
+ */ +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const struct drm_gpusvm_ctx *ctx) +{ + struct drm_pagemap *dpagemap; + + xe_assert(tile_to_xe(tile), range->base.flags.migrate_devmem); + range_debug(range, "ALLOCATE VRAM"); + + dpagemap = xe_tile_local_pagemap(tile); + return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range), + xe_svm_range_end(range), + range->base.gpusvm->mm, + ctx->timeslice_ms); +} + static struct drm_pagemap_device_addr xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, struct device *dev, @@ -1030,6 +1047,7 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap, static const struct drm_pagemap_ops xe_drm_pagemap_ops = { .device_map = xe_drm_pagemap_device_map, + .populate_mm = xe_drm_pagemap_populate_mm, }; /** @@ -1082,7 +1100,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr) return 0; } #else -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx) { diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 19ce4f2754a7..da9a69ea0bb1 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -70,8 +70,7 @@ int xe_svm_bo_evict(struct xe_bo *bo); void xe_svm_range_debug(struct xe_svm_range *range, const char *operation); -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, +int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, const struct drm_gpusvm_ctx *ctx); struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr, @@ -237,10 +236,9 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation) { } -static inline -int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, - struct xe_svm_range *range, - const struct drm_gpusvm_ctx *ctx) +static inline int +xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range, + const 
struct drm_gpusvm_ctx *ctx) { return -EOPNOTSUPP; } diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index eb939316d55b..066a3d0cea79 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -16,4 +16,15 @@ int xe_tile_init(struct xe_tile *tile); void xe_tile_migrate_wait(struct xe_tile *tile); +#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP) +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return &tile->mem.vram.dpagemap; +} +#else +static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) +{ + return NULL; +} +#endif #endif diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 04d1a43b81e3..f590b1553e98 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2913,7 +2913,7 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) { tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0]; - err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx); + err = xe_svm_alloc_vram(tile, svm_range, &ctx); if (err) { drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n", vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); From e6018b194b45b1793d61e28e8793efa21c08bd54 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:00 +0200 Subject: [PATCH 058/358] drm/xe: Defer memirq init until needed memirqs require allocations into GGTT, which we cannot use until after display is enabled. Now that the initialisation of interrupts is postponed, move memirq init too. 
Reviewed-by: Lucas De Marchi Reviewed-by: Ilia Levi Link: https://lore.kernel.org/r/20250619104858.418440-14-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_device.c | 4 ---- drivers/gpu/drm/xe/xe_tile.c | 7 +++++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index cd17c1354ab3..d66cb82c4345 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,7 +44,6 @@ #include "xe_hw_engine_group.h" #include "xe_hwmon.h" #include "xe_irq.h" -#include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" #include "xe_nvm.h" @@ -816,9 +815,6 @@ int xe_device_probe(struct xe_device *xe) err = xe_ggtt_init_early(tile->mem.ggtt); if (err) return err; - err = xe_memirq_init(&tile->memirq); - if (err) - return err; } for_each_gt(gt, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c index 672faa0b67f1..86e9811e60ba 100644 --- a/drivers/gpu/drm/xe/xe_tile.c +++ b/drivers/gpu/drm/xe/xe_tile.c @@ -10,6 +10,7 @@ #include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" +#include "xe_memirq.h" #include "xe_migrate.h" #include "xe_pcode.h" #include "xe_sa.h" @@ -174,6 +175,12 @@ int xe_tile_init_noalloc(struct xe_tile *tile) int xe_tile_init(struct xe_tile *tile) { + int err; + + err = xe_memirq_init(&tile->memirq); + if (err) + return err; + tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16); if (IS_ERR(tile->mem.kernel_bb_pool)) return PTR_ERR(tile->mem.kernel_bb_pool); From b3412d72331a5f2289f1494c09f9b02a450748e3 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:01 +0200 Subject: [PATCH 059/358] drm/xe/sriov: Move VF bootstrap and query_config to vf_guc_init We want to split up GUC init to an alloc and noalloc part to keep the init path the same for VF and !VF as much as possible. 
Everything in vf_guc_init should be done as early as possible, otherwise VRAM probing becomes impossible. Also move xe_gt_mmio_init to the end of xe_gt_init_early(), cleaning up the init in xe_device slightly. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-15-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_device.c | 17 ----------------- drivers/gpu/drm/xe/xe_gt.c | 7 +++++++ drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 1 - drivers/gpu/drm/xe/xe_guc.c | 9 +++++++++ 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index d66cb82c4345..f56677d3f0dc 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -792,23 +792,6 @@ int xe_device_probe(struct xe_device *xe) err = xe_gt_init_early(gt); if (err) return err; - - /* - * Only after this point can GT-specific MMIO operations - * (including things like communication with the GuC) - * be performed. - */ - xe_gt_mmio_init(gt); - - if (IS_SRIOV_VF(xe)) { - xe_guc_comm_init_early(&gt->uc.guc); - err = xe_gt_sriov_vf_bootstrap(gt); - if (err) - return err; - err = xe_gt_sriov_vf_query_config(gt); - if (err) - return err; - } } for_each_tile(tile, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9752a38c0162..4370a4db95e0 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -419,6 +419,13 @@ int xe_gt_init_early(struct xe_gt *gt) xe_mocs_init_early(gt); + /* + * Only after this point can GT-specific MMIO operations + * (including things like communication with the GuC) + * be performed.
+ */ + xe_gt_mmio_init(gt); + return 0; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 9b2fc9db55b8..93cd26dca070 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -966,7 +966,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg) struct vf_runtime_reg *rr; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, gt->sriov.vf.pf_version.major); xe_gt_assert(gt, !reg.vf); if (reg.addr == GMD_ID.addr) { diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 209e5d53c290..81b22b6fb249 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -629,6 +629,7 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) static int vf_guc_init(struct xe_guc *guc) { + struct xe_gt *gt = guc_to_gt(guc); int err; xe_guc_comm_init_early(guc); @@ -641,6 +642,14 @@ static int vf_guc_init(struct xe_guc *guc) if (err) return err; + err = xe_gt_sriov_vf_bootstrap(gt); + if (err) + return err; + + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + return 0; } From 396044c9d84d6668d89a5b680688b0d5fcae3aa5 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:02 +0200 Subject: [PATCH 060/358] drm/xe: Simplify GuC early initialization Add a 2-stage GuC init. An early one for everything that is needed for VF, and a full one that loads GuC and is allowed to do allocations. 
Link: https://lore.kernel.org/r/20250619104858.418440-16-dev@lankhorst.se Signed-off-by: Maarten Lankhorst Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 2 +- drivers/gpu/drm/xe/xe_guc.c | 51 +++++++++++++++++++++------------- drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_guc_ct.c | 28 ++++++++++++------- drivers/gpu/drm/xe/xe_guc_ct.h | 1 + drivers/gpu/drm/xe/xe_uc.c | 16 +++++++++++ drivers/gpu/drm/xe/xe_uc.h | 1 + 7 files changed, 70 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 4370a4db95e0..6bc793043b40 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -426,7 +426,7 @@ int xe_gt_init_early(struct xe_gt *gt) */ xe_gt_mmio_init(gt); - return 0; + return xe_uc_init_noalloc(&gt->uc); } static void dump_pat_on_error(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 81b22b6fb249..eeb23f8aa9de 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -627,21 +627,11 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc) return 0; } -static int vf_guc_init(struct xe_guc *guc) +static int vf_guc_init_noalloc(struct xe_guc *guc) { struct xe_gt *gt = guc_to_gt(guc); int err; - xe_guc_comm_init_early(guc); - - err = xe_guc_ct_init(&guc->ct); - if (err) - return err; - - err = xe_guc_relay_init(&guc->relay); - if (err) - return err; - err = xe_gt_sriov_vf_bootstrap(gt); if (err) return err; @@ -653,6 +643,35 @@ static int vf_guc_init(struct xe_guc *guc) return 0; } +int xe_guc_init_noalloc(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + int ret; + + xe_guc_comm_init_early(guc); + + ret = xe_guc_ct_init_noalloc(&guc->ct); + if (ret) + goto out; + + ret = xe_guc_relay_init(&guc->relay); + if (ret) + goto out; + + if (IS_SRIOV_VF(xe)) { + ret = vf_guc_init_noalloc(guc); + if (ret) + goto out; + } + + return 0; + +out: + xe_gt_err(gt, "GuC init
failed with %pe\n", ERR_PTR(ret)); + return ret; +} + int xe_guc_init(struct xe_guc *guc) { struct xe_device *xe = guc_to_xe(guc); @@ -662,13 +681,13 @@ int xe_guc_init(struct xe_guc *guc) guc->fw.type = XE_UC_FW_TYPE_GUC; ret = xe_uc_fw_init(&guc->fw); if (ret) - goto out; + return ret; if (!xe_uc_fw_is_enabled(&guc->fw)) return 0; if (IS_SRIOV_VF(xe)) { - ret = vf_guc_init(guc); + ret = xe_guc_ct_init(&guc->ct); if (ret) goto out; return 0; @@ -690,10 +709,6 @@ int xe_guc_init(struct xe_guc *guc) if (ret) goto out; - ret = xe_guc_relay_init(&guc->relay); - if (ret) - goto out; - xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE); ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc); @@ -702,8 +717,6 @@ int xe_guc_init(struct xe_guc *guc) guc_init_params(guc); - xe_guc_comm_init_early(guc); - return 0; out: diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 58338be44558..965bf7291200 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -26,6 +26,7 @@ struct drm_printer; void xe_guc_comm_init_early(struct xe_guc *guc); +int xe_guc_init_noalloc(struct xe_guc *guc); int xe_guc_init(struct xe_guc *guc); int xe_guc_init_post_hwconfig(struct xe_guc *guc); int xe_guc_post_load_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index bc4646964c40..209372e8f732 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -209,12 +209,10 @@ static void primelockdep(struct xe_guc_ct *ct) fs_reclaim_release(GFP_KERNEL); } -int xe_guc_ct_init(struct xe_guc_ct *ct) +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct) { struct xe_device *xe = ct_to_xe(ct); struct xe_gt *gt = ct_to_gt(ct); - struct xe_tile *tile = gt_to_tile(gt); - struct xe_bo *bo; int err; xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE)); @@ -240,6 +238,23 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) primelockdep(ct); + err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); 
+ if (err) + return err; + + xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); + ct->state = XE_GUC_CT_STATE_DISABLED; + return 0; +} +ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */ + +int xe_guc_ct_init(struct xe_guc_ct *ct) +{ + struct xe_device *xe = ct_to_xe(ct); + struct xe_gt *gt = ct_to_gt(ct); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(), XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | @@ -249,13 +264,6 @@ int xe_guc_ct_init(struct xe_guc_ct *ct) return PTR_ERR(bo); ct->bo = bo; - - err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct); - if (err) - return err; - - xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED); - ct->state = XE_GUC_CT_STATE_DISABLED; return 0; } ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h index 99c5dec446f2..18d4225e6502 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.h +++ b/drivers/gpu/drm/xe/xe_guc_ct.h @@ -11,6 +11,7 @@ struct drm_printer; struct xe_device; +int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct); int xe_guc_ct_init(struct xe_guc_ct *ct); int xe_guc_ct_enable(struct xe_guc_ct *ct); void xe_guc_ct_disable(struct xe_guc_ct *ct); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3a8751a8b92d..5badba6f85fa 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc) } /* Should be called once at driver load only */ +int xe_uc_init_noalloc(struct xe_uc *uc) +{ + int ret; + + ret = xe_guc_init_noalloc(&uc->guc); + if (ret) + goto err; + + /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). 
*/ + return 0; + +err: + xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret)); + return ret; +} + int xe_uc_init(struct xe_uc *uc) { int ret; diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c23e6f5e2514..b573b7731f62 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -8,6 +8,7 @@ struct xe_uc; +int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); From 2e1efcafd4fb61de987482d49a4e7b12395a3f25 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:03 +0200 Subject: [PATCH 061/358] drm/xe: Make it possible to read instance0 MCR registers after xe_gt_mcr_init_early After mcr_init_early, we need to be able to do VRAM and CCS probing without hwconfig probe. Fortunately, the relevant registers are all instance 0, which means they do not depend on any further initialization.
Reviewed-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-17-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_gt_mcr.c | 47 ++++++++++++++++++-------------- drivers/gpu/drm/xe/xe_gt_types.h | 2 ++ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index d4d9730f0d2c..64a2f0d6aaf9 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -420,12 +420,6 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt) gt->steering[SQIDI_PSMI].instance_target = select & 0x1; } -static void init_steering_inst0(struct xe_gt *gt) -{ - gt->steering[INSTANCE0].group_target = 0; /* unused */ - gt->steering[INSTANCE0].instance_target = 0; /* unused */ -} - static const struct { const char *name; void (*init)(struct xe_gt *gt); @@ -436,7 +430,7 @@ static const struct { [DSS] = { "DSS", init_steering_dss }, [OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm }, [SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi }, - [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 }, + [INSTANCE0] = { "INSTANCE 0", NULL }, [IMPLICIT_STEERING] = { "IMPLICIT", NULL }, }; @@ -446,25 +440,17 @@ static const struct { * * Perform early software only initialization of the MCR lock to allow * the synchronization on accessing the STEER_SEMAPHORE register and - * use the xe_gt_mcr_multicast_write() function. + * use the xe_gt_mcr_multicast_write() function, plus the minimum + * safe MCR registers required for VRAM/CCS probing. */ void xe_gt_mcr_init_early(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); + BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES); BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES); spin_lock_init(&gt->mcr_lock); -} - -/** - * xe_gt_mcr_init - Normal initialization of the MCR support - * @gt: GT structure - * - * Perform normal initialization of the MCR for all usages.
- */ -void xe_gt_mcr_init(struct xe_gt *gt) -{ - struct xe_device *xe = gt_to_xe(gt); if (IS_SRIOV_VF(xe)) return; @@ -505,10 +491,27 @@ void xe_gt_mcr_init(struct xe_gt *gt) } } + /* Mark instance 0 as initialized, we need this early for VRAM and CCS probe. */ + gt->steering[INSTANCE0].initialized = true; +} + +/** + * xe_gt_mcr_init - Normal initialization of the MCR support + * @gt: GT structure + * + * Perform normal initialization of the MCR for all usages. + */ +void xe_gt_mcr_init(struct xe_gt *gt) +{ + if (IS_SRIOV_VF(gt_to_xe(gt))) + return; + /* Select non-terminated steering target for each type */ - for (int i = 0; i < NUM_STEERING_TYPES; i++) + for (int i = 0; i < NUM_STEERING_TYPES; i++) { + gt->steering[i].initialized = true; if (gt->steering[i].ranges && xe_steering_types[i].init) xe_steering_types[i].init(gt); + } } /** @@ -570,6 +573,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt, for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) { if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) { + drm_WARN(&gt_to_xe(gt)->drm, !gt->steering[type].initialized, + "Uninitialized usage of MCR register %s/%#x\n", + xe_steering_types[type].name, reg.addr); + *group = gt->steering[type].group_target; *instance = gt->steering[type].instance_target; return true; diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 7def0959da35..96344c604726 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -377,6 +377,8 @@ struct xe_gt { u16 group_target; /** @steering.instance_target: instance to steer accesses to */ u16 instance_target; + /** @steering.initialized: Whether this steering range is initialized */ + bool initialized; } steering[NUM_STEERING_TYPES]; /** From 4c5517e9ecd5f13e9981fbf7378e0ce7b0d4af1f Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:04 +0200 Subject: [PATCH 062/358] drm/xe: Only dump PAT when xe_hw_engines_init_early
fails After discussion with Lucas De Marchi, it turns out that is the specific caller requiring a dump. This allows us to clean up gt_init a bit. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-18-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_gt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 6bc793043b40..98360986df7d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -464,8 +464,10 @@ static int gt_fw_domain_init(struct xe_gt *gt) xe_gt_mcr_init(gt); err = xe_hw_engines_init_early(gt); - if (err) + if (err) { + dump_pat_on_error(gt); goto err_force_wake; + } err = xe_hw_engine_class_sysfs_init(gt); if (err) @@ -486,7 +488,6 @@ static int gt_fw_domain_init(struct xe_gt *gt) return 0; err_force_wake: - dump_pat_on_error(gt); xe_force_wake_put(gt_to_fw(gt), fw_ref); return err; From 6386a49951cd2cf3b46f3f214111071174e2f4ad Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:05 +0200 Subject: [PATCH 063/358] drm/xe: Rename gt_init sub-functions s/gt_fw_domain_init/gt_init_with_gt_forcewake()/ s/all_fw_domain_init/gt_init_with_all_forcewake()/ Clarify that the functions are the part of gt_init() that are called with the respective power domains held. all_domain() of course only works after discovery and initialisation of force_wake on all engines; that's why the split is needed in the first place.
Suggested-by: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-19-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 98360986df7d..435d888a1dfc 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -440,7 +440,7 @@ static void dump_pat_on_error(struct xe_gt *gt) xe_pat_dump(gt, &p); } -static int gt_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_gt_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -493,7 +493,7 @@ static int gt_fw_domain_init(struct xe_gt *gt) return err; } -static int all_fw_domain_init(struct xe_gt *gt) +static int gt_init_with_all_forcewake(struct xe_gt *gt) { unsigned int fw_ref; int err; @@ -648,7 +648,7 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = gt_fw_domain_init(gt); + err = gt_init_with_gt_forcewake(gt); if (err) return err; @@ -662,7 +662,7 @@ int xe_gt_init(struct xe_gt *gt) xe_force_wake_init_engines(gt, gt_to_fw(gt)); - err = all_fw_domain_init(gt); + err = gt_init_with_all_forcewake(gt); if (err) return err; From 11bf0f0b3a4e6234861617356f04225b1a3272af Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:06 +0200 Subject: [PATCH 064/358] drm/xe: Split init of xe_gt_init_hwconfig to xe_gt_init and *_early Now that we added the separate step of initialising GUC in xe_gt_init_early, it should be ok to initialise the minimum during early init, and the rest after allocations are allowed. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-20-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_device.c | 6 ---- drivers/gpu/drm/xe/xe_gt.c | 60 +++++++++++++++------------------- drivers/gpu/drm/xe/xe_gt.h | 1 - 3 files changed, 26 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index f56677d3f0dc..c3dd4778b8a2 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -800,12 +800,6 @@ int xe_device_probe(struct xe_device *xe) return err; } - for_each_gt(gt, xe, id) { - err = xe_gt_init_hwconfig(gt); - if (err) - return err; - } - err = xe_devcoredump_init(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 435d888a1dfc..3942f1ac253e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -390,6 +390,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) int xe_gt_init_early(struct xe_gt *gt) { + unsigned int fw_ref; int err; if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -426,7 +427,19 @@ int xe_gt_init_early(struct xe_gt *gt) */ xe_gt_mmio_init(gt); - return xe_uc_init_noalloc(&gt->uc); + err = xe_uc_init_noalloc(&gt->uc); + if (err) + return err; + + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (!fw_ref) + return -ETIMEDOUT; + + xe_gt_mcr_init_early(gt); + xe_pat_init(gt); + xe_force_wake_put(gt_to_fw(gt), fw_ref); + + return 0; } static void dump_pat_on_error(struct xe_gt *gt) @@ -449,6 +462,18 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) if (!fw_ref) return -ETIMEDOUT; + err = xe_uc_init(&gt->uc); + if (err) + goto err_force_wake; + + err = xe_uc_init_hwconfig(&gt->uc); + if (err) + goto err_force_wake; + + xe_gt_topology_init(gt); + xe_gt_mcr_init(gt); + xe_gt_enable_host_l2_vram(gt); + if (!xe_gt_is_media_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) @@ -580,39 +605,6 @@ static int gt_init_with_all_forcewake(struct
xe_gt *gt) return err; } -/* - * Initialize enough GT to be able to load GuC in order to obtain hwconfig and - * enable CTB communication. - */ -int xe_gt_init_hwconfig(struct xe_gt *gt) -{ - unsigned int fw_ref; - int err; - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); - if (!fw_ref) - return -ETIMEDOUT; - - xe_gt_mcr_init_early(gt); - xe_pat_init(gt); - - err = xe_uc_init(&gt->uc); - if (err) - goto out_fw; - - err = xe_uc_init_hwconfig(&gt->uc); - if (err) - goto out_fw; - - xe_gt_topology_init(gt); - xe_gt_mcr_init(gt); - xe_gt_enable_host_l2_vram(gt); - -out_fw: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - return err; -} - static void xe_gt_fini(void *arg) { struct xe_gt *gt = arg; diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 187fa6490eaf..8128ddfdd788 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -28,7 +28,6 @@ static inline bool xe_fault_inject_gt_reset(void) } struct xe_gt *xe_gt_alloc(struct xe_tile *tile); -int xe_gt_init_hwconfig(struct xe_gt *gt); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); void xe_gt_mmio_init(struct xe_gt *gt); From 3effd109c6ef1dfc66ef1f09092251bd8c2b35e9 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:07 +0200 Subject: [PATCH 065/358] drm/xe: Move xe_ttm_sys_mgr_init() downwards. Now that all previous allocations are gone, ensure no new allocations will ever be done before xe_display_init_early(), by moving the call that allows allocations downwards.
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-21-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_device.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index c3dd4778b8a2..0b73cb72bad1 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -784,10 +784,6 @@ int xe_device_probe(struct xe_device *xe) if (err) return err; - err = xe_ttm_sys_mgr_init(xe); - if (err) - return err; - for_each_gt(gt, xe, id) { err = xe_gt_init_early(gt); if (err) @@ -825,6 +821,14 @@ int xe_device_probe(struct xe_device *xe) return err; } + /* + * Allow allocations only now to ensure xe_display_init_early() + * is the first to allocate, always. + */ + err = xe_ttm_sys_mgr_init(xe); + if (err) + return err; + /* Allocate and map stolen after potential VRAM resize */ err = xe_ttm_stolen_mgr_init(xe); if (err) From 80fa03eb8a703d03ec30a39c8362ff9b9d96eead Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:08 +0200 Subject: [PATCH 066/358] drm/xe: Remove xe_uc_init_hwconfig() This function is called immediately after xe_uc_init(), so just put it there instead. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-22-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_gt.c | 4 ---- drivers/gpu/drm/xe/xe_uc.c | 31 ++++++------------------------- drivers/gpu/drm/xe/xe_uc.h | 1 - 3 files changed, 6 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3942f1ac253e..14761849390d 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -466,10 +466,6 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) if (err) goto err_force_wake; - err = xe_uc_init_hwconfig(&gt->uc); - if (err) - goto err_force_wake; - xe_gt_topology_init(gt); xe_gt_mcr_init(gt); xe_gt_enable_host_l2_vram(gt); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 5badba6f85fa..52f0b0ecf5a9 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -72,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc) if (!xe_device_uc_enabled(uc_to_xe(uc))) return 0; - if (IS_SRIOV_VF(uc_to_xe(uc))) - return 0; + if (!IS_SRIOV_VF(uc_to_xe(uc))) { + ret = xe_wopcm_init(&uc->wopcm); + if (ret) + goto err; + } - ret = xe_wopcm_init(&uc->wopcm); + ret = xe_guc_min_load_for_hwconfig(&uc->guc); if (ret) goto err; return 0; - err: xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret)); return ret; @@ -142,27 +144,6 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -/** - * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig - * @uc: The UC object - * - * Return: 0 on success, negative error code on error.
- */ -int xe_uc_init_hwconfig(struct xe_uc *uc) -{ - int ret; - - /* GuC submission not enabled, nothing to do */ - if (!xe_device_uc_enabled(uc_to_xe(uc))) - return 0; - - ret = xe_guc_min_load_for_hwconfig(&uc->guc); - if (ret) - return ret; - - return 0; -} - static int vf_uc_init_hw(struct xe_uc *uc) { int err; diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index b573b7731f62..c13d705fe02c 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -10,7 +10,6 @@ struct xe_uc; int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); -int xe_uc_init_hwconfig(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_init_hw(struct xe_uc *uc); int xe_uc_fini_hw(struct xe_uc *uc); From a42939ee863efb2420cf6c56f9dfb550eb3cd56a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:09 +0200 Subject: [PATCH 067/358] drm/xe: Remove xe_uc_fini_hw xe_uc_init_hw() is called multiple times from xe_gt.c, and that makes the name xe_uc_fini_hw(), called for a different reason in xe_guc.c confusing. Remove it and inline the xe_uc_sanitize_reset into xe_guc.c directly. 
Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-23-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_guc.c | 2 +- drivers/gpu/drm/xe/xe_uc.c | 5 ----- drivers/gpu/drm/xe/xe_uc.h | 1 - 3 files changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index eeb23f8aa9de..7a4207a2c1e0 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -577,7 +577,7 @@ static void guc_fini_hw(void *arg) unsigned int fw_ref; fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - xe_uc_fini_hw(&guc_to_gt(guc)->uc); + xe_uc_sanitize_reset(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); guc_g2g_fini(guc); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 52f0b0ecf5a9..c66db45ba250 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -220,11 +220,6 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; } -int xe_uc_fini_hw(struct xe_uc *uc) -{ - return xe_uc_sanitize_reset(uc); -} - int xe_uc_reset_prepare(struct xe_uc *uc) { /* GuC submission not enabled, nothing to do */ diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index c13d705fe02c..58ff3946a80e 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -12,7 +12,6 @@ int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); int xe_uc_init_hw(struct xe_uc *uc); -int xe_uc_fini_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); From 18635b6328d9edfdfab5366f8465e930226dd3e6 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 19 Jun 2025 12:49:10 +0200 Subject: [PATCH 068/358] drm/xe: Rename xe_uc_init_hw to xe_uc_load_hw It feels to me like load is closer to the intention than init_hw. 
It makes the init calls slightly less confusing to me. :) Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250619104858.418440-24-dev@lankhorst.se Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/xe/xe_gt.c | 6 +++--- drivers/gpu/drm/xe/xe_uc.c | 6 +++--- drivers/gpu/drm/xe/xe_uc.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 14761849390d..d397df056e4c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -573,7 +573,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) } } - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) goto err_force_wake; @@ -738,7 +738,7 @@ static int vf_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; @@ -776,7 +776,7 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - err = xe_uc_init_hw(&gt->uc); + err = xe_uc_load_hw(&gt->uc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index c66db45ba250..ccfd059382fe 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -144,7 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc) return uc_reset(uc); } -static int vf_uc_init_hw(struct xe_uc *uc) +static int vf_uc_load_hw(struct xe_uc *uc) { int err; @@ -173,7 +173,7 @@ static int vf_uc_init_hw(struct xe_uc *uc) * Should be called during driver load, after every GT reset, and after every * suspend to reload / auth the firmwares.
*/ -int xe_uc_init_hw(struct xe_uc *uc) +int xe_uc_load_hw(struct xe_uc *uc) { int ret; @@ -182,7 +182,7 @@ int xe_uc_init_hw(struct xe_uc *uc) return 0; if (IS_SRIOV_VF(uc_to_xe(uc))) - return vf_uc_init_hw(uc); + return vf_uc_load_hw(uc); ret = xe_huc_upload(&uc->huc); if (ret) diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h index 58ff3946a80e..21c9306098cf 100644 --- a/drivers/gpu/drm/xe/xe_uc.h +++ b/drivers/gpu/drm/xe/xe_uc.h @@ -11,7 +11,7 @@ struct xe_uc; int xe_uc_init_noalloc(struct xe_uc *uc); int xe_uc_init(struct xe_uc *uc); int xe_uc_init_post_hwconfig(struct xe_uc *uc); -int xe_uc_init_hw(struct xe_uc *uc); +int xe_uc_load_hw(struct xe_uc *uc); void xe_uc_gucrc_disable(struct xe_uc *uc); int xe_uc_reset_prepare(struct xe_uc *uc); void xe_uc_stop_prepare(struct xe_uc *uc); From c038bdba98c9f6a36378044a9d4385531a194d3e Mon Sep 17 00:00:00 2001 From: Jia Yao Date: Thu, 12 Jun 2025 22:46:20 +0000 Subject: [PATCH 069/358] drm/xe: Fix out-of-bounds field write in MI_STORE_DATA_IMM According to Bspec, bits 0~9 of MI_STORE_DATA_IMM must not exceed 0x3FE. The macro MI_SDI_NUM_QW(x) evaluates to 2 * x + 1, which means the condition 2 * x + 1 <= 0x3FE must be satisfied. Therefore, the maximum valid value for x is 0x1FE, not 0x1FF. 
v2 - Replace 0x1fe with macro MAX_PTE_PER_SDI (Auld, Matthew & Patelczyk, Maciej) v3 - Change macro MAX_PTE_PER_SDI from 0x1fe to 0x1feU (De Marchi, Lucas) Bspec: 60246 Fixes: 9c44fd5f6e8a ("drm/xe: Add migrate layer functions for SVM support") Cc: Matthew Brost Cc: Brian3 Nguyen Cc: Alex Zuo Cc: Matthew Auld Cc: Maciej Patelczyk Cc: Lucas De Marchi Suggested-by: Shuicheng Lin Signed-off-by: Jia Yao Reviewed-by: Matthew Brost Reviewed-by: Lucas De Marchi Reviewed-by: Maciej Patelczyk Link: https://lore.kernel.org/r/20250612224620.161105-1-jia.yao@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8f8e9fdfb2a8..7acdc4c78866 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -82,7 +82,7 @@ struct xe_migrate { * of the instruction. Subtracting the instruction header (1 dword) and * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. */ -#define MAX_PTE_PER_SDI 0x1FE +#define MAX_PTE_PER_SDI 0x1FEU /** * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue. @@ -1553,15 +1553,17 @@ static u32 pte_update_cmd_size(u64 size) u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE); XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER); + /* * MI_STORE_DATA_IMM command is used to update page table. Each - * instruction can update maximumly 0x1ff pte entries. To update - * n (n <= 0x1ff) pte entries, we need: - * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) - * 2 dword for the page table's physical location - * 2*n dword for value of pte to fill (each pte entry is 2 dwords) + * instruction can update maximumly MAX_PTE_PER_SDI pte entries. 
To + * update n (n <= MAX_PTE_PER_SDI) pte entries, we need: + * + * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc) + * - 2 dword for the page table's physical location + * - 2*n dword for value of pte to fill (each pte entry is 2 dwords) */ - num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff); + num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI); num_dword += entries * 2; return num_dword; @@ -1577,7 +1579,7 @@ static void build_pt_update_batch_sram(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = pt_offset; From a7ffcea8631af91479cab10aa7fbfd0722f01d9a Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Jun 2025 13:54:06 -0700 Subject: [PATCH 070/358] drm/xe/guc: Enable extended CAT error reporting On newer HW (Xe2 onwards + PVC) it is possible to get extra information when a CAT error occurs, specifically a dword reporting the error type. To enable this extra reporting, we need to opt-in with the GuC, which is done via a specific per-VF feature opt-in H2G. On platforms where the HW does not support the extra reporting, the GuC will set the type to 0xdeadbeef, so we can keep the code simple and opt-in to the feature on every platform and then just discard the data if it is invalid. Note that on native/PF we're guaranteed that the opt in is available because we don't support any GuC old enough to not have it, but if we're a VF we might be running on a non-XE PF with an older GuC, so we need to handle that case. We can re-use the invalid type above to handle this scenario the same way as if the feature was not supported in HW. Given that this patch is the first user of the guc_buf_cache on native and VF, it also extends that feature to non-PF use-cases. 
v2: simpler print for the error type (John), rebase v3: use guc_buf_cache instead of new alloc, simpler doc (Michal) Signed-off-by: Daniele Ceraolo Spurio Cc: Nirmoy Das Cc: John Harrison Cc: Michal Wajdeczko Reviewed-by: Nirmoy Das #v1 Reviewed-by: Michal Wajdeczko Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250625205405.1653212-3-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/abi/guc_actions_abi.h | 4 ++ drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 15 +++++++ drivers/gpu/drm/xe/xe_guc.c | 56 ++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc.h | 1 + drivers/gpu/drm/xe/xe_guc_submit.c | 21 +++++++-- drivers/gpu/drm/xe/xe_uc.c | 4 ++ 6 files changed, 98 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index ff4f412c28d8..81eb046aeebf 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -142,6 +142,7 @@ enum xe_guc_action { XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C, XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D, + XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E, XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000, XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002, XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003, @@ -271,4 +272,7 @@ enum xe_guc_g2g_type { #define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) #define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) +/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */ +#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 7de8f827281f..5b2502bec2dc 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -16,6 +16,7 @@ * +===+=======+==============================================================+ * | 0 | 31:16 | **KEY** - KLV key identifier | * | | | - `GuC Self Config KLVs`_ | + * | 
| | - `GuC Opt In Feature KLVs`_ | * | | | - `GuC VGT Policy KLVs`_ | * | | | - `GuC VF Configuration KLVs`_ | * | | | | @@ -124,6 +125,20 @@ enum { GUC_CONTEXT_POLICIES_KLV_NUM_IDS = 5, }; +/** + * DOC: GuC Opt In Feature KLVs + * + * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV + * + * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001 + * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H + * containing the type of the CAT error. On HW that does not support + * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + */ + +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 +#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u + /** * DOC: GuC VGT Policy KLVs * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7a4207a2c1e0..28e24fe923a4 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -29,6 +29,7 @@ #include "xe_guc_db_mgr.h" #include "xe_guc_engine_activity.h" #include "xe_guc_hwconfig.h" +#include "xe_guc_klv_helpers.h" #include "xe_guc_log.h" #include "xe_guc_pc.h" #include "xe_guc_relay.h" @@ -570,6 +571,57 @@ static int guc_g2g_start(struct xe_guc *guc) return err; } +static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords) +{ + u32 action[] = { + XE_GUC_ACTION_OPT_IN_FEATURE_KLV, + lower_32_bits(addr), + upper_32_bits(addr), + num_dwords + }; + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +#define OPT_IN_MAX_DWORDS 16 +int xe_guc_opt_in_features_enable(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS); + u32 count = 0; + u32 *klvs; + int ret; + + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + klvs = xe_guc_buf_cpu_ptr(buf); + + /* + * The extra CAT error type opt-in was added in GuC v70.17.0, which maps + * to compatibility version v1.7.0. 
+ * Note that the GuC allows enabling this KLV even on platforms that do + * not support the extra type; in such case the returned type variable + * will be set to a known invalid value which we can check against. + */ + if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + + if (count) { + xe_assert(xe, count <= OPT_IN_MAX_DWORDS); + + ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count); + if (ret < 0) { + xe_gt_err(guc_to_gt(guc), + "failed to enable GuC opt-in features: %pe\n", + ERR_PTR(ret)); + return ret; + } + } + + return 0; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -789,6 +841,10 @@ int xe_guc_post_load_init(struct xe_guc *guc) xe_guc_ads_populate_post_load(&guc->ads); + ret = xe_guc_opt_in_features_enable(guc); + if (ret) + return ret; + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { ret = guc_g2g_start(guc); if (ret) diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h index 965bf7291200..22cf019a11bf 100644 --- a/drivers/gpu/drm/xe/xe_guc.h +++ b/drivers/gpu/drm/xe/xe_guc.h @@ -34,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc); int xe_guc_upload(struct xe_guc *guc); int xe_guc_min_load_for_hwconfig(struct xe_guc *guc); int xe_guc_enable_communication(struct xe_guc *guc); +int xe_guc_opt_in_features_enable(struct xe_guc *guc); int xe_guc_suspend(struct xe_guc *guc); void xe_guc_notify(struct xe_guc *guc); int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 72477ccc5c5e..5f91b9a6ab7d 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -2092,12 +2092,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; u32 guc_id; + u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; - if (unlikely(len < 1)) + if (unlikely(!len || len > 2)) 
return -EPROTO; guc_id = msg[0]; + if (len == 2) + type = msg[1]; + if (guc_id == GUC_ID_UNKNOWN) { /* * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF @@ -2111,8 +2115,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, if (unlikely(!q)) return -EPROTO; - xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d", - xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + /* + * The type is HW-defined and changes based on platform, so we don't + * decode it in the kernel and only check if it is valid. + * See bspec 54047 and 72187 for details. + */ + if (type != XE_GUC_CAT_ERR_TYPE_INVALID) + xe_gt_dbg(gt, + "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d", + type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); + else + xe_gt_dbg(gt, + "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d", + xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); trace_xe_exec_queue_memory_cat_error(q); diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index ccfd059382fe..6431ba3a2c53 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -162,6 +162,10 @@ static int vf_uc_load_hw(struct xe_uc *uc) uc->guc.submission_state.enabled = true; + err = xe_guc_opt_in_features_enable(&uc->guc); + if (err) + return err; + err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) return err; From 9c7d93a8f1ec0415457037a5748b0ee60e50a095 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Wed, 25 Jun 2025 13:54:07 -0700 Subject: [PATCH 071/358] drm/xe/guc: Enable the Dynamic Inhibit Context Switch optimization The Dynamic Inhibit Context Switch is an optimization aimed at reducing the amount of time the HW is stuck waiting on an unsatisfied semaphore. 
When this optimization is enabled, the GuC will dynamically modify the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH in the CTX_CONTEXT_CONTROL register of LRCs to enable immediate switching out on an unsatisfied semaphore wait when multiple contexts are competing for time on the same engine. This feature is available on recent HW from GuC 70.40.1 onwards and it is enabled via a per-VF feature opt-in. v2: rebase v3: switch to using guc_buf_cache instead of dedicated alloc v4: add helper to check for feature availability (Michal), don't enable if multi-lrc is possible. Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Cc: Julia Filipchuk Cc: Michal Wajdeczko Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250625205405.1653212-4-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/abi/guc_klvs_abi.h | 13 ++++++++++++ drivers/gpu/drm/xe/xe_guc.c | 29 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 5b2502bec2dc..0366a9da5977 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -134,11 +134,24 @@ enum { * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H * containing the type of the CAT error. On HW that does not support * reporting the CAT error type, the extra dword is set to 0xdeadbeef. + * + * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003 + * This KLV enables the Dynamic Inhibit Context Switch optimization, which + * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to + * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted + * to an oversubscribed engine. This will cause those contexts to be + * switched out immediately if they hit an unsatisfied semaphore wait + * (instead of waiting the full timeslice duration). 
The bit is instead set + * to one if a single context is queued on the engine, to avoid it being + * switched out if there isn't another context that can run in its place. */ #define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001 #define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003 +#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u + /** * DOC: GuC VGT Policy KLVs * diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 28e24fe923a4..fe1277f69238 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -583,6 +583,32 @@ static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dw return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); } +static bool supports_dynamic_ics(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); + + /* Dynamic ICS is available for PVC and Xe2 and newer platforms. */ + if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20) + return false; + + /* + * The feature is currently not compatible with multi-lrc, so the GuC + * does not support it at all on the media engines (which are the main + * users of mlrc). On the primary GT side, to avoid it being used in + * conjunction with mlrc, we only enable it if we are in single CCS + * mode. + */ + if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1) + return false; + + /* + * Dynamic ICS requires GuC v70.40.1, which maps to compatibility + * version v1.18.4. 
+ */ + return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4); +} + #define OPT_IN_MAX_DWORDS 16 int xe_guc_opt_in_features_enable(struct xe_guc *guc) { @@ -607,6 +633,9 @@ int xe_guc_opt_in_features_enable(struct xe_guc *guc) if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0)) klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE); + if (supports_dynamic_ics(guc)) + klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH); + if (count) { xe_assert(xe, count <= OPT_IN_MAX_DWORDS); From ec9223b49ab14a39f252d786f055931d403d5491 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 25 Jun 2025 07:41:28 -0700 Subject: [PATCH 072/358] drm/xe: Drop bo->size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo->size is redundant because the base GEM object already has a size field with the same value. Drop bo->size and use the base GEM object’s size instead. While at it, introduce xe_bo_size() to abstract the BO size. v2: - Fix typo in kernel doc (Ashutosh) - Fix kunit (CI) - Fix line wrap (Checkpatch) v3: - Fix sriov build (CI) v4: - Fix display build (CI) Signed-off-by: Matthew Brost Reviewed-by: Ashutosh Dixit Link: https://lore.kernel.org/r/20250625144128.2827577-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/display/xe_fb_pin.c | 2 +- drivers/gpu/drm/xe/display/xe_hdcp_gsc.c | 2 +- drivers/gpu/drm/xe/tests/xe_bo.c | 2 +- drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c | 4 +- drivers/gpu/drm/xe/tests/xe_migrate.c | 52 ++++++++++----------- drivers/gpu/drm/xe/xe_bo.c | 20 ++++---- drivers/gpu/drm/xe/xe_bo.h | 17 ++++++- drivers/gpu/drm/xe/xe_bo_types.h | 2 - drivers/gpu/drm/xe/xe_drm_client.c | 2 +- drivers/gpu/drm/xe/xe_ggtt.c | 14 +++--- drivers/gpu/drm/xe/xe_gsc.c | 6 ++- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 16 +++---- drivers/gpu/drm/xe/xe_guc.c | 4 +- drivers/gpu/drm/xe/xe_guc_ads.c | 4 +- drivers/gpu/drm/xe/xe_guc_ct.c | 4 +- drivers/gpu/drm/xe/xe_guc_log.c | 2 +- 
drivers/gpu/drm/xe/xe_huc.c | 2 +- drivers/gpu/drm/xe/xe_lmtt.c | 4 +- drivers/gpu/drm/xe/xe_lrc.c | 4 +- drivers/gpu/drm/xe/xe_migrate.c | 30 ++++++------ drivers/gpu/drm/xe/xe_oa.c | 10 ++-- drivers/gpu/drm/xe/xe_trace_bo.h | 4 +- drivers/gpu/drm/xe/xe_vm.c | 6 +-- 23 files changed, 113 insertions(+), 100 deletions(-) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 5e846f0bec21..405967df2c29 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -227,7 +227,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, goto out_unlock; } - ret = xe_ggtt_node_insert_locked(vma->node, bo->size, align, 0); + ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0); if (ret) { xe_ggtt_node_fini(vma->node); goto out_unlock; diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c index b35a6f201d4a..30f1073141fc 100644 --- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c +++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c @@ -85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe, cmd_in = xe_bo_ggtt_addr(bo); cmd_out = cmd_in + PAGE_SIZE; - xe_map_memset(xe, &bo->vmap, 0, 0, bo->size); + xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo)); gsc_context->hdcp_bo = bo; gsc_context->hdcp_cmd_in = cmd_in; diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 77ca1ab527ec..bb469096d072 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, } /* Check last CCS value, or at least last value in page. 
*/ - offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size); + offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo)); offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1; if (cpu_map[offset] != get_val) { KUNIT_FAIL(test, diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c index 537766cdd882..d266882adc0e 100644 --- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c +++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c @@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * bo->tile = tile; bo->ttm.bdev = &xe->ttm; - bo->size = size; + bo->ttm.base.size = size; iosys_map_set_vaddr(&bo->vmap, buf); if (flags & XE_BO_FLAG_GGTT) { @@ -43,7 +43,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device * KUNIT_ASSERT_EQ(test, 0, xe_ggtt_node_insert(bo->ggtt_node[tile->id], - bo->size, SZ_4K)); + xe_bo_size(bo), SZ_4K)); } return bo; diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 4a65e3103f77..edd1e701aa1c 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -74,13 +74,13 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, { struct xe_device *xe = tile_to_xe(m->tile); u64 retval, expected = 0; - bool big = bo->size >= SZ_2M; + bool big = xe_bo_size(bo) >= SZ_2M; struct dma_fence *fence; const char *str = big ? 
"Copying big bo" : "Copying small bo"; int err; struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL, - bo->size, + xe_bo_size(bo), ttm_bo_type_kernel, region | XE_BO_FLAG_NEEDS_CPU_ACCESS | @@ -105,7 +105,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, goto out_unlock; } - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); fence = xe_migrate_clear(m, remote, remote->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" : @@ -113,15 +113,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "remote first offset should be cleared", test); - retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64); check(retval, expected, "remote last offset should be cleared", test); } dma_fence_put(fence); /* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */ - xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo)); expected = 0xc0c0c0c0c0c0c0c0; fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource, @@ -131,15 +131,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &bo->vmap, 0, u64); check(retval, expected, "remote -> vram bo first offset should be copied", test); - retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "remote -> vram bo offset should be copied", test); } dma_fence_put(fence); /* And other way around.. slightly hacky.. 
*/ - xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size); - xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size); + xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote)); + xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo)); fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource, remote->ttm.resource, false); @@ -148,7 +148,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, retval = xe_map_rd(xe, &remote->vmap, 0, u64); check(retval, expected, "vram -> remote bo first offset should be copied", test); - retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64); + retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64); check(retval, expected, "vram -> remote bo last offset should be copied", test); } @@ -245,9 +245,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) if (m->q->vm->flags & XE_VM_FLAG_64K) expected |= XE_PTE_PS64; if (xe_bo_is_vram(pt)) - xe_res_first(pt->ttm.resource, 0, pt->size, &src_it); + xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it); else - xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it); + xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it); emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false, &src_it, XE_PAGE_SIZE, pt->ttm.resource); @@ -276,7 +276,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a small bo */ kunit_info(test, "Clearing small buffer object\n"); - xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size); + xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny)); expected = 0; fence = xe_migrate_clear(m, tiny, tiny->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -286,7 +286,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &tiny->vmap, 0, u32); check(retval, expected, "Command clear small first value", test); - retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32); + retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 
4, u32); check(retval, expected, "Command clear small last value", test); kunit_info(test, "Copying small buffer object to system\n"); @@ -298,7 +298,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) /* Clear a big bo */ kunit_info(test, "Clearing big buffer object\n"); - xe_map_memset(xe, &big->vmap, 0, 0x11, big->size); + xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big)); expected = 0; fence = xe_migrate_clear(m, big, big->ttm.resource, XE_MIGRATE_CLEAR_FLAG_FULL); @@ -308,7 +308,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) dma_fence_put(fence); retval = xe_map_rd(xe, &big->vmap, 0, u32); check(retval, expected, "Command clear big first value", test); - retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32); + retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32); check(retval, expected, "Command clear big last value", test); kunit_info(test, "Copying big buffer object to system\n"); @@ -370,7 +370,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile, struct xe_migrate *m = tile->migrate; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it; struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource; u64 src_L0_ofs, dst_L0_ofs; @@ -498,7 +498,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, long ret; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) { @@ -523,7 +523,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Clear evicted vram data first value", test); - 
retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Clear evicted vram data last value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -532,7 +532,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -562,7 +562,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64); check(retval, expected, "Restored value must be equal to initial value", test); - retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64); + retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64); check(retval, expected, "Restored value must be equal to initial value", test); fence = blt_copy(tile, vram_bo, ccs_bo, @@ -570,7 +570,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64); check(retval, 0, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64); + retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64); check(retval, 0, "Clear ccs data last value", test); } dma_fence_put(fence); @@ -583,7 +583,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, u64 expected, retval; expected = 0xd0d0d0d0d0d0d0d0; - xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size); + xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo)); fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test); if (!sanity_fence_failed(xe, fence, "Blit copy from 
sysmem to vram", test)) { @@ -597,7 +597,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Decompressed value must be equal to initial value", test); } dma_fence_put(fence); @@ -615,7 +615,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear main buffer first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear main buffer last value", test); } dma_fence_put(fence); @@ -625,7 +625,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile, if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) { retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64); check(retval, expected, "Clear ccs data first value", test); - retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64); + retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64); check(retval, expected, "Clear ccs data last value", test); } dma_fence_put(fence); diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4e39188a021a..7f8470b22dc9 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -437,7 +437,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = 0; if (xe_bo_needs_ccs_pages(bo)) - extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), + extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)), PAGE_SIZE); 
/* @@ -1122,7 +1122,7 @@ int xe_bo_notifier_prepare_pinned(struct xe_bo *bo) if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE) goto out_unlock_bo; - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1200,7 +1200,8 @@ int xe_bo_evict_pinned(struct xe_bo *bo) goto out_unlock_bo; if (!backup) { - backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size, + backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, + NULL, xe_bo_size(bo), DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED); @@ -1254,7 +1255,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) } xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0, - bo->size); + xe_bo_size(bo)); } if (!bo->backup_obj) @@ -1347,7 +1348,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) } xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr, - bo->size); + xe_bo_size(bo)); } bo->backup_obj = NULL; @@ -1558,7 +1559,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, vram = res_to_mem_region(ttm_bo->resource); xe_res_first(ttm_bo->resource, offset & PAGE_MASK, - bo->size - (offset & PAGE_MASK), &cursor); + xe_bo_size(bo) - (offset & PAGE_MASK), &cursor); do { unsigned long page_offset = (offset & ~PAGE_MASK); @@ -1858,7 +1859,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, bo->ccs_cleared = false; bo->tile = tile; - bo->size = size; bo->flags = flags; bo->cpu_caching = cpu_caching; bo->ttm.base.funcs = &xe_gem_object_funcs; @@ -2036,7 +2036,7 @@ __xe_bo_create_locked(struct xe_device *xe, if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, - start + bo->size, U64_MAX); + start + xe_bo_size(bo), U64_MAX); } else { 
err = xe_ggtt_insert_bo(t->mem.ggtt, bo); } @@ -2234,7 +2234,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str xe_assert(xe, !(*src)->vmap.is_iomem); bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr, - (*src)->size, dst_flags); + xe_bo_size(*src), dst_flags); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -2524,7 +2524,7 @@ int xe_bo_vmap(struct xe_bo *bo) * TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap * to use struct iosys_map. */ - ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap); + ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap); if (ret) return ret; diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 02ada1fb8a23..8559901e4088 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -238,6 +238,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) return xe_bo_addr(bo, 0, page_size); } +/** + * xe_bo_size() - Xe BO size + * @bo: The bo object. + * + * Simple helper to return Xe BO's size. 
+ * + * Return: Xe BO's size + */ +static inline size_t xe_bo_size(struct xe_bo *bo) +{ + return bo->ttm.base.size; +} + static inline u32 __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { @@ -246,7 +259,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo)); XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); return ggtt_node->base.start; } @@ -300,7 +313,7 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo); static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo) { - return PAGE_ALIGN(bo->ttm.base.size); + return PAGE_ALIGN(xe_bo_size(bo)); } static inline bool xe_bo_has_pages(struct xe_bo *bo) diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index e0efaf23d051..ff560d82496f 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -32,8 +32,6 @@ struct xe_bo { struct xe_bo *backup_obj; /** @parent_obj: Ref to parent bo if this a backup_obj */ struct xe_bo *parent_obj; - /** @size: Size of this buffer object */ - size_t size; /** @flags: flags for this buffer object */ u32 flags; /** @vm: VM this BO is attached to, for extobj this will be NULL */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 31f688e953d7..f931ff9b1ec0 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo) static void bo_meminfo(struct xe_bo *bo, struct drm_memory_stats stats[TTM_NUM_MEM_TYPES]) { - u64 sz = bo->size; + u64 sz = xe_bo_size(bo); u32 mem_type = bo->ttm.resource->mem_type; xe_bo_assert_held(bo); diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index a8830cdb185f..29d4d3f51da1 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -421,7 +421,7 @@ int xe_ggtt_init(struct xe_ggtt 
*ggtt) goto err; } - xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size); + xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch)); xe_ggtt_initial_clear(ggtt); @@ -693,13 +693,13 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, return; start = node->base.start; - end = start + bo->size; + end = start + xe_bo_size(bo); pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index); if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) { xe_assert(xe_bo_device(bo), bo->ttm.ttm); - for (xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &cur); + for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte | xe_res_dma(&cur)); @@ -707,7 +707,7 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node, /* Prepend GPU offset */ pte |= vram_region_gpu_offset(bo->ttm.resource); - for (xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE)) ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining, pte + cur.start); @@ -743,7 +743,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); return 0; } @@ -762,7 +762,7 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, mutex_lock(&ggtt->lock); err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, - bo->size, alignment, 0, start, end, 0); + xe_bo_size(bo), alignment, 0, start, end, 0); if (err) { xe_ggtt_node_fini(bo->ggtt_node[tile_id]); bo->ggtt_node[tile_id] = NULL; @@ -823,7 +823,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) return; /* 
This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo)); xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index 0bcf97063ff6..1d84bf2f2cef 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc) xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size); xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size); - xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size); + xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, + xe_bo_size(gsc->private) - fw_size); kfree(storage); @@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc) bb->cs[bb->len++] = GSC_FW_LOAD; bb->cs[bb->len++] = lower_32_bits(offset); bb->cs[bb->len++] = upper_32_bits(offset); - bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID; + bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) | + GSC_FW_LOAD_LIMIT_VALID; job = xe_bb_create_job(gsc->q, bb); if (IS_ERR(job)) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 3556c41c041b..d186f780885d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -282,8 +282,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool if (config->lmem_obj) { cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE); - cfg[n++] = lower_32_bits(config->lmem_obj->size); - cfg[n++] = upper_32_bits(config->lmem_obj->size); + cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj)); + cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj)); } cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM); @@ -1299,7 +1299,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt 
*gt, unsigned int vfid) struct xe_bo *bo; bo = config->lmem_obj; - return bo ? bo->size : 0; + return bo ? xe_bo_size(bo) : 0; } static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) @@ -1388,7 +1388,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset); if (err) goto fail; - offset += bo->size; + offset += xe_bo_size(bo); } } @@ -1469,12 +1469,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) goto release; } - err = pf_push_vf_cfg_lmem(gt, vfid, bo->size); + err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo)); if (unlikely(err)) goto reset_lmtt; xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n", - vfid, bo->size, bo->size / SZ_1M); + vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M); return 0; reset_lmtt: @@ -2552,10 +2552,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p) if (!config->lmem_obj) continue; - string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2, + string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "VF%u:\t%zu\t(%s)\n", - n, config->lmem_obj->size, buf); + n, xe_bo_size(config->lmem_obj), buf); } mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index fe1277f69238..8573957facae 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -60,7 +60,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); xe_assert(xe, addr < GUC_GGTT_TOP); - xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr); + xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr); return addr; } @@ -421,7 +421,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; xe_assert(xe, (desc - 
base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); - xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo)); return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, desc, buf, G2G_BUFFER_SIZE); diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 07a027755627..b4d81f4bd548 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -890,7 +890,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); guc_golden_lrc_init(ads); guc_mapping_table_init_invalid(gt, &info_map); @@ -914,7 +914,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads) xe_gt_assert(gt, ads->bo); - xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size); + xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo)); guc_policies_init(ads); fill_engine_enable_masks(gt, &info_map); guc_mmio_reg_state_init(ads); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 209372e8f732..23e8c155025e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -453,7 +453,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) xe_gt_assert(gt, !xe_guc_ct_enabled(ct)); - xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size); + xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo)); guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap); guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap); @@ -1907,7 +1907,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo return NULL; if (ct->bo && want_ctb) { - snapshot->ctb_size = ct->bo->size; + snapshot->ctb_size = xe_bo_size(ct->bo); snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? 
GFP_ATOMIC : GFP_KERNEL); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index 38039c411387..c01ccb35dc75 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log * * Also, can't use vmalloc as might be called from atomic context. So need * to break the buffer up into smaller chunks that can be allocated. */ - snapshot->size = log->bo->size; + snapshot->size = xe_bo_size(log->bo); snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE); snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy), diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c index 6a846e4cb221..7e43b2dd6a32 100644 --- a/drivers/gpu/drm/xe/xe_huc.c +++ b/drivers/gpu/drm/xe/xe_huc.c @@ -171,7 +171,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc) sizeof(struct pxp43_new_huc_auth_in)); wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset, xe_bo_ggtt_addr(huc->fw.bo), - huc->fw.bo->size); + xe_bo_size(huc->fw.bo)); do { err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset, ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE, diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 63db66df064b..b56437a816e4 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -386,11 +386,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo u64 addr, vram_offset; lmtt_assert(lmtt, IS_ALIGNED(start, page_size)); - lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size)); + lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size)); lmtt_assert(lmtt, xe_bo_is_vram(bo)); vram_offset = vram_region_gpu_offset(bo->ttm.resource); - xe_res_first(bo->ttm.resource, 0, bo->size, &cur); + xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur); while (cur.remaining) { addr = xe_res_dma(&cur); addr += vram_offset; /* XXX */ diff --git
a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 37598588a54f..c92e7eff79f7 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -975,7 +975,7 @@ struct wa_bb_setup { static size_t wa_bb_offset(struct xe_lrc *lrc) { - return lrc->bo->size - LRC_WA_BB_SIZE; + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) @@ -1859,7 +1859,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset - + snapshot->lrc_size = xe_bo_size(lrc->bo) - snapshot->lrc_offset - LRC_WA_BB_SIZE; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7acdc4c78866..0838582537e8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -203,7 +203,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1)); /* Need to be sure everything fits in the first PT, or create more */ - xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M); + xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M); bo = xe_bo_create_pin_map(vm->xe, tile, vm, num_entries * XE_PAGE_SIZE, @@ -214,7 +214,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, return PTR_ERR(bo); /* PT30 & PT31 reserved for 2M identity map */ - pt29_ofs = bo->size - 3 * XE_PAGE_SIZE; + pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE; entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index); xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry); @@ -236,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, if (!IS_DGFX(xe)) { /* Write out batch too */ 
m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE; - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -247,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, level++; } if (xe->info.has_usm) { - xe_tile_assert(tile, batch->size == SZ_1M); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M); batch = tile->primary_gt->usm.bb_pool->bo; m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M; - xe_tile_assert(tile, batch->size == SZ_512K); + xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K); - for (i = 0; i < batch->size; + for (i = 0; i < xe_bo_size(batch); i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE : XE_PAGE_SIZE) { entry = vm->pt_ops->pte_encode_bo(batch, i, @@ -306,7 +306,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, /* Identity map the entire vram at 256GiB offset */ if (IS_DGFX(xe)) { - u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE; + u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE; xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET, pat_index, pt30_ofs); @@ -321,7 +321,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION]; u64 vram_offset = IDENTITY_OFFSET + DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G); - u64 pt31_ofs = bo->size - XE_PAGE_SIZE; + u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE; xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE - IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G); @@ -768,7 +768,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, struct xe_gt *gt = m->tile->primary_gt; struct xe_device *xe = gt_to_xe(gt); struct dma_fence *fence = NULL; - u64 size = src_bo->size; + u64 size = xe_bo_size(src_bo); struct xe_res_cursor src_it, dst_it, ccs_it; u64 src_L0_ofs, dst_L0_ofs; u32 src_L0_pt, dst_L0_pt; @@ 
-791,7 +791,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (XE_WARN_ON(copy_ccs && src_bo != dst_bo)) return ERR_PTR(-EINVAL); - if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size)) + if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo))) return ERR_PTR(-EINVAL); if (!src_is_vram) @@ -1064,7 +1064,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, struct xe_device *xe = gt_to_xe(gt); bool clear_only_system_ccs = false; struct dma_fence *fence = NULL; - u64 size = bo->size; + u64 size = xe_bo_size(bo); struct xe_res_cursor src_it; struct ttm_resource *src = dst; int err; @@ -1076,9 +1076,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m, clear_only_system_ccs = true; if (!clear_vram) - xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it); + xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it); else - xe_res_first(src, 0, bo->size, &src_it); + xe_res_first(src, 0, xe_bo_size(bo), &src_it); while (size) { u64 clear_L0_ofs; @@ -1407,7 +1407,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, if (idx == chunk) goto next_cmd; - xe_tile_assert(tile, pt_bo->size == SZ_4K); + xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K); /* Map a PT at most once */ if (pt_bo->update_index < 0) @@ -1868,7 +1868,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, if (IS_ERR(dma_addr)) return PTR_ERR(dma_addr); - xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor); + xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor); do { struct dma_fence *__fence; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4829ed46a8b4..a3379d39f76d 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -403,7 +403,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - int 
size_exponent = __ffs(stream->oa_buffer.bo->size); + int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; @@ -435,7 +435,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ - memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); + memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); } static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) @@ -1065,7 +1065,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) static u32 oag_buf_size_select(const struct xe_oa_stream *stream) { return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, - stream->oa_buffer.bo->size > SZ_16M ? + xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); } @@ -1582,7 +1582,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1668,7 +1668,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { + if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index ccebd5f0878e..86323cf3be2c 100644 --- 
a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo, TP_fast_assign( __assign_str(dev); - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __entry->flags = bo->flags; __entry->vm = bo->vm; ), @@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move, TP_fast_assign( __entry->bo = bo; - __entry->size = bo->size; + __entry->size = xe_bo_size(bo); __assign_str(new_placement_name); __assign_str(old_placement_name); __assign_str(device_id); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f590b1553e98..e875ea4658a9 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3466,9 +3466,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, { u16 coh_mode; - if (XE_IOCTL_DBG(xe, range > bo->size) || + if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || XE_IOCTL_DBG(xe, obj_offset > - bo->size - range)) { + xe_bo_size(bo) - range)) { return -EINVAL; } @@ -3771,7 +3771,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, xe_vma_ops_init(&vops, vm, q, NULL, 0); - ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, bo->size, + ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo), DRM_XE_VM_BIND_OP_MAP, 0, 0, vm->xe->pat.idx[cache_lvl]); if (IS_ERR(ops)) { From a559434880b320b83733d739733250815aecf1b0 Mon Sep 17 00:00:00 2001 From: Harry Austen Date: Fri, 27 Jun 2025 13:30:35 -0700 Subject: [PATCH 073/358] drm/xe: Allow dropping kunit dependency as built-in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix Kconfig symbol dependency on KUNIT, which isn't actually required for XE to be built-in. However, if KUNIT is enabled, it must be built-in too. 
Fixes: 08987a8b6820 ("drm/xe: Fix build with KUNIT=m") Cc: Lucas De Marchi Cc: Thomas Hellström Cc: Jani Nikula Cc: Maarten Lankhorst Signed-off-by: Harry Austen Reviewed-by: Lucas De Marchi Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250627-xe-kunit-v2-2-756fe5cd56cf@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 553c29e1030b..827bc75b8b48 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_XE tristate "Intel Xe2 Graphics" - depends on DRM && PCI && (m || (y && KUNIT=y)) + depends on DRM && PCI + depends on KUNIT || !KUNIT depends on INTEL_VSEC || !INTEL_VSEC depends on X86_PLATFORM_DEVICES || !(X86 && ACPI) select INTERVAL_TREE From 5ac5e191973920488cc9050dd12574d33b97eba4 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 27 Jun 2025 09:41:19 +0200 Subject: [PATCH 074/358] drm/xe: Fix typo in Kconfig doubut -> doubt. Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250627074119.347826-1-dev@lankhorst.se Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index 827bc75b8b48..f66e6d39e319 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -86,7 +86,7 @@ config DRM_XE_GPUSVM Enable this option if you want support for CPU to GPU address mirroring. - If in doubut say "Y". + If in doubt say "Y". config DRM_XE_PAGEMAP bool "Enable device memory pool for SVM" @@ -97,7 +97,7 @@ config DRM_XE_PAGEMAP Disable this option only if you don't want to expose local device memory for SVM. Will reduce KMD memory footprint when disabled. - If in doubut say "Y". + If in doubt say "Y". 
config DRM_XE_FORCE_PROBE string "Force probe xe for selected Intel hardware IDs" From a34ba68d0938687e56f442102b5f9e149af7a415 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 30 Jun 2025 13:46:47 +0100 Subject: [PATCH 075/358] drm/xe: Consolidate LRC offset calculations Attempt to consolidate the LRC offsets calculations by aligning the recently added wa_bb_offset with the naming scheme in the file and also change the size stored in struct xe_lrc to not include the ring buffer. The former makes it somewhat visually easier to follow the layout of the various logical blocks stored in the LRC bo, while the latter reduces the number of sprinkled around calculations. Signed-off-by: Tvrtko Ursulin Cc: Matthew Brost Cc: Matt Roper Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250630124711.8209-2-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 41 ++++++++++++++----------------- drivers/gpu/drm/xe/xe_lrc_types.h | 2 +- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index c92e7eff79f7..d2ad8fe737eb 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -717,8 +717,12 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - /* Indirect ring state page is at the very end of LRC */ - return lrc->size - LRC_INDIRECT_RING_STATE_SIZE; + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE; +} + +static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; } #define DECL_MAP_ADDR_HELPERS(elem) \ @@ -973,11 +977,6 @@ struct wa_bb_setup { u32 *batch, size_t max_size); }; -static size_t wa_bb_offset(struct xe_lrc *lrc) -{ - return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE; -} - static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { const 
size_t max_size = LRC_WA_BB_SIZE; @@ -993,7 +992,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return -ENOMEM; cmd = buf; } else { - cmd = lrc->bo->vmap.vaddr + wa_bb_offset(lrc); + cmd = lrc->bo->vmap.vaddr + __xe_lrc_wa_bb_offset(lrc); } remain = max_size / sizeof(*cmd); @@ -1017,13 +1016,13 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) if (buf) { xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, - wa_bb_offset(lrc), buf, + __xe_lrc_wa_bb_offset(lrc), buf, (cmd - buf) * sizeof(*cmd)); kfree(buf); } xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + - wa_bb_offset(lrc) + 1); + __xe_lrc_wa_bb_offset(lrc) + 1); return 0; @@ -1040,19 +1039,22 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 init_flags) { struct xe_gt *gt = hwe->gt; + const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); + const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; void *init_data = NULL; u32 arb_enable; - u32 lrc_size; u32 bo_flags; int err; kref_init(&lrc->refcount); lrc->gt = gt; + lrc->size = lrc_size; lrc->flags = 0; - lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class); + lrc->ring.size = ring_size; + lrc->ring.tail = 0; if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; @@ -1065,17 +1067,12 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address * via VM bind calls. 
*/ - lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, - lrc_size + LRC_WA_BB_SIZE, + lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, bo_flags); if (IS_ERR(lrc->bo)) return PTR_ERR(lrc->bo); - lrc->size = lrc_size; - lrc->ring.size = ring_size; - lrc->ring.tail = 0; - xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); @@ -1096,10 +1093,9 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, - xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE); + lrc_size - LRC_PPHWSP_SIZE); } else { - xe_map_memcpy_to(xe, &map, 0, init_data, - xe_gt_lrc_size(gt, hwe->class)); + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } @@ -1859,8 +1855,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc); - snapshot->lrc_size = xe_bo_size(lrc->bo) - snapshot->lrc_offset - - LRC_WA_BB_SIZE; + snapshot->lrc_size = lrc->size; snapshot->lrc_snapshot = NULL; snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc)); snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc); diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 883e550a9423..2c7c81079801 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -22,7 +22,7 @@ struct xe_lrc { */ struct xe_bo *bo; - /** @size: size of lrc including any indirect ring state page */ + /** @size: size of the lrc and optional indirect ring state */ u32 size; /** @gt: gt which this LRC belongs to */ From 67979060740f7f978c8cb580ccea6c91154150f9 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 27 Jun 2025 20:41:43 +0200 Subject: [PATCH 076/358] drm/xe/hw_engine_group: Fix potential leak If 
we fail to allocate a workqueue we will leak kzalloc'ed group object since it was designed to be kfree'ed in the drmm cleanup action, but we didn't have a chance to register this action yet. To avoid this leak allocate a group object using drmm_kzalloc() and start using predefined drmm action to release the workqueue. Signed-off-by: Michal Wajdeczko Cc: Francois Dugast Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250627184143.1480-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 2d68c5b5262a..87a6dcb1b4b5 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -12,15 +12,6 @@ #include "xe_hw_engine_group.h" #include "xe_vm.h" -static void -hw_engine_group_free(struct drm_device *drm, void *arg) -{ - struct xe_hw_engine_group *group = arg; - - destroy_workqueue(group->resume_wq); - kfree(group); -} - static void hw_engine_group_resume_lr_jobs_func(struct work_struct *w) { @@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe) struct xe_hw_engine_group *group; int err; - group = kzalloc(sizeof(*group), GFP_KERNEL); + group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); @@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe) if (!group->resume_wq) return ERR_PTR(-ENOMEM); + err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq); + if (err) + return ERR_PTR(err); + init_rwsem(&group->mode_sem); INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func); INIT_LIST_HEAD(&group->exec_queue_list); - err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group); - if (err) - return ERR_PTR(err); - return group; } From 0fc957c20df343f82d4c3b934bcb21cc51dd49b0 Mon Sep 17 00:00:00 2001 From: Matt 
Roper Date: Tue, 1 Jul 2025 13:13:23 -0700 Subject: [PATCH 077/358] drm/xe: Export xe_step_name for kunit tests xe_step_name() is used by xe_assert(), so adding assertions to functions like xe_device_get_gt() will result in ERROR: modpost: "xe_step_name" [drivers/gpu/drm/xe/tests/xe_test.ko] undefined! while building the kunit tests. Export xe_step_name to avoid these build failures when adding assertions. Reviewed-by: Michal Wajdeczko Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250701201320.2514369-11-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_step.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c index c77b5c317fa0..10e88f2c9615 100644 --- a/drivers/gpu/drm/xe/xe_step.c +++ b/drivers/gpu/drm/xe/xe_step.c @@ -5,6 +5,7 @@ #include "xe_step.h" +#include <kunit/visibility.h> #include <linux/bitfield.h> #include "xe_device.h" @@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step) return "**"; } } +EXPORT_SYMBOL_IF_KUNIT(xe_step_name); From f8e0f4c526a4332e568b2384671ca9d016e0c5a5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Jul 2025 13:13:24 -0700 Subject: [PATCH 078/358] drm/xe: Track maximum GTs per tile on a per-platform basis Today all of our platforms fall into one of three cases: * Single tile platforms with a single (primary) GT * Single tile platforms with two GTs (primary + media) * Two-tile platforms with a single GT (primary) in each Our numbering of GTs has been a bit inconsistent between platforms (e.g., GT1 is the media GT on some platforms, but the second tile's primary GT on others). In the future we'll likely have platforms that are both multi-tile and multi-GT, which will make the situation more confusing. We could also wind up with more than just two types of GTs at some point in the future. Going forward we should standardize the way we assign uapi GT IDs to internal GT structures.
Let's declare that for userspace GT ID n, GT[n]'s tile = n / (max gt per tile) GT[n]'s slot within tile = n % (max gt per tile) We don't want the GT numbering to change for any of our current platforms since the current IDs are part of our ABI contract with userspace so this means we should track the 'max gt per tile' value on a per-platform basis rather than just using a single value across the driver. Encode this into device descriptors in xe_pci.c and use the per-platform number for various checks in the code. Constant XE_MAX_GT_PER_TILE will remain just as the maximum across all platforms for ease of sizing array allocations. Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250701201320.2514369-12-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.h | 41 +++++++++++++--------------- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_pci.c | 18 ++++++++++++ drivers/gpu/drm/xe/xe_query.c | 2 +- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index e4da797a984b..4e719d398c88 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe) return &xe->tiles[0]; } +/* + * Highest GT/tile count for any platform. Used only for memory allocation + * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT + * structures should use the per-platform xe->info.max_gt_per_tile instead. + */ #define XE_MAX_GT_PER_TILE 2 -static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id) -{ - if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE)) - gt_id = 0; - - return gt_id ?
tile->media_gt : tile->primary_gt; -} - static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id) { - struct xe_tile *root_tile = xe_device_get_root_tile(xe); + struct xe_tile *tile; struct xe_gt *gt; - /* - * FIXME: This only works for now because multi-tile and standalone - * media are mutually exclusive on the platforms we have today. - * - * id => GT mapping may change once we settle on how we want to handle - * our UAPI. - */ - if (MEDIA_VER(xe) >= 13) { - gt = xe_tile_get_gt(root_tile, gt_id); - } else { - if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE)) - gt_id = 0; + if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile) + return NULL; - gt = xe->tiles[gt_id].primary_gt; + tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile]; + switch (gt_id % xe->info.max_gt_per_tile) { + default: + xe_assert(xe, false); + fallthrough; + case 0: + gt = tile->primary_gt; + break; + case 1: + gt = tile->media_gt; + break; } if (!gt) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 7e4f6d846af6..78c4acafd268 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -294,6 +294,8 @@ struct xe_device { u8 vram_flags; /** @info.tile_count: Number of tiles */ u8 tile_count; + /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */ + u8 max_gt_per_tile; /** @info.gt_count: Total number of GTs for entire device */ u8 gt_count; /** @info.vm_max_level: Max VM level */ diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 08e21d4099e0..ace910e4a1d2 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -57,6 +57,7 @@ struct xe_device_desc { u8 dma_mask_size; u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; u8 require_force_probe:1; u8 is_dgfx:1; @@ -208,6 +209,7 @@ static const struct xe_device_desc tgl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, 
.require_force_probe = true, }; @@ -218,6 +220,7 @@ static const struct xe_device_desc rkl_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -230,6 +233,7 @@ static const struct xe_device_desc adl_s_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids }, @@ -246,6 +250,7 @@ static const struct xe_device_desc adl_p_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, .subplatforms = (const struct xe_subplatform_desc[]) { { XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids }, @@ -260,6 +265,7 @@ static const struct xe_device_desc adl_n_desc = { .dma_mask_size = 39, .has_display = true, .has_llc = true, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -275,6 +281,7 @@ static const struct xe_device_desc dg1_desc = { .has_display = true, .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .require_force_probe = true, }; @@ -298,6 +305,7 @@ static const struct xe_device_desc ats_m_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -308,6 +316,7 @@ static const struct xe_device_desc dg2_desc = { .pre_gmdid_graphics_ip = &graphics_ip_xehpg, .pre_gmdid_media_ip = &media_ip_xehpm, .dma_mask_size = 46, + .max_gt_per_tile = 1, .require_force_probe = true, DG2_FEATURES, @@ -324,6 +333,7 @@ static const __maybe_unused struct xe_device_desc pvc_desc = { .has_display = false, .has_gsc_nvm = 1, .has_heci_gscfi = 1, + .max_gt_per_tile = 1, .max_remote_tiles = 1, .require_force_probe = true, .has_mbx_power_limits = false, @@ -336,6 +346,7 @@ static const struct xe_device_desc mtl_desc = { .dma_mask_size = 46, 
.has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, }; static const struct xe_device_desc lnl_desc = { @@ -343,6 +354,7 @@ static const struct xe_device_desc lnl_desc = { .dma_mask_size = 46, .has_display = true, .has_pxp = true, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -355,6 +367,7 @@ static const struct xe_device_desc bmg_desc = { .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .max_gt_per_tile = 2, .needs_scratch = true, }; @@ -363,6 +376,7 @@ static const struct xe_device_desc ptl_desc = { .dma_mask_size = 46, .has_display = true, .has_sriov = true, + .max_gt_per_tile = 2, .require_force_probe = true, .needs_scratch = true, }; @@ -611,6 +625,10 @@ static int xe_info_init_early(struct xe_device *xe, xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) && xe_modparam.probe_display && desc->has_display; + + xe_assert(xe, desc->max_gt_per_tile > 0); + xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE); + xe->info.max_gt_per_tile = desc->max_gt_per_tile; xe->info.tile_count = 1 + desc->max_remote_tiles; err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0); diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e8e1743dcb1e..e615b0916217 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -141,7 +141,7 @@ query_engine_cycles(struct xe_device *xe, return -EINVAL; eci = &resp.eci; - if (eci->gt_id >= XE_MAX_GT_PER_TILE) + if (eci->gt_id >= xe->info.max_gt_per_tile) return -EINVAL; gt = xe_device_get_gt(xe, eci->gt_id); From fb72cd2104a9a57c390fe773e0c6ff58679a0a12 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Jul 2025 13:13:25 -0700 Subject: [PATCH 079/358] drm/xe/tests/pci: Ensure all platforms have a valid GT/tile count Add a simple kunit test to ensure each platform's GT per tile count is non-zero and does not exceed the global XE_MAX_GT_PER_TILE definition. 
We need to move 'struct xe_subplatform_desc' from the .c file to the types header to ensure it is accessible from the kunit test. v2: - Rebase on latest xe_pci test rework from Michal and convert to a parameterized test that runs on each PCI ID supported by the driver. Cc: Michal Wajdeczko Reviewed-by: Ravi Kumar Vodapalli Link: https://lore.kernel.org/r/20250701201320.2514369-13-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/tests/xe_pci.c | 31 +++++++++++++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.c | 12 ++++++++ drivers/gpu/drm/xe/tests/xe_pci_test.h | 1 + drivers/gpu/drm/xe/xe_pci.c | 39 ------------------------ drivers/gpu/drm/xe/xe_pci_types.h | 41 ++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c index baccb657bd05..9c715e59f030 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci.c +++ b/drivers/gpu/drm/xe/tests/xe_pci.c @@ -21,6 +21,18 @@ static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc) KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc); KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc); +static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc) +{ + const struct xe_device_desc *dev_desc = + (const struct xe_device_desc *)param->driver_data; + + if (dev_desc) + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)", + param->device, dev_desc->platform_name); +} + +KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc); + /** * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters * @prev: the pointer to the previous parameter to iterate from or NULL @@ -55,6 +67,25 @@ const void *xe_pci_media_ip_gen_param(const void *prev, char *desc) } EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param); +/** + * xe_pci_id_gen_param - Generate struct pci_device_id parameters + * @prev: the pointer to the previous parameter to iterate from or NULL + * @desc: output 
buffer with minimum size of KUNIT_PARAM_DESC_SIZE + * + * This function prepares struct pci_device_id parameter. + * + * To be used only as a parameter generator function in &KUNIT_CASE_PARAM. + * + * Return: pointer to the next parameter or NULL if no more parameters + */ +const void *xe_pci_id_gen_param(const void *prev, char *desc) +{ + const struct pci_device_id *pci = pci_id_gen_params(prev, desc); + + return pci->driver_data ? pci : NULL; +} +EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param); + static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid) { diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c index 95fed41f7ff2..37b344df2dc3 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.c +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c @@ -44,9 +44,21 @@ static void check_media_ip(struct kunit *test) KUNIT_ASSERT_EQ(test, mask, 0); } +static void check_platform_gt_count(struct kunit *test) +{ + const struct pci_device_id *pci = test->param_value; + const struct xe_device_desc *desc = + (const struct xe_device_desc *)pci->driver_data; + int max_gt = desc->max_gt_per_tile; + + KUNIT_ASSERT_GT(test, max_gt, 0); + KUNIT_ASSERT_LE(test, max_gt, XE_MAX_GT_PER_TILE); +} + static struct kunit_case xe_pci_tests[] = { KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param), KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param), + KUNIT_CASE_PARAM(check_platform_gt_count, xe_pci_id_gen_param), {} }; diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h index 3a1df7a5e291..ce4d2b86b778 100644 --- a/drivers/gpu/drm/xe/tests/xe_pci_test.h +++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h @@ -27,6 +27,7 @@ int xe_pci_fake_device_init(struct xe_device *xe); const void *xe_pci_graphics_ip_gen_param(const void *prev, char *desc); const void *xe_pci_media_ip_gen_param(const void *prev, char *desc); +const void *xe_pci_id_gen_param(const void *prev, char 
*desc); const void *xe_pci_live_device_gen_param(const void *prev, char *desc); #endif diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index ace910e4a1d2..99dede6e7b21 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -38,45 +38,6 @@ enum toggle_d3cold { D3COLD_ENABLE, }; -struct xe_subplatform_desc { - enum xe_subplatform subplatform; - const char *name; - const u16 *pciidlist; -}; - -struct xe_device_desc { - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_graphics_ip; - /* Should only ever be set for platforms without GMD_ID */ - const struct xe_ip *pre_gmdid_media_ip; - - const char *platform_name; - const struct xe_subplatform_desc *subplatforms; - - enum xe_platform platform; - - u8 dma_mask_size; - u8 max_remote_tiles:2; - u8 max_gt_per_tile:2; - - u8 require_force_probe:1; - u8 is_dgfx:1; - - u8 has_display:1; - u8 has_fan_control:1; - u8 has_gsc_nvm:1; - u8 has_heci_gscfi:1; - u8 has_heci_cscfi:1; - u8 has_llc:1; - u8 has_mbx_power_limits:1; - u8 has_pxp:1; - u8 has_sriov:1; - u8 needs_scratch:1; - u8 skip_guc_pc:1; - u8 skip_mtcfg:1; - u8 skip_pcode:1; -}; - __diag_push(); __diag_ignore_all("-Woverride-init", "Allow field overrides in table"); diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h index ca6b10d35573..4de6f69ed975 100644 --- a/drivers/gpu/drm/xe/xe_pci_types.h +++ b/drivers/gpu/drm/xe/xe_pci_types.h @@ -8,6 +8,47 @@ #include +#include "xe_platform_types.h" + +struct xe_subplatform_desc { + enum xe_subplatform subplatform; + const char *name; + const u16 *pciidlist; +}; + +struct xe_device_desc { + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_graphics_ip; + /* Should only ever be set for platforms without GMD_ID */ + const struct xe_ip *pre_gmdid_media_ip; + + const char *platform_name; + const struct xe_subplatform_desc *subplatforms; + + enum xe_platform platform; + + u8 
dma_mask_size; + u8 max_remote_tiles:2; + u8 max_gt_per_tile:2; + + u8 require_force_probe:1; + u8 is_dgfx:1; + + u8 has_display:1; + u8 has_fan_control:1; + u8 has_gsc_nvm:1; + u8 has_heci_gscfi:1; + u8 has_heci_cscfi:1; + u8 has_llc:1; + u8 has_mbx_power_limits:1; + u8 has_pxp:1; + u8 has_sriov:1; + u8 needs_scratch:1; + u8 skip_guc_pc:1; + u8 skip_mtcfg:1; + u8 skip_pcode:1; +}; + struct xe_graphics_desc { u8 va_bits; u8 vm_max_level; From bd6a4b978584cb633be0d5cdfbf79803fd31da07 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Jul 2025 13:13:26 -0700 Subject: [PATCH 080/358] drm/xe: Assign GT IDs properly on multi-tile + multi-GT platforms Although "multi-tile" and "multiple GTs per tile" are mutually-exclusive characteristics on all of our platforms today, this may not always be true. Assign GT IDs according to xe->info.max_gt_per_tile in a way that should work even if future platforms have different configurations. This patch should not change the behavior of current platforms; it only future-proofs for potential future designs. v2: - Re-calculate gt_count if tile count gets reduced by MTCFG. 
(PVC CI) Reviewed-by: Ravi Kumar Vodapalli Link: https://lore.kernel.org/r/20250701201320.2514369-14-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_mmio.c | 16 ++++++++++------ drivers/gpu/drm/xe/xe_pci.c | 14 ++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 7357458bc0d2..751586d6806a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -55,6 +55,7 @@ static void tiles_fini(void *arg) static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) { struct xe_tile *tile; + struct xe_gt *gt; u8 id; /* @@ -67,7 +68,7 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) /* Possibly override number of tile based on configuration register */ if (!xe->info.skip_mtcfg) { struct xe_mmio *mmio = xe_root_tile_mmio(xe); - u8 tile_count; + u8 tile_count, gt_count; u32 mtcfg; /* @@ -84,12 +85,15 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size) xe->info.tile_count = tile_count; /* - * FIXME: Needs some work for standalone media, but - * should be impossible with multi-tile for now: - * multi-tile platform with standalone media doesn't - * exist + * We've already setup gt_count according to the full + * tile count. Re-calculate it to only include the GTs + * that belong to the remaining tile(s). 
*/ - xe->info.gt_count = xe->info.tile_count; + gt_count = 0; + for_each_gt(gt, xe, id) + if (gt->info.id < tile_count * xe->info.max_gt_per_tile) + gt_count++; + xe->info.gt_count = gt_count; } } diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 99dede6e7b21..42aaef9fa2ea 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -689,10 +689,11 @@ static int xe_info_init(struct xe_device *xe, */ for_each_tile(tile, xe, id) { gt = tile->primary_gt; - gt->info.id = xe->info.gt_count++; gt->info.type = XE_GT_TYPE_MAIN; + gt->info.id = tile->id * xe->info.max_gt_per_tile; gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state; gt->info.engine_mask = graphics_desc->hw_engine_mask; + xe->info.gt_count++; if (MEDIA_VER(xe) < 13 && media_desc) gt->info.engine_mask |= media_desc->hw_engine_mask; @@ -710,17 +711,10 @@ static int xe_info_init(struct xe_device *xe, gt = tile->media_gt; gt->info.type = XE_GT_TYPE_MEDIA; + gt->info.id = tile->id * xe->info.max_gt_per_tile + 1; gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state; gt->info.engine_mask = media_desc->hw_engine_mask; - - /* - * FIXME: At the moment multi-tile and standalone media are - * mutually exclusive on current platforms. We'll need to - * come up with a better way to number GTs if we ever wind - * up with platforms that support both together. - */ - drm_WARN_ON(&xe->drm, id != 0); - gt->info.id = xe->info.gt_count++; + xe->info.gt_count++; } return 0; From 457123d5a0351792280c7441107db49560bdd3c3 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Jul 2025 13:13:27 -0700 Subject: [PATCH 081/358] drm/xe: Don't compare GT ID to GT count when determining valid GTs On current platforms with multiple GTs, all of the GT IDs are consecutive; as a result we know that the GT IDs range from 0 to gt_count-1 and can determine if a GT ID is valid by comparing against the count. 
The consecutive nature of GT IDs may not hold true on future platforms if/when we have platforms that are both multi-tile and have multiple GTs within each tile. Once such platforms exist, it's quite possible that we could wind up with something like a GT list composed of IDs 0, 2, and 3 with no GT 1 (which would be a 2-tile platform with media only on the second tile). To future-proof the code we should stop comparing against the GT count to determine whether a GT ID is valid or not. Instead we should do an actual lookup of the ID to determine whether the GT exists. This also means that our GT loop macro should not end at the GT count, but should rather examine the entire space up to (# of tiles) * (max GT per tile) to ensure it doesn't stop prematurely. Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250701201320.2514369-15-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_device.h | 6 +----- drivers/gpu/drm/xe/xe_eu_stall.c | 6 ++++-- drivers/gpu/drm/xe/xe_exec_queue.c | 2 +- drivers/gpu/drm/xe/xe_hw_engine.c | 3 ++- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index 4e719d398c88..f0eb8150f185 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -127,12 +127,8 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \ for_each_if((tile__) = &(xe__)->tiles[(id__)]) -/* - * FIXME: This only works for now since multi-tile and standalone media - * happen to be mutually exclusive. Future platforms may change this... 
- */ #define for_each_gt(gt__, xe__, id__) \ - for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \ + for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 96732613b4b7..af7916315ac6 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -258,11 +258,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value, static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value, struct eu_stall_open_properties *props) { - if (value >= xe->info.gt_count) { + struct xe_gt *gt = xe_device_get_gt(xe, value); + + if (!gt) { drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value); return -EINVAL; } - props->gt = xe_device_get_gt(xe, value); + props->gt = gt; return 0; } diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fee22358cc09..8991b4aed440 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -610,7 +610,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) return -EFAULT; - if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count)) + if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id))) return -EINVAL; if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 3439c8522d01..796ba8c34a16 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -1059,12 +1059,13 @@ struct xe_hw_engine * xe_hw_engine_lookup(struct xe_device *xe, struct drm_xe_engine_class_instance eci) { + struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id); unsigned int idx; if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) return 
NULL; - if (eci.gt_id >= xe->info.gt_count) + if (!gt) return NULL; idx = array_index_nospec(eci.engine_class, From d4eb4a010262ea7801e576d1033b355910f2f7d4 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Tue, 1 Jul 2025 13:13:28 -0700 Subject: [PATCH 082/358] drm/xe/xe_query: Use separate iterator while filling GT list The 'id' value updated by for_each_gt() is the uapi GT ID of the GTs being iterated over, and may skip over values if a GT is not present on the device. Use a separate iterator for GT list array assignments to ensure that the array will be filled properly on future platforms where index in the GT query list may not match the uapi ID. v2: - Include the missing increment of the iterator. (Jonathan) Cc: Jonathan Cavitt Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250701201320.2514369-16-matthew.d.roper@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_query.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index e615b0916217..d517ec9ddcbf 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -368,6 +368,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query struct drm_xe_query_gt_list __user *query_ptr = u64_to_user_ptr(query->data); struct drm_xe_query_gt_list *gt_list; + int iter = 0; u8 id; if (query->size == 0) { @@ -385,12 +386,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA; + gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA; else - gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN; - gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id; - gt_list->gt_list[id].gt_id = gt->info.id; - gt_list->gt_list[id].reference_clock = gt->info.reference_clock; + gt_list->gt_list[iter].type = 
DRM_XE_QUERY_GT_TYPE_MAIN; + gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id; + gt_list->gt_list[iter].gt_id = gt->info.id; + gt_list->gt_list[iter].reference_clock = gt->info.reference_clock; /* * The mem_regions indexes in the mask below need to * directly identify the struct @@ -406,19 +407,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query * assumption. */ if (!IS_DGFX(xe)) - gt_list->gt_list[id].near_mem_regions = 0x1; + gt_list->gt_list[iter].near_mem_regions = 0x1; else - gt_list->gt_list[id].near_mem_regions = + gt_list->gt_list[iter].near_mem_regions = BIT(gt_to_tile(gt)->id) << 1; - gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^ - gt_list->gt_list[id].near_mem_regions; + gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^ + gt_list->gt_list[iter].near_mem_regions; - gt_list->gt_list[id].ip_ver_major = + gt_list->gt_list[iter].ip_ver_major = REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_minor = + gt_list->gt_list[iter].ip_ver_minor = REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid); - gt_list->gt_list[id].ip_ver_rev = + gt_list->gt_list[iter].ip_ver_rev = REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid); + + iter++; } if (copy_to_user(query_ptr, gt_list, size)) { From b9329f51677e5ca3288ab652f488e99d5db11693 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Mon, 30 Jun 2025 15:07:41 +0530 Subject: [PATCH 083/358] drm/xe/xe_pmu: Validate gt in event supported Validate gt instead of checking gt_id is lesser than max gts per tile Signed-off-by: Riana Tauro Reviewed-by: Lucas De Marchi Reviewed-by: Matt Roper Link: https://lore.kernel.org/r/20250630093741.2435281-1-riana.tauro@intel.com Signed-off-by: Matt Roper --- drivers/gpu/drm/xe/xe_pmu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c index 69df0e3520a5..cab51d826345 100644 --- a/drivers/gpu/drm/xe/xe_pmu.c +++ 
b/drivers/gpu/drm/xe/xe_pmu.c @@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event) return true; } -static bool event_supported(struct xe_pmu *pmu, unsigned int gt, +static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id, unsigned int id) { - if (gt >= XE_MAX_GT_PER_TILE) + struct xe_device *xe = container_of(pmu, typeof(*xe), pmu); + struct xe_gt *gt = xe_device_get_gt(xe, gt_id); + + if (!gt) return false; return id < sizeof(pmu->supported_events) * BITS_PER_BYTE && From 3fae6918a3e27cce20ded2551f863fb05d4bef8d Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 2 Jul 2025 00:00:52 +0200 Subject: [PATCH 084/358] drm/xe/pf: Clear all LMTT pages on alloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our LMEM buffer objects are not cleared by default on alloc and during VF provisioning we only setup LMTT PTEs for the actually provisioned LMEM range. But beyond that valid range we might leave some stale data that could either point to some other VFs allocations or even to the PF pages. Explicitly clear all new LMTT page to avoid the risk that a malicious VF would try to exploit that gap. While around add asserts to catch any undesired PTE overwrites and low-level debug traces to track LMTT PT life-cycle. 
Fixes: b1d204058218 ("drm/xe/pf: Introduce Local Memory Translation Table") Signed-off-by: Michal Wajdeczko Cc: Michał Winiarski Cc: Lukasz Laguna Reviewed-by: Michał Winiarski Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250701220052.1612-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_lmtt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index b56437a816e4..381f576036d0 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -78,6 +78,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level } lmtt_assert(lmtt, xe_bo_is_vram(bo)); + lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE)); + + xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo)); pt->level = level; pt->bo = bo; @@ -91,6 +94,9 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level static void lmtt_pt_free(struct xe_lmtt_pt *pt) { + lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n", + pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE)); + xe_bo_unpin_map_no_vm(pt->bo); kfree(pt); } @@ -226,9 +232,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, switch (lmtt->ops->lmtt_pte_size(level)) { case sizeof(u32): + lmtt_assert(lmtt, !overflows_type(pte, u32)); + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte); break; case sizeof(u64): + lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64)); + xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte); break; default: From 491b9783126303755717c0cbde0b08ee59b6abab Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 2 Jul 2025 14:35:11 -0700 Subject: [PATCH 085/358] drm/xe: Allocate PF queue size on pow2 boundary CIRC_SPACE does not work unless 
the size argument is a power of 2, allocate PF queue size on power of 2 boundary. Cc: stable@vger.kernel.org Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Fixes: 29582e0ea75c ("drm/xe: Add page queue multiplier") Signed-off-by: Matthew Brost Reviewed-by: Francois Dugast Link: https://lore.kernel.org/r/20250702213511.3226167-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt_pagefault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 3522865c67c9..5a75d56d8558 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -419,6 +419,7 @@ static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue) #define PF_MULTIPLIER 8 pf_queue->num_dw = (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER; + pf_queue->num_dw = roundup_pow_of_two(pf_queue->num_dw); #undef PF_MULTIPLIER pf_queue->gt = gt; From 7eba6a80fef4187d70ed4c4ef5a2cd8cde09ef95 Mon Sep 17 00:00:00 2001 From: Tomasz Lis Date: Mon, 30 Jun 2025 17:21:55 +0200 Subject: [PATCH 086/358] drm/xe/vf: Make multi-GT migration less error prone MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a remote chance that after migration, some GTs will not send the MIGRATED interrupt, or due to current VF KMD state the interrupt will not lead to marking the GT for recovery. Requiring IRQs from all GTs before starting migration introduces the possibility that the process will get stalled due to one GuC. One could argue it is also a waste of time to wait for all IRQs, but we should get all the IRQs as soon as VGPU starts, so that's not really an impactful argument.
Still, not waiting for all GTs makes it easier to handle situations: * where one GuC IRQ is missing * where state before probe is unclean - getting MIGRATED IRQ as soon as interrupts are enabled * where multiple migrations happen close to each other To help with these cases, this patch alters the post-migration recovery so that recovery task is started as soon as one GuC IRQ is handled, and other GTs are included in recovery later as the subsequent IRQs are serviced. The post-migration recovery can now be called for any selection of GTs, and it will perform recovery on all GTs for which IRQs have arrived, even multiple times if necessary. v2: Typos and style fixes v3: Transferring gt_flags by value rather than reference to last function where it is used Signed-off-by: Tomasz Lis Cc: Michal Wajdeczko Cc: Michal Winiarski Cc: Satyanarayana K V P Acked-by: Satyanarayana K V P Reviewed-by: Michal Winiarski Link: https://lore.kernel.org/r/20250630152155.195648-1-tomasz.lis@intel.com Signed-off-by: Michał Winiarski --- drivers/gpu/drm/xe/xe_sriov_vf.c | 195 +++++++++++++++---------------- 1 file changed, 91 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c index 6526fe450e55..26e243c28994 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -147,127 +147,113 @@ void xe_sriov_vf_init_early(struct xe_device *xe) xe_sriov_info(xe, "migration not supported by this module version\n"); } -/** - * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. +static bool gt_vf_post_migration_needed(struct xe_gt *gt) +{ + return test_bit(gt->info.id, &gt_to_xe(gt)->sriov.vf.migration.gt_flags); +} + +/* + * Notify GuCs marked in flags about resource fixups apply finished.
* @xe: the &xe_device struct instance + * @gt_flags: flags marking to which GTs the notification shall be sent + */ +static int vf_post_migration_notify_resfix_done(struct xe_device *xe, unsigned long gt_flags) +{ + struct xe_gt *gt; + unsigned int id; + int err = 0; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, &gt_flags)) + continue; + /* skip asking GuC for RESFIX exit if new recovery request arrived */ + if (gt_vf_post_migration_needed(gt)) + continue; + err = xe_gt_sriov_vf_notify_resfix_done(gt); + if (err) + break; + clear_bit(id, &gt_flags); + } + + if (gt_flags && !err) + drm_dbg(&xe->drm, "another recovery imminent, skipped some notifications\n"); + return err; +} + +static int vf_get_next_migrated_gt_id(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (test_and_clear_bit(id, &xe->sriov.vf.migration.gt_flags)) + return id; + } + return -1; +} + +/** + * Perform post-migration fixups on a single GT. * - * After migration, we need to re-query all VF configuration to make sure - * they match previous provisioning. Note that most of VF provisioning - * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. + * After migration, GuC needs to be re-queried for VF configuration to check + * if it matches previous provisioning. Most of VF provisioning shall be the + * same, except GGTT range, since GGTT is not virtualized per-VF. If GGTT + * range has changed, we have to perform fixups - shift all GGTT references + * used anywhere within the driver. After the fixups in this function succeed, + * it is allowed to ask the GuC bound to this GT to continue normal operation. * * Returns: 0 if the operation completed successfully, or a negative error * code otherwise.
*/ -static int vf_post_migration_requery_guc(struct xe_device *xe) +static int gt_vf_post_migration_fixups(struct xe_gt *gt) { - struct xe_gt *gt; - unsigned int id; - int err, ret = 0; + s64 shift; + int err; - for_each_gt(gt, xe, id) { - err = xe_gt_sriov_vf_query_config(gt); - ret = ret ?: err; - } - - return ret; -} - -static void vf_post_migration_fixup_ctb(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - xe_assert(xe, IS_SRIOV_VF(xe)); - - for_each_gt(gt, xe, id) { - s32 shift = xe_gt_sriov_vf_ggtt_shift(gt); + err = xe_gt_sriov_vf_query_config(gt); + if (err) + return err; + shift = xe_gt_sriov_vf_ggtt_shift(gt); + if (shift) { + xe_tile_sriov_vf_fixup_ggtt_nodes(gt_to_tile(gt), shift); + /* FIXME: add the recovery steps */ xe_guc_ct_fixup_messages_with_ggtt(&gt->uc.guc.ct, shift); } -} - -/* - * vf_post_migration_imminent - Check if post-restore recovery is coming. - * @xe: the &xe_device struct instance - * - * Return: True if migration recovery worker will soon be running. Any worker currently - * executing does not affect the result. - */ -static bool vf_post_migration_imminent(struct xe_device *xe) -{ - return xe->sriov.vf.migration.gt_flags != 0 || - work_pending(&xe->sriov.vf.migration.worker); -} - -static bool vf_post_migration_fixup_ggtt_nodes(struct xe_device *xe) -{ - bool need_fixups = false; - struct xe_tile *tile; - unsigned int id; - - for_each_tile(tile, xe, id) { - struct xe_gt *gt = tile->primary_gt; - s64 shift; - - shift = xe_gt_sriov_vf_ggtt_shift(gt); - if (shift) { - need_fixups = true; - xe_tile_sriov_vf_fixup_ggtt_nodes(tile, shift); - } - } - return need_fixups; -} - -/* - * Notify all GuCs about resource fixups apply finished.
- */ -static void vf_post_migration_notify_resfix_done(struct xe_device *xe) -{ - struct xe_gt *gt; - unsigned int id; - - for_each_gt(gt, xe, id) { - if (vf_post_migration_imminent(xe)) - goto skip; - xe_gt_sriov_vf_notify_resfix_done(gt); - } - return; - -skip: - drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); + return 0; } static void vf_post_migration_recovery(struct xe_device *xe) { - bool need_fixups; - int err; + unsigned long fixed_gts = 0; + int id, err; drm_dbg(&xe->drm, "migration recovery in progress\n"); xe_pm_runtime_get(xe); - err = vf_post_migration_requery_guc(xe); - if (vf_post_migration_imminent(xe)) - goto defer; - if (unlikely(err)) - goto fail; + if (!vf_migration_supported(xe)) { xe_sriov_err(xe, "migration not supported by this module version\n"); err = -ENOTRECOVERABLE; goto fail; } - need_fixups = vf_post_migration_fixup_ggtt_nodes(xe); - /* FIXME: add the recovery steps */ - if (need_fixups) - vf_post_migration_fixup_ctb(xe); + while (id = vf_get_next_migrated_gt_id(xe), id >= 0) { + struct xe_gt *gt = xe_device_get_gt(xe, id); + + err = gt_vf_post_migration_fixups(gt); + if (err) + goto fail; + + set_bit(id, &fixed_gts); + } + + err = vf_post_migration_notify_resfix_done(xe, fixed_gts); + if (err) + goto fail; - vf_post_migration_notify_resfix_done(xe); xe_pm_runtime_put(xe); drm_notice(&xe->drm, "migration recovery ended\n"); return; -defer: - xe_pm_runtime_put(xe); - drm_dbg(&xe->drm, "migration recovery deferred\n"); - return; fail: xe_pm_runtime_put(xe); drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); @@ -282,18 +268,23 @@ static void migration_worker_func(struct work_struct *w) vf_post_migration_recovery(xe); } -static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +/* + * Check if post-restore recovery is coming on any of GTs. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. 
Any worker currently + * executing does not affect the result. + */ +static bool vf_ready_to_recovery_on_any_gts(struct xe_device *xe) { struct xe_gt *gt; unsigned int id; for_each_gt(gt, xe, id) { - if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { - xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); - return false; - } + if (test_bit(id, &xe->sriov.vf.migration.gt_flags)) + return true; } - return true; + return false; } /** @@ -308,13 +299,9 @@ void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) xe_assert(xe, IS_SRIOV_VF(xe)); - if (!vf_ready_to_recovery_on_all_gts(xe)) + if (!vf_ready_to_recovery_on_any_gts(xe)) return; - WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); - /* Ensure other threads see that no flags are set now. */ - smp_mb(); - started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); drm_info(&xe->drm, "VF migration recovery %s\n", started ? "scheduled" : "already in progress"); From 03d85ab36bcbcbe9dc962fccd3f8e54d7bb93b35 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 1 Jul 2025 20:58:46 -0700 Subject: [PATCH 087/358] Revert "drm/xe/xe2: Enable Indirect Ring State support for Xe2" This reverts commit fe0154cf8222d9e38c60ccc124adb2f9b5272371. Seeing some unexplained random failures during LRC context switches with indirect ring state enabled. The failures were always there, but the repro rate increased with the addition of WA BB as a separate BO. Commit 3a1edef8f4b5 ("drm/xe: Make WA BB part of LRC BO") helped to reduce the issues in the context switches, but didn't eliminate them completely. Indirect ring state is not required for any current features, so disable for now until failures can be root caused. 
Cc: stable@vger.kernel.org Fixes: fe0154cf8222 ("drm/xe/xe2: Enable Indirect Ring State support for Xe2") Signed-off-by: Matthew Brost Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250702035846.3178344-1-matthew.brost@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 42aaef9fa2ea..755e335c8e18 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -103,7 +103,6 @@ static const struct xe_graphics_desc graphics_xelpg = { .has_asid = 1, \ .has_atomic_enable_pte_bit = 1, \ .has_flat_ccs = 1, \ - .has_indirect_ring_state = 1, \ .has_range_tlb_invalidation = 1, \ .has_usm = 1, \ .has_64bit_timestamp = 1, \ From f7a2fd776e57bd6468644bdecd91ab3aba57ba58 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Tue, 1 Jul 2025 11:39:50 +0100 Subject: [PATCH 088/358] drm/xe/bmg: fix compressed VRAM handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There looks to be an issue in our compression handling when the BO pages are very fragmented, where we choose to skip the identity map and instead fall back to emitting the PTEs by hand when migrating memory, such that we can hopefully do more work per blit operation. However in such a case we need to ensure the src PTEs are correctly tagged with a compression enabled PAT index on dgpu xe2+, otherwise the copy will simply treat the src memory as uncompressed, leading to corruption if the memory was compressed by the user. To fix this pass along use_comp_pat into emit_pte() on the src side, to indicate that compression should be considered. 
v2 (Jonathan): tweak the commit message Fixes: 523f191cc0c7 ("drm/xe/xe_migrate: Handle migration logic for xe2+ dgfx") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Thomas Hellström Cc: Akshata Jahagirdar Cc: stable@vger.kernel.org # v6.12+ Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250701103949.83116-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 0838582537e8..4e2bdf70eb70 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -863,7 +863,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m, if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it)) xe_res_next(&src_it, src_L0); else - emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs, + emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat, &src_it, src_L0, src); if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it)) From ccfb15b8158c11a8304204aeac354c7b1cfb18a3 Mon Sep 17 00:00:00 2001 From: "Vodapalli, Ravi Kumar" Date: Fri, 4 Jul 2025 16:05:27 +0530 Subject: [PATCH 089/358] drm/xe/bmg: Add one additional PCI ID One additional PCI ID is added in Bspec for BMG, Add it so that driver recognizes this device with this new ID. Bspec: 68090 Cc: stable@vger.kernel.org # v6.12+ Signed-off-by: Vodapalli, Ravi Kumar Reviewed-by: Shekhar Chauhan Acked-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://lore.kernel.org/r/20250704103527.100178-1-ravi.kumar.vodapalli@intel.com --- include/drm/intel/pciids.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/intel/pciids.h b/include/drm/intel/pciids.h index a0180d10e260..76f8d26f9cc9 100644 --- a/include/drm/intel/pciids.h +++ b/include/drm/intel/pciids.h @@ -846,6 +846,7 @@ /* BMG */ #define INTEL_BMG_IDS(MACRO__, ...)
\ MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE209, ## __VA_ARGS__), \ MACRO__(0xE20B, ## __VA_ARGS__), \ MACRO__(0xE20C, ## __VA_ARGS__), \ MACRO__(0xE20D, ## __VA_ARGS__), \ From 0b64addcae7f04745bc5f62d41e27268052f812e Mon Sep 17 00:00:00 2001 From: Julia Filipchuk Date: Thu, 26 Jun 2025 11:28:10 -0700 Subject: [PATCH 090/358] drm/xe/guc: Recommend GuC v70.46.2 for BMG, LNL, DG2 UAPI compatibility version 1.22.2 Resolves various bugs. Recommend newer version. Signed-off-by: Julia Filipchuk Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250626182805.1701096-13-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 6d0869518652..4d3ace56f38a 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,10 +115,10 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ - fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \ - fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \ + fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ + fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ - fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \ + fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \ fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \ fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \ fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \ From 5cdb71d3b0db88d26ca34984fe61755faf681626 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 26 Jun 2025 11:28:11 -0700 Subject: [PATCH 091/358] 
drm/xe/ptl: Add GuC FW definition for PTL The first official GuC release for PTL is 70.47.0, which maps to API version 1.22.4. Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250626182805.1701096-14-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 4d3ace56f38a..2584ee3b0316 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -115,6 +115,7 @@ struct fw_blobs_by_type { #define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED #define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 47, 0)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 45, 2)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \ fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \ From 4c93e2c34154d36c9eab54d81d8ec22fa2e2afd6 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Thu, 26 Jun 2025 11:28:12 -0700 Subject: [PATCH 092/358] drm/xe/ptl: Add HuC FW definition for PTL Add the unversioned define for the PTL HuC FW.
Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250626182805.1701096-15-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/xe/xe_uc_fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c index 2584ee3b0316..9bbdde604923 100644 --- a/drivers/gpu/drm/xe/xe_uc_fw.c +++ b/drivers/gpu/drm/xe/xe_uc_fw.c @@ -128,6 +128,7 @@ struct fw_blobs_by_type { fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) #define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \ + fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \ fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \ fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \ fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \ From 127ed492ad2df0aa2351a1ad32a793ae7d91161b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 30 Jun 2025 19:11:33 +0530 Subject: [PATCH 093/358] drm/amdgpu: Pass adev pointer to functions Pass amdgpu device context instead of drm device context to some amdgpu_device_* functions. DRM device context is not required in those functions. No functional change. 
Signed-off-by: Lijo Lazar Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 24 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 8 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 65 ++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 15 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 4 +- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 +- 6 files changed, 57 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7edb7ba91786..1f531b5f594d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1562,16 +1562,16 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, int amdgpu_device_mode1_reset(struct amdgpu_device *adev); int amdgpu_device_link_reset(struct amdgpu_device *adev); -bool amdgpu_device_supports_atpx(struct drm_device *dev); -bool amdgpu_device_supports_px(struct drm_device *dev); -bool amdgpu_device_supports_boco(struct drm_device *dev); -bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -int amdgpu_device_supports_baco(struct drm_device *dev); +bool amdgpu_device_supports_atpx(struct amdgpu_device *adev); +bool amdgpu_device_supports_px(struct amdgpu_device *adev); +bool amdgpu_device_supports_boco(struct amdgpu_device *adev); +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev); +int amdgpu_device_supports_baco(struct amdgpu_device *adev); void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); -int amdgpu_device_baco_enter(struct drm_device *dev); -int amdgpu_device_baco_exit(struct drm_device *dev); +int amdgpu_device_baco_enter(struct amdgpu_device *adev); +int amdgpu_device_baco_exit(struct amdgpu_device *adev); void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); @@ -1674,7 +1674,8 @@ int 
amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state); -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state); +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size); @@ -1705,8 +1706,11 @@ static inline void amdgpu_acpi_release(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, u8 dev_state, bool drv_state) { return 0; } -static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, - enum amdgpu_ss ss_state) { return 0; } +static inline int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) +{ + return 0; +} static inline void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index f5466c592d94..b047fdf81543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -811,18 +811,18 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, /** * amdgpu_acpi_smart_shift_update - update dGPU device state to SBIOS * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * @ss_state: current smart shift event * * returns 0 on success, * otherwise return error number. 
*/ -int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) +int amdgpu_acpi_smart_shift_update(struct amdgpu_device *adev, + enum amdgpu_ss ss_state) { - struct amdgpu_device *adev = drm_to_adev(dev); int r; - if (!amdgpu_device_supports_smart_shift(dev)) + if (!amdgpu_device_supports_smart_shift(adev)) return 0; switch (ss_state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d282c0753b14..45b44dec0d7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -411,19 +411,16 @@ static const struct attribute_group amdgpu_board_attrs_group = { static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /** * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ATPX power control, * otherwise return false. */ -bool amdgpu_device_supports_px(struct drm_device *dev) +bool amdgpu_device_supports_px(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid()) return true; return false; @@ -432,15 +429,13 @@ bool amdgpu_device_supports_px(struct drm_device *dev) /** * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true if the device is a dGPU with ACPI power control, * otherwise return false. 
*/ -bool amdgpu_device_supports_boco(struct drm_device *dev) +bool amdgpu_device_supports_boco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) return false; @@ -453,29 +448,24 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) /** * amdgpu_device_supports_baco - Does the device support BACO * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Return: * 1 if the device supports BACO; * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ -int amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); - return amdgpu_asic_supports_baco(adev); } void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) { - struct drm_device *dev; int bamaco_support; - dev = adev_to_drm(adev); - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - bamaco_support = amdgpu_device_supports_baco(dev); + bamaco_support = amdgpu_device_supports_baco(adev); switch (amdgpu_runtime_pm) { case 2: @@ -495,10 +485,12 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) break; case -1: case -2: - if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + if (amdgpu_device_supports_px(adev)) { + /* enable PX as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_PX; dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + } else if (amdgpu_device_supports_boco(adev)) { + /* enable boco as runtime mode */ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; dev_info(adev->dev, "Using BOCO for runtime pm\n"); } else { @@ -547,14 +539,14 @@ void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support * - * @dev: drm_device pointer + * @adev: amdgpu device pointer * * Returns true 
if the device is a dGPU with Smart Shift support, * otherwise returns false. */ -bool amdgpu_device_supports_smart_shift(struct drm_device *dev) +bool amdgpu_device_supports_smart_shift(struct amdgpu_device *adev) { - return (amdgpu_device_supports_boco(dev) && + return (amdgpu_device_supports_boco(adev) && amdgpu_acpi_is_power_shift_control_supported()); } @@ -2200,7 +2192,8 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, struct drm_device *dev = pci_get_drvdata(pdev); int r; - if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) + if (amdgpu_device_supports_px(drm_to_adev(dev)) && + state == VGA_SWITCHEROO_OFF) return; if (state == VGA_SWITCHEROO_ON) { @@ -4192,13 +4185,13 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { task_barrier_enter(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_enter(adev); if (adev->asic_reset_res) goto fail; task_barrier_exit(&hive->tb); - adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); + adev->asic_reset_res = amdgpu_device_baco_exit(adev); if (adev->asic_reset_res) goto fail; @@ -4353,7 +4346,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) int amdgpu_device_init(struct amdgpu_device *adev, uint32_t flags) { - struct drm_device *ddev = adev_to_drm(adev); struct pci_dev *pdev = adev->pdev; int r, i; bool px = false; @@ -4814,7 +4806,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); - px = amdgpu_device_supports_px(ddev); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -4980,7 +4972,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->xcp_mgr); adev->xcp_mgr = NULL; - px = 
amdgpu_device_supports_px(adev_to_drm(adev)); + px = amdgpu_device_supports_px(adev); if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) @@ -5152,7 +5144,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) return r; } - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D3)) dev_warn(adev->dev, "smart shift update failed\n"); if (notify_clients) @@ -5321,7 +5313,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) } adev->in_suspend = false; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DEV_D0)) dev_warn(adev->dev, "smart shift update failed\n"); return 0; @@ -6365,7 +6357,8 @@ static int amdgpu_device_sched_resume(struct list_head *device_list, amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); } else { dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); - if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0)) + if (amdgpu_acpi_smart_shift_update(tmp_adev, + AMDGPU_SS_DEV_D0)) dev_warn(tmp_adev->dev, "smart shift update failed\n"); } @@ -6839,12 +6832,11 @@ bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, #endif } -int amdgpu_device_baco_enter(struct drm_device *dev) +int amdgpu_device_baco_enter(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; if (ras && adev->ras_enabled && @@ -6854,13 +6846,12 @@ int amdgpu_device_baco_enter(struct drm_device *dev) return amdgpu_dpm_baco_enter(adev); } -int amdgpu_device_baco_exit(struct drm_device *dev) +int amdgpu_device_baco_exit(struct amdgpu_device *adev) { - struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_ras *ras = 
amdgpu_ras_get_context(adev); int ret = 0; - if (!amdgpu_device_supports_baco(dev)) + if (!amdgpu_device_supports_baco(adev)) return -ENOTSUPP; ret = amdgpu_dpm_baco_exit(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b299e15bb5e5..4f8632737574 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2457,10 +2457,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) { /* only need to skip on ATPX */ - if (amdgpu_device_supports_px(ddev)) + if (amdgpu_device_supports_px(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); /* we want direct complete for BOCO */ - if (amdgpu_device_supports_boco(ddev)) + if (amdgpu_device_supports_boco(adev)) dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE | DPM_FLAG_SMART_SUSPEND | DPM_FLAG_MAY_SKIP_RESUME); @@ -2493,9 +2493,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, * into D0 state. Then there will be a PMFW-aware D-state * transition(D0->D3) on runpm suspend. 
*/ - if (amdgpu_device_supports_baco(ddev) && + if (amdgpu_device_supports_baco(adev) && !(adev->flags & AMD_IS_APU) && - (adev->asic_type >= CHIP_NAVI10)) + adev->asic_type >= CHIP_NAVI10) amdgpu_get_secondary_funcs(adev); } @@ -2560,8 +2560,7 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev) && - pm_runtime_suspended(dev)) + if (amdgpu_device_supports_boco(adev) && pm_runtime_suspended(dev)) return 1; /* if we will not support s3 or s2i for the device @@ -2834,7 +2833,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) /* nothing to do */ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_enter(drm_dev); + amdgpu_device_baco_enter(adev); } dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); @@ -2875,7 +2874,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) pci_set_master(pdev); } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { - amdgpu_device_baco_exit(drm_dev); + amdgpu_device_baco_exit(adev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 195ed81d39ff..bfabb64e5535 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -91,7 +91,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev->rmmio == NULL) return; - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_UNLOAD)) DRM_WARN("smart shift update failed\n"); amdgpu_acpi_fini(adev); @@ -161,7 +161,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (acpi_status) dev_dbg(dev->dev, "Error during ACPI methods call\n"); - if (amdgpu_acpi_smart_shift_update(dev, 
AMDGPU_SS_DRV_LOAD)) + if (amdgpu_acpi_smart_shift_update(adev, AMDGPU_SS_DRV_LOAD)) DRM_WARN("smart shift update failed\n"); out: diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index edd9895b46c0..4b151bbaffaa 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1890,7 +1890,7 @@ static ssize_t amdgpu_set_smartshift_bias(struct device *dev, static int ss_power_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, uint32_t mask, enum amdgpu_device_attr_states *states) { - if (!amdgpu_device_supports_smart_shift(adev_to_drm(adev))) + if (!amdgpu_device_supports_smart_shift(adev)) *states = ATTR_STATE_UNSUPPORTED; return 0; @@ -1901,7 +1901,7 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ { uint32_t ss_power; - if (!amdgpu_device_supports_smart_shift(adev_to_drm(adev))) + if (!amdgpu_device_supports_smart_shift(adev)) *states = ATTR_STATE_UNSUPPORTED; else if (amdgpu_hwmon_get_sensor_generic(adev, AMDGPU_PP_SENSOR_SS_APU_SHARE, (void *)&ss_power)) From 14b2d71a9a24727f1b9f2131ed5eb2e345840a3a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jun 2025 17:51:02 -0400 Subject: [PATCH 094/358] drm/amdgpu/gfx10: fix KGQ reset sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to reinit the ring before remapping it and all of the KIQ handling needs to be within the kiq lock. 
Fixes: 1741281a157f ("drm/amdgpu/gfx10: add ring reset callbacks") Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8c377ecbb8a7..5e099b5dc9a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9544,7 +9544,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, spin_lock_irqsave(&kiq->ring_lock, flags); - if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7 + kiq->pmf->map_queues_size)) { + if (amdgpu_ring_alloc(kiq_ring, 5 + 7 + 7)) { spin_unlock_irqrestore(&kiq->ring_lock, flags); return -ENOMEM; } @@ -9564,12 +9564,9 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, 0, 1, 0x20); gfx_v10_0_ring_emit_reg_wait(kiq_ring, SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffffffff); - kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9579,6 +9576,19 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, return r; } + spin_lock_irqsave(&kiq->ring_lock, flags); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size)) { + spin_unlock_irqrestore(&kiq->ring_lock, flags); + return -ENOMEM; + } + kiq->pmf->kiq_map_queues(kiq_ring, ring); + amdgpu_ring_commit(kiq_ring); + r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + if (r) + return r; + r = amdgpu_ring_test_ring(ring); if (r) return r; From a54e4639c4ef37a0241bac7d2a77f2e6ffb57099 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Mon, 30 Jun 2025 23:26:17 +0300 Subject: [PATCH 095/358] drm/amd/pm/powerplay/hwmgr/smu_helper: fix order of mask and value There is a small typo in phm_wait_on_indirect_register(). 
Swap mask and value arguments provided to phm_wait_on_register() so that they satisfy the function signature and actual usage scheme. Found by Linux Verification Center (linuxtesting.org) with Svace static analysis tool. In practice this doesn't fix any issues because the only place this function is used uses the same value for the value and mask. Fixes: 3bace3591493 ("drm/amd/powerplay: add hardware manager sub-component") Signed-off-by: Fedor Pchelkin Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c index 79a566f3564a..c305ea4ec17d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c @@ -149,7 +149,7 @@ int phm_wait_on_indirect_register(struct pp_hwmgr *hwmgr, } cgs_write_register(hwmgr->device, indirect_port, index); - return phm_wait_on_register(hwmgr, indirect_port + 1, mask, value); + return phm_wait_on_register(hwmgr, indirect_port + 1, value, mask); } int phm_wait_for_register_unequal(struct pp_hwmgr *hwmgr, From 9888f73679b72394bcee4f2af304456f03ceed8b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 1 Jul 2025 14:48:00 +0530 Subject: [PATCH 096/358] drm/amdgpu: Add a noverbose flag to psp_wait_for For extended wait with retries on a PSP register value, add a noverbose flag to avoid excessive error messages on each timeout. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 13 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 7 ++- drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 35 ++++++++------- drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c | 10 ++--- drivers/gpu/drm/amd/amdgpu/psp_v12_0.c | 20 ++++----- drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 41 ++++++++++-------- drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c | 22 +++++----- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 55 +++++++++++++----------- drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 41 +++++++++--------- 10 files changed, 130 insertions(+), 118 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a4bede28df17..75911600d504 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -575,9 +575,11 @@ static int psp_sw_fini(struct amdgpu_ip_block *ip_block) return 0; } -int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t reg_val, uint32_t mask, bool check_changed) +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t reg_val, + uint32_t mask, uint32_t flags) { + bool check_changed = flags & PSP_WAITREG_CHANGED; + bool verbose = !(flags & PSP_WAITREG_NOVERBOSE); uint32_t val; int i; struct amdgpu_device *adev = psp->adev; @@ -597,9 +599,10 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, udelay(1); } - dev_err(adev->dev, - "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x", - reg_index, mask, val, reg_val); + if (verbose) + dev_err(adev->dev, + "psp reg (0x%x) wait timed out, mask: %x, read: %x exp: %x", + reg_index, mask, val, reg_val); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 4bc0ec49d2e9..237b624aa51c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -134,6 +134,9 @@ enum psp_reg_prog_id { PSP_REG_LAST }; +#define PSP_WAITREG_CHANGED BIT(0) /* check if the value has changed */ +#define PSP_WAITREG_NOVERBOSE BIT(1) /* No error verbose */ + struct psp_funcs { int (*init_microcode)(struct psp_context *psp); int (*wait_for_bootloader)(struct psp_context *psp); @@ -532,8 +535,8 @@ extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; -extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, - uint32_t field_val, uint32_t mask, bool check_changed); +int psp_wait_for(struct psp_context *psp, uint32_t reg_index, + uint32_t field_val, uint32_t mask, uint32_t flags); extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, uint32_t msec_timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 2c4ebd98927f..3584b8c18fd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -94,7 +94,7 @@ static int psp_v10_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -115,7 +115,7 @@ static int psp_v10_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 1a4a26e6ffd2..6cc05d36e359 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -152,11 +152,9 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -252,8 +250,8 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -279,11 +277,11 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, if (amdgpu_sriov_vf(adev)) ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -321,13 +319,13 @@ static int psp_v11_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_101 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for sOS ready for ring creation\n"); return ret; @@ -353,7 
+351,7 @@ static int psp_v11_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -387,7 +385,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, - MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -402,7 +400,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, - false); + 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); @@ -428,8 +426,9 @@ static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -608,7 +607,7 @@ static int psp_v11_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -645,7 +644,7 @@ static int psp_v11_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = 
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c index 338d015c0f2e..93787a90d598 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -43,7 +43,7 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, @@ -53,7 +53,7 @@ static int psp_v11_0_8_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -91,13 +91,13 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_101 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -123,7 +123,7 @@ static int psp_v11_0_8_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index d54b3e0fabaf..4c6450d62299 
100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -82,7 +82,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -97,7 +97,7 @@ static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp) psp_gfxdrv_command_reg); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -118,7 +118,7 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -133,8 +133,8 @@ static int psp_v12_0_bootloader_load_sos(struct psp_context *psp) psp_gfxdrv_command_reg); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -163,7 +163,7 @@ static int psp_v12_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -186,11 +186,11 @@ static int psp_v12_0_ring_stop(struct psp_context *psp, if (amdgpu_sriov_vf(adev)) ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); else ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, 
mmMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); return ret; } @@ -222,7 +222,7 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); ret = psp_wait_for(psp, offset, MBOX_TOS_READY_FLAG, - MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_MASK, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -237,7 +237,7 @@ static int psp_v12_0_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, - false); + 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 58b6b64dcd68..af4a7d7c4abd 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -182,7 +182,7 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) ready having bit 31 of C2PMSG_33 set to 1 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; @@ -213,7 +213,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0xffffffff, false); + 0x80000000, 0xffffffff, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -362,8 +362,8 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); if (!ret) psp_v13_0_init_sos_version(psp); 
@@ -386,7 +386,7 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -396,7 +396,7 @@ static int psp_v13_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -434,13 +434,13 @@ static int psp_v13_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_101 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -466,7 +466,7 @@ static int psp_v13_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -529,8 +529,9 @@ static int psp_v13_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 
0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -682,7 +683,7 @@ static int psp_v13_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -719,7 +720,7 @@ static int psp_v13_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_36); @@ -744,8 +745,9 @@ static int psp_v13_0_exec_spi_cmd(struct psp_context *psp, int cmd) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -769,7 +771,7 @@ static int psp_v13_0_update_spirom(struct psp_context *psp, /* Confirm PSP is ready to start */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; @@ -804,7 +806,7 @@ static int psp_v13_0_dump_spirom(struct psp_context *psp, /* Confirm PSP is ready to start */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP 
Not ready to start processing, ret = %d", ret); return ret; @@ -931,8 +933,9 @@ static int psp_v13_0_reg_program_no_ring(struct psp_context *psp, uint32_t val, WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_102, id); WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_103, val); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index f65af52c1c19..5f39a2edcc95 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -76,11 +76,9 @@ static int psp_v13_0_4_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -185,8 +183,8 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -206,7 +204,7 @@ static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_64, @@ -216,7 +214,7 @@ 
static int psp_v13_0_4_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -254,13 +252,13 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_101 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -286,7 +284,7 @@ static int psp_v13_0_4_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index ffa47c7d24c9..36ef4a72ad1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -109,11 +109,9 @@ static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) for (retry_loop = 0; retry_loop < 10; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ - ret = psp_wait_for(psp, - SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, - 0x80000000, - false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) return 0; @@ -228,9 +226,10 @@ static int 
psp_v14_0_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), - 0, true); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -250,7 +249,7 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Write the ring destroy command*/ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, @@ -260,7 +259,7 @@ static int psp_v14_0_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -298,13 +297,13 @@ static int psp_v14_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_101 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } else { /* Wait for sOS ready for ring creation */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, false); + MBOX_TOS_READY_FLAG, MBOX_TOS_READY_MASK, 0); if (ret) { DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); return ret; @@ -330,7 +329,7 @@ static int psp_v14_0_ring_create(struct psp_context *psp, /* Wait for response flag (bit 31) in C2PMSG_64 */ ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), - MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, false); + 
MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK, 0); } return ret; @@ -393,8 +392,9 @@ static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; for (i = 0; i < max_wait; i++) { - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE); if (ret == 0) break; } @@ -545,8 +545,9 @@ static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc */ WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -582,8 +583,9 @@ static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, 0); if (!ret) *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); @@ -607,11 +609,13 @@ static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + 
ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); return ret; @@ -634,8 +638,9 @@ static int psp_v14_0_update_spirom(struct psp_context *psp, int ret; /* Confirm PSP is ready to start */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), - MBOX_READY_FLAG, MBOX_READY_MASK, false); + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, 0); if (ret) { dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index f6b75e3e47ff..833830bc3e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -91,7 +91,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -109,7 +109,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) mdelay(20); ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); return ret; } @@ -130,7 +130,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), - 0x80000000, 0x80000000, false); + 0x80000000, 0x80000000, 0); if (ret) return ret; @@ -147,8 +147,8 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* there might be handshake issue with hardware which needs delay */ mdelay(20); ret = psp_wait_for(psp, 
SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), - RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), - 0, true); + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), 0, + PSP_WAITREG_CHANGED); return ret; } @@ -168,7 +168,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); /* Change IH ring for UMC */ tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); @@ -180,7 +180,7 @@ static void psp_v3_1_reroute_ih(struct psp_context *psp) mdelay(20); psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + 0x80000000, 0x8000FFFF, 0); } static int psp_v3_1_ring_create(struct psp_context *psp, @@ -217,9 +217,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_101 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_101), 0x80000000, - 0x8000FFFF, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, 0); } else { /* Write low address of the ring to C2PMSG_69 */ @@ -240,10 +240,9 @@ static int psp_v3_1_ring_create(struct psp_context *psp, mdelay(20); /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, - mmMP0_SMN_C2PMSG_64), 0x80000000, - 0x8000FFFF, false); - + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, 0); } return ret; } @@ -267,11 +266,13 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, /* Wait for response flag (bit 31) */ if (amdgpu_sriov_vf(adev)) - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, 0); else - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, 
mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, 0); return ret; } @@ -311,7 +312,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); - ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, 0); if (ret) { DRM_INFO("psp is not working correctly before mode1 reset!\n"); @@ -325,7 +326,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); - ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, 0); if (ret) { DRM_INFO("psp mode 1 reset failed!\n"); From 91134e800894fc6992cb0cdadea4cc94fe21b6e2 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 30 Jun 2025 10:07:14 +0530 Subject: [PATCH 097/358] drm/amdkfd: Avoid queue reset if disabled If ring reset is disabled, skip resetting queues. Instead, fall back to device based reset. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 76359c6a3f3a..500f51552038 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2339,9 +2339,18 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm) static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) { + struct amdgpu_device *adev = dqm->dev->adev; + while (halt_if_hws_hang) schedule(); + if (adev->debug_disable_gpu_ring_reset) { + dev_info_once(adev->dev, + "%s queue hung, but ring reset disabled", + is_sdma ? 
"sdma" : "compute"); + + return -EPERM; + } if (!amdgpu_gpu_recovery) return -ENOTRECOVERABLE; From f8410a17d377d062ef381316669653fb0ba0edc5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jun 2025 08:49:07 -0400 Subject: [PATCH 098/358] drm/amdgpu/sdma: consolidate engine reset handling Move the force completion handling into the common engine reset function. No need to duplicate it for every IP version. Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 17 +---------------- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 6 ++---- 4 files changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 7e26a44dcc1f..56939bb1d1a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -590,9 +590,12 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) * to be submitted to the queues after the reset is complete. 
*/ if (!ret) { + amdgpu_fence_driver_force_completion(gfx_ring); drm_sched_wqueue_start(&gfx_ring->sched); - if (adev->sdma.has_page_queue) + if (adev->sdma.has_page_queue) { + amdgpu_fence_driver_force_completion(page_ring); drm_sched_wqueue_start(&page_ring->sched); + } } mutex_unlock(&sdma_instance->engine_reset_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index c05f3c1f50db..a7e1dbe03b29 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1714,7 +1714,7 @@ static int sdma_v4_4_2_stop_queue(struct amdgpu_ring *ring) static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 inst_mask, tmp_mask; + u32 inst_mask; int i, r; inst_mask = 1 << ring->me; @@ -1733,21 +1733,6 @@ static int sdma_v4_4_2_restore_queue(struct amdgpu_ring *ring) } r = sdma_v4_4_2_inst_start(adev, inst_mask, true); - if (r) - return r; - - tmp_mask = inst_mask; - for_each_inst(i, tmp_mask) { - ring = &adev->sdma.instance[i].ring; - - amdgpu_fence_driver_force_completion(ring); - - if (adev->sdma.has_page_queue) { - struct amdgpu_ring *page = &adev->sdma.instance[i].page; - - amdgpu_fence_driver_force_completion(page); - } - } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 4d72b085b3dd..ed1706da7dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1618,10 +1618,8 @@ static int sdma_v5_0_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_0_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - return 0; + + return r; } static int sdma_v5_0_ring_preempt_ib(struct amdgpu_ring *ring) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 42a25150f83a..b87a4b44fa93 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1534,10 +1534,8 @@ static int sdma_v5_2_restore_queue(struct amdgpu_ring *ring) r = sdma_v5_2_gfx_resume_instance(adev, inst_id, true); amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - return 0; + + return r; } static int sdma_v5_2_ring_preempt_ib(struct amdgpu_ring *ring) From 0c3c2e334c4fd00ed7a8ddb9c163a8c1138af1f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jun 2025 08:58:21 -0400 Subject: [PATCH 099/358] drm/amdgpu/sdma: allow caller to handle kernel rings in engine reset Add a parameter to amdgpu_sdma_reset_engine() to let the caller handle the kernel rings. This allows the kernel rings to back up their unprocessed state if the reset comes in via the drm scheduler rather than KFD. Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 44 +++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 3 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 56939bb1d1a9..8b8a04138711 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -545,10 +545,13 @@ static int amdgpu_sdma_soft_reset(struct amdgpu_device *adev, u32 instance_id) * amdgpu_sdma_reset_engine - Reset a specific SDMA engine * @adev: Pointer to the AMDGPU device * @instance_id: Logical ID of the SDMA engine instance to reset + * @caller_handles_kernel_queues: Skip kernel queue processing. Caller + * will handle it. * * Returns: 0 on success, or a negative error code on failure. 
*/ -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues) { int ret = 0; struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; @@ -556,14 +559,17 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) struct amdgpu_ring *page_ring = &sdma_instance->page; mutex_lock(&sdma_instance->engine_reset_mutex); - /* Stop the scheduler's work queue for the GFX and page rings if they are running. - * This ensures that no new tasks are submitted to the queues while - * the reset is in progress. - */ - drm_sched_wqueue_stop(&gfx_ring->sched); - if (adev->sdma.has_page_queue) - drm_sched_wqueue_stop(&page_ring->sched); + if (!caller_handles_kernel_queues) { + /* Stop the scheduler's work queue for the GFX and page rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. + */ + drm_sched_wqueue_stop(&gfx_ring->sched); + + if (adev->sdma.has_page_queue) + drm_sched_wqueue_stop(&page_ring->sched); + } if (sdma_instance->funcs->stop_kernel_queue) { sdma_instance->funcs->stop_kernel_queue(gfx_ring); @@ -585,16 +591,18 @@ int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) } exit: - /* Restart the scheduler's work queue for the GFX and page rings - * if they were stopped by this function. This allows new tasks - * to be submitted to the queues after the reset is complete. - */ - if (!ret) { - amdgpu_fence_driver_force_completion(gfx_ring); - drm_sched_wqueue_start(&gfx_ring->sched); - if (adev->sdma.has_page_queue) { - amdgpu_fence_driver_force_completion(page_ring); - drm_sched_wqueue_start(&page_ring->sched); + if (!caller_handles_kernel_queues) { + /* Restart the scheduler's work queue for the GFX and page rings + * if they were stopped by this function. 
This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + if (!ret) { + amdgpu_fence_driver_force_completion(gfx_ring); + drm_sched_wqueue_start(&gfx_ring->sched); + if (adev->sdma.has_page_queue) { + amdgpu_fence_driver_force_completion(page_ring); + drm_sched_wqueue_start(&page_ring->sched); + } } } mutex_unlock(&sdma_instance->engine_reset_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index e5f8951bbb6f..34311f32be4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -172,7 +172,8 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id, + bool caller_handles_kernel_queues); #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index a7e1dbe03b29..20fad2525969 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1668,7 +1668,7 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, return -EOPNOTSUPP; amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, id); + r = amdgpu_sdma_reset_engine(adev, id, false); amdgpu_amdkfd_resume(adev, true); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index ed1706da7dee..5a1098bdd825 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1548,7 +1548,7 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, int r; amdgpu_amdkfd_suspend(adev, true); - r = 
amdgpu_sdma_reset_engine(adev, inst_id); + r = amdgpu_sdma_reset_engine(adev, inst_id, false); amdgpu_amdkfd_resume(adev, true); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index b87a4b44fa93..6843c2c3d71f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1461,7 +1461,7 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, int r; amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, inst_id); + r = amdgpu_sdma_reset_engine(adev, inst_id, false); amdgpu_amdkfd_resume(adev, true); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 500f51552038..2d91027e2a74 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -2312,7 +2312,7 @@ static int reset_hung_queues_sdma(struct device_queue_manager *dqm) continue; /* Reset engine and check. */ - if (amdgpu_sdma_reset_engine(dqm->dev->adev, i) || + if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || !set_sdma_queue_as_reset(dqm, doorbell_off)) { r = -ENOTRECOVERABLE; From 6d396e7ac1ce36079147933aa77ea977b03a8a9d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 1 Jul 2025 16:09:19 -0400 Subject: [PATCH 100/358] drm/amd/display: Disable common modes for LVDS [Why] Common modes are added to LVDS for compatibility in clone mode, but not all panels support them. Non-native modes were disabled in the past but this caused problems because compositors didn't use scaling for non native modes. Now non-native modes on LVDS will enable the scaler by default. [How] Check the connector type. If the connector is LVDS avoid adding common modes. 
Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3dd4f9e9931d..7a4fca9a61bb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8374,7 +8374,8 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 1920, 1080); } else { amdgpu_dm_connector_ddc_get_modes(connector, drm_edid); - if (encoder && connector->connector_type != DRM_MODE_CONNECTOR_eDP) + if (encoder && (connector->connector_type != DRM_MODE_CONNECTOR_eDP) && + (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)) amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, drm_edid); } From df2719088b6a8084960ed6e2ddc7066ffc5b558d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 1 Jul 2025 16:13:17 -0400 Subject: [PATCH 101/358] drm/amd/display: Use scaling for non-native resolutions on LVDS [Why] Common resolutions are added to supported modes to enable compatibility scenarios that compositors may use to do things like clone displays. There is no guarantee however that the panel will natively support these modes. [How] If the compositor hasn't enabled scaling but a non-native resolution has been picked for an LVDS panel turn the scaler on anyway. This will ensure compatibility. 
Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7a4fca9a61bb..096b23ad4845 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -7901,7 +7901,8 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, int clock, bpp = 0; bool is_y420 = false; - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { + if ((connector->connector_type == DRM_MODE_CONNECTOR_eDP) || + (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)) { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct drm_display_mode *native_mode = &amdgpu_encoder->native_mode; enum drm_mode_status result; From a73345b866ff8bbd93135af667c973a8fb4b2c40 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Tue, 24 Jun 2025 12:05:10 -0400 Subject: [PATCH 102/358] Revert "drm/amdgpu: fix slab-use-after-free in amdgpu_userq_mgr_fini" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 5fb90421fa0fbe0a968274912101fe917bf1c47b. The original patch moved `amdgpu_userq_mgr_fini()` to the driver's `postclose` callback, which is called after `drm_gem_release()` in the DRM file cleanup sequence. If a user application crashes or aborts without cleaning up its user queues, `drm_gem_release()` may free GEM objects that are still referenced by active user queues, leading to use-after-free. By reverting, we ensure that user queues are disabled and cleaned up before any GEM objects are released, preventing this class of bug. However, this reintroduces a race during PCI hot-unplug, where device removal can race with per-file cleanup, leading to use-after-free in suspend/unplug paths. This will be fixed in the next patch.
Fixes: 5fb90421fa0f ("drm/amdgpu: fix slab-use-after-free in amdgpu_userq_mgr_fini+0x70c") Signed-off-by: Vitaly Prosyak Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 3 --- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4f8632737574..1c54b2e5a225 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2911,6 +2911,20 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return ret; } +static int amdgpu_drm_release(struct inode *inode, struct file *filp) +{ + struct drm_file *file_priv = filp->private_data; + struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + + if (fpriv) { + fpriv->evf_mgr.fd_closing = true; + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); + amdgpu_userq_mgr_fini(&fpriv->userq_mgr); + } + + return drm_release(inode, filp); +} + long amdgpu_drm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -2962,7 +2976,7 @@ static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, .flush = amdgpu_flush, - .release = drm_release, + .release = amdgpu_drm_release, .unlocked_ioctl = amdgpu_drm_ioctl, .mmap = drm_gem_mmap, .poll = drm_poll, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bfabb64e5535..b3c514b5e216 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1501,9 +1501,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_vm_bo_del(adev, fpriv->prt_va); amdgpu_bo_unreserve(pd); } - fpriv->evf_mgr.fd_closing = true; - amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); - amdgpu_userq_mgr_fini(&fpriv->userq_mgr); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); 
From a886d26f2c8f9e3f3c1869ae368d09c75daac553 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Wed, 2 Jul 2025 08:35:30 -0400 Subject: [PATCH 103/358] drm/amdgpu: fix use-after-free in amdgpu_userq_suspend+0x51a/0x5a0 [ +0.000020] BUG: KASAN: slab-use-after-free in amdgpu_userq_suspend+0x51a/0x5a0 [amdgpu] [ +0.000817] Read of size 8 at addr ffff88812eec8c58 by task amd_pci_unplug/1733 [ +0.000027] CPU: 10 UID: 0 PID: 1733 Comm: amd_pci_unplug Tainted: G W 6.14.0+ #2 [ +0.000009] Tainted: [W]=WARN [ +0.000003] Hardware name: ASUS System Product Name/ROG STRIX B550-F GAMING (WI-FI), BIOS 1401 12/03/2020 [ +0.000004] Call Trace: [ +0.000004] [ +0.000003] dump_stack_lvl+0x76/0xa0 [ +0.000011] print_report+0xce/0x600 [ +0.000009] ? srso_return_thunk+0x5/0x5f [ +0.000006] ? kasan_complete_mode_report_info+0x76/0x200 [ +0.000007] ? kasan_addr_to_slab+0xd/0xb0 [ +0.000006] ? amdgpu_userq_suspend+0x51a/0x5a0 [amdgpu] [ +0.000707] kasan_report+0xbe/0x110 [ +0.000006] ? amdgpu_userq_suspend+0x51a/0x5a0 [amdgpu] [ +0.000541] __asan_report_load8_noabort+0x14/0x30 [ +0.000005] amdgpu_userq_suspend+0x51a/0x5a0 [amdgpu] [ +0.000535] ? stop_cpsch+0x396/0x600 [amdgpu] [ +0.000556] ? stop_cpsch+0x429/0x600 [amdgpu] [ +0.000536] ? __pfx_amdgpu_userq_suspend+0x10/0x10 [amdgpu] [ +0.000536] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? kgd2kfd_suspend+0x132/0x1d0 [amdgpu] [ +0.000542] amdgpu_device_fini_hw+0x581/0xe90 [amdgpu] [ +0.000485] ? down_write+0xbb/0x140 [ +0.000007] ? __mutex_unlock_slowpath.constprop.0+0x317/0x360 [ +0.000005] ? __pfx_amdgpu_device_fini_hw+0x10/0x10 [amdgpu] [ +0.000482] ? __kasan_check_write+0x14/0x30 [ +0.000004] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? up_write+0x55/0xb0 [ +0.000007] ? srso_return_thunk+0x5/0x5f [ +0.000005] ? 
blocking_notifier_chain_unregister+0x6c/0xc0 [ +0.000008] amdgpu_driver_unload_kms+0x69/0x90 [amdgpu] [ +0.000484] amdgpu_pci_remove+0x93/0x130 [amdgpu] [ +0.000482] pci_device_remove+0xae/0x1e0 [ +0.000008] device_remove+0xc7/0x180 [ +0.000008] device_release_driver_internal+0x3d4/0x5a0 [ +0.000007] device_release_driver+0x12/0x20 [ +0.000004] pci_stop_bus_device+0x104/0x150 [ +0.000006] pci_stop_and_remove_bus_device_locked+0x1b/0x40 [ +0.000005] remove_store+0xd7/0xf0 [ +0.000005] ? __pfx_remove_store+0x10/0x10 [ +0.000006] ? __pfx__copy_from_iter+0x10/0x10 [ +0.000006] ? __pfx_dev_attr_store+0x10/0x10 [ +0.000006] dev_attr_store+0x3f/0x80 [ +0.000006] sysfs_kf_write+0x125/0x1d0 [ +0.000004] ? srso_return_thunk+0x5/0x5f [ +0.000005] ? __kasan_check_write+0x14/0x30 [ +0.000005] kernfs_fop_write_iter+0x2ea/0x490 [ +0.000005] ? rw_verify_area+0x70/0x420 [ +0.000005] ? __pfx_kernfs_fop_write_iter+0x10/0x10 [ +0.000006] vfs_write+0x90d/0xe70 [ +0.000005] ? srso_return_thunk+0x5/0x5f [ +0.000005] ? __pfx_vfs_write+0x10/0x10 [ +0.000004] ? local_clock+0x15/0x30 [ +0.000008] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? __kasan_slab_free+0x5f/0x80 [ +0.000005] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? __kasan_check_read+0x11/0x20 [ +0.000004] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? fdget_pos+0x1d3/0x500 [ +0.000007] ksys_write+0x119/0x220 [ +0.000005] ? putname+0x1c/0x30 [ +0.000006] ? __pfx_ksys_write+0x10/0x10 [ +0.000007] __x64_sys_write+0x72/0xc0 [ +0.000006] x64_sys_call+0x18ab/0x26f0 [ +0.000006] do_syscall_64+0x7c/0x170 [ +0.000004] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? __pfx___x64_sys_openat+0x10/0x10 [ +0.000006] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? __kasan_check_read+0x11/0x20 [ +0.000003] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? fpregs_assert_state_consistent+0x21/0xb0 [ +0.000006] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? syscall_exit_to_user_mode+0x4e/0x240 [ +0.000005] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? 
do_syscall_64+0x88/0x170 [ +0.000003] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? irqentry_exit+0x43/0x50 [ +0.000004] ? srso_return_thunk+0x5/0x5f [ +0.000004] ? exc_page_fault+0x7c/0x110 [ +0.000006] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000006] RIP: 0033:0x7480c0b14887 [ +0.000005] Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 [ +0.000005] RSP: 002b:00007fff142b0058 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [ +0.000006] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007480c0b14887 [ +0.000003] RDX: 0000000000000001 RSI: 00007480c0e7365a RDI: 0000000000000004 [ +0.000003] RBP: 00007fff142b0080 R08: 0000563b2e73c170 R09: 0000000000000000 [ +0.000003] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff142b02f8 [ +0.000003] R13: 0000563b159a72a9 R14: 0000563b159a9d48 R15: 00007480c0f19040 [ +0.000008] [ +0.000445] Allocated by task 427 on cpu 5 at 29.342331s: [ +0.000011] kasan_save_stack+0x28/0x60 [ +0.000006] kasan_save_track+0x18/0x70 [ +0.000006] kasan_save_alloc_info+0x38/0x60 [ +0.000005] __kasan_kmalloc+0xc1/0xd0 [ +0.000006] __kmalloc_cache_noprof+0x1bd/0x430 [ +0.000007] amdgpu_driver_open_kms+0x172/0x760 [amdgpu] [ +0.000493] drm_file_alloc+0x569/0x9a0 [ +0.000007] drm_client_init+0x1b7/0x410 [ +0.000007] drm_fbdev_client_setup+0x174/0x470 [ +0.000006] drm_client_setup+0x8a/0xf0 [ +0.000006] amdgpu_pci_probe+0x510/0x10c0 [amdgpu] [ +0.000483] local_pci_probe+0xe7/0x1b0 [ +0.000006] pci_device_probe+0x5bf/0x890 [ +0.000006] really_probe+0x1fd/0x950 [ +0.000005] __driver_probe_device+0x307/0x410 [ +0.000006] driver_probe_device+0x4e/0x150 [ +0.000005] __driver_attach+0x223/0x510 [ +0.000006] bus_for_each_dev+0x102/0x1a0 [ +0.000005] driver_attach+0x3d/0x60 [ +0.000006] bus_add_driver+0x309/0x650 [ +0.000005] driver_register+0x13d/0x490 [ +0.000006] __pci_register_driver+0x1ee/0x2b0 [ 
+0.000006] rfcomm_dlc_clear_state+0x69/0x220 [rfcomm] [ +0.000011] do_one_initcall+0x9c/0x3e0 [ +0.000007] do_init_module+0x29e/0x7f0 [ +0.000006] load_module+0x5c75/0x7c80 [ +0.000006] init_module_from_file+0x106/0x180 [ +0.000006] idempotent_init_module+0x377/0x740 [ +0.000006] __x64_sys_finit_module+0xd7/0x180 [ +0.000006] x64_sys_call+0x1f0b/0x26f0 [ +0.000006] do_syscall_64+0x7c/0x170 [ +0.000005] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000013] Freed by task 1733 on cpu 5 at 59.907086s: [ +0.000011] kasan_save_stack+0x28/0x60 [ +0.000006] kasan_save_track+0x18/0x70 [ +0.000005] kasan_save_free_info+0x3b/0x60 [ +0.000005] __kasan_slab_free+0x54/0x80 [ +0.000006] kfree+0x127/0x470 [ +0.000006] amdgpu_driver_postclose_kms+0x455/0x760 [amdgpu] [ +0.000493] drm_file_free.part.0+0x5b1/0xba0 [ +0.000006] drm_file_free+0x13/0x30 [ +0.000006] drm_client_release+0x1c4/0x2b0 [ +0.000006] drm_fbdev_ttm_fb_destroy+0xd2/0x120 [drm_ttm_helper] [ +0.000007] put_fb_info+0x97/0xe0 [ +0.000007] unregister_framebuffer+0x197/0x380 [ +0.000005] drm_fb_helper_unregister_info+0x94/0x100 [ +0.000005] drm_fbdev_client_unregister+0x3c/0x80 [ +0.000007] drm_client_dev_unregister+0x144/0x330 [ +0.000006] drm_dev_unregister+0x49/0x1b0 [ +0.000006] drm_dev_unplug+0x4c/0xd0 [ +0.000006] amdgpu_pci_remove+0x58/0x130 [amdgpu] [ +0.000484] pci_device_remove+0xae/0x1e0 [ +0.000008] device_remove+0xc7/0x180 [ +0.000007] device_release_driver_internal+0x3d4/0x5a0 [ +0.000006] device_release_driver+0x12/0x20 [ +0.000007] pci_stop_bus_device+0x104/0x150 [ +0.000006] pci_stop_and_remove_bus_device_locked+0x1b/0x40 [ +0.000006] remove_store+0xd7/0xf0 [ +0.000006] dev_attr_store+0x3f/0x80 [ +0.000005] sysfs_kf_write+0x125/0x1d0 [ +0.000006] kernfs_fop_write_iter+0x2ea/0x490 [ +0.000006] vfs_write+0x90d/0xe70 [ +0.000006] ksys_write+0x119/0x220 [ +0.000006] __x64_sys_write+0x72/0xc0 [ +0.000006] x64_sys_call+0x18ab/0x26f0 [ +0.000005] do_syscall_64+0x7c/0x170 [ +0.000006] 
entry_SYSCALL_64_after_hwframe+0x76/0x7e [ +0.000012] The buggy address belongs to the object at ffff88812eec8000 which belongs to the cache kmalloc-rnd-07-4k of size 4096 [ +0.000016] The buggy address is located 3160 bytes inside of freed 4096-byte region [ffff88812eec8000, ffff88812eec9000) [ +0.000023] The buggy address belongs to the physical page: [ +0.000009] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x12eec8 [ +0.000007] head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ +0.000005] flags: 0x17ffffc0000040(head|node=0|zone=2|lastcpupid=0x1fffff) [ +0.000007] page_type: f5(slab) [ +0.000008] raw: 0017ffffc0000040 ffff888100054500 dead000000000122 0000000000000000 [ +0.000005] raw: 0000000000000000 0000000080040004 00000000f5000000 0000000000000000 [ +0.000006] head: 0017ffffc0000040 ffff888100054500 dead000000000122 0000000000000000 [ +0.000005] head: 0000000000000000 0000000080040004 00000000f5000000 0000000000000000 [ +0.000006] head: 0017ffffc0000003 ffffea0004bbb201 ffffffffffffffff 0000000000000000 [ +0.000005] head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000 [ +0.000005] page dumped because: kasan: bad access detected [ +0.000010] Memory state around the buggy address: [ +0.000009] ffff88812eec8b00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000012] ffff88812eec8b80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000011] >ffff88812eec8c00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000011] ^ [ +0.000010] ffff88812eec8c80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000011] ffff88812eec8d00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ +0.000011] ================================================================== The use-after-free occurs because a delayed work item (`suspend_work`) may still be pending or running when resources it accesses are freed during device removal or file close. 
The previous code used `flush_work(&fpriv->evf_mgr.suspend_work.work)`, which does not wait for delayed work that has not yet started. As a result, the delayed work could run after its memory was freed, causing a use-after-free. By switching to `flush_delayed_work(&fpriv->evf_mgr.suspend_work)`, we ensure that the kernel waits for both queued and delayed work to finish before freeing memory, closing this race. Fixes: adba0929736a ("drm/amdgpu: Fix Illegal opcode in command stream Error") Signed-off-by: Vitaly Prosyak Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index 295e7186e156..aac0de86f3e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -664,7 +664,7 @@ static void amdgpu_userq_restore_worker(struct work_struct *work) struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr); int ret; - flush_work(&fpriv->evf_mgr.suspend_work.work); + flush_delayed_work(&fpriv->evf_mgr.suspend_work); mutex_lock(&uq_mgr->userq_mutex); From 94de1dfd4729c21c156051ffd1ee30cfdab1b58e Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Mon, 7 Jul 2025 13:19:59 -0700 Subject: [PATCH 104/358] drm/xe/ptl: Drop force_probe requirement Panther Lake has proven to be stable through testing and use. Remove the force_probe requirement and enable the platform by default. 
Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20250707201959.319406-1-matthew.s.atwood@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 755e335c8e18..3614fcf3f088 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -337,7 +337,6 @@ static const struct xe_device_desc ptl_desc = { .has_display = true, .has_sriov = true, .max_gt_per_tile = 2, - .require_force_probe = true, .needs_scratch = true, }; From 83dcee17855c4e5af037ae3262809036de127903 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 8 Jul 2025 03:54:25 +0000 Subject: [PATCH 105/358] drm/xe/pm: Restore display pm if there is error after display suspend xe_bo_evict_all() is called after xe_display_pm_suspend(). So if there is an error with xe_bo_evict_all(), display pm should be restored. Fixes: 51462211f4a9 ("drm/xe/pxp: add PXP PM support") Fixes: cb8f81c17531 ("drm/xe/display: Make display suspend/resume work on discrete") Cc: Maarten Lankhorst Cc: Daniele Ceraolo Spurio Cc: John Harrison Signed-off-by: Shuicheng Lin Reviewed-by: Daniele Ceraolo Spurio Link: https://lore.kernel.org/r/20250708035424.3608190-2-shuicheng.lin@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index ff749edc005b..bcfda545e74f 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -134,7 +134,7 @@ int xe_pm_suspend(struct xe_device *xe) /* FIXME: Super racey... 
*/ err = xe_bo_evict_all(xe); if (err) - goto err_pxp; + goto err_display; for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); @@ -151,7 +151,6 @@ int xe_pm_suspend(struct xe_device *xe) err_display: xe_display_pm_resume(xe); -err_pxp: xe_pxp_pm_resume(xe->pxp); err: drm_dbg(&xe->drm, "Device suspend failed %d\n", err); From b2c4ac219fa46bb6535a8147ac1611b3d6481aa2 Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 7 Jul 2025 19:11:08 -0400 Subject: [PATCH 106/358] drm/xe/uc: Disable GuC communication on hardware initialization error Disable GuC communication on Xe micro controller hardware initialization error. Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4917 Reviewed-by: Jonathan Cavitt Reviewed-by: Matthew Brost Signed-off-by: Zhanjun Dong Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250707231108.3217573-1-zhanjun.dong@intel.com --- drivers/gpu/drm/xe/xe_guc.c | 8 ++++++-- drivers/gpu/drm/xe/xe_uc.c | 18 +++++++++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 8573957facae..b1d1d6da3758 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1219,13 +1219,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc) ret = xe_gt_sriov_vf_connect(gt); if (ret) - return ret; + goto err_out; ret = xe_gt_sriov_vf_query_runtime(gt); if (ret) - return ret; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(guc); + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 6431ba3a2c53..3e0c3af235f2 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -158,7 +158,7 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_gt_sriov_vf_connect(uc_to_gt(uc)); if (err) - return err; + goto err_out; uc->guc.submission_state.enabled = true; @@ -168,9 +168,13 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) - 
return err; + goto err_out; return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return err; } /* @@ -202,15 +206,15 @@ int xe_uc_load_hw(struct xe_uc *uc) ret = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (ret) - return ret; + goto err_out; ret = xe_guc_post_load_init(&uc->guc); if (ret) - return ret; + goto err_out; ret = xe_guc_pc_start(&uc->guc.pc); if (ret) - return ret; + goto err_out; xe_guc_engine_activity_enable_stats(&uc->guc); @@ -222,6 +226,10 @@ int xe_uc_load_hw(struct xe_uc *uc) xe_gsc_load_start(&uc->gsc); return 0; + +err_out: + xe_guc_sanitize(&uc->guc); + return ret; } int xe_uc_reset_prepare(struct xe_uc *uc) From 4a4d4e320a4d0b51f56c4a4bede11d595aa502da Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:41 +0900 Subject: [PATCH 107/358] gpu: nova-core: Add code comments related to devinit Add several code comments to reduce acronym soup and explain how devinit magic and bootflow works before driver loads. These are essential for debug and development of the nova driver. [acourbot@nvidia.com: reformat and reword a couple of sentences] Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-1-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/gfw.rs | 39 +++++++++++++++++++++++++++++++---- drivers/gpu/nova-core/regs.rs | 14 +++++++++++-- 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nova-core/gfw.rs b/drivers/gpu/nova-core/gfw.rs index d5b68e02d405..8ac1ed187199 100644 --- a/drivers/gpu/nova-core/gfw.rs +++ b/drivers/gpu/nova-core/gfw.rs @@ -1,10 +1,22 @@ // SPDX-License-Identifier: GPL-2.0 -//! GPU Firmware (GFW) support. +//! GPU Firmware (`GFW`) support, a.k.a `devinit`. //! //! Upon reset, the GPU runs some firmware code from the BIOS to setup its core parameters. Most of //! the GPU is considered unusable until this step is completed, so we must wait on it before //! performing driver initialization. 
+//! +//! A clarification about devinit terminology: devinit is a sequence of register read/writes after +//! reset that performs tasks such as: +//! 1. Programming VRAM memory controller timings. +//! 2. Power sequencing. +//! 3. Clock and PLL configuration. +//! 4. Thermal management. +//! +//! devinit itself is a 'script' which is interpreted by an interpreter program typically running +//! on the PMU microcontroller. +//! +//! Note that the devinit sequence also needs to run during suspend/resume. use kernel::bindings; use kernel::prelude::*; @@ -14,13 +26,32 @@ use crate::regs; use crate::util; -/// Wait until `GFW` (GPU Firmware) completes, or a 4 seconds timeout elapses. +/// Wait for the `GFW` (GPU firmware) boot completion signal (`GFW_BOOT`), or a 4 seconds timeout. +/// +/// Upon GPU reset, several microcontrollers (such as PMU, SEC2, GSP etc) run some firmware code to +/// setup its core parameters. Most of the GPU is considered unusable until this step is completed, +/// so it must be waited on very early during driver initialization. +/// +/// The `GFW` code includes several components that need to execute before the driver loads. These +/// components are located in the VBIOS ROM and executed in a sequence on these different +/// microcontrollers. The devinit sequence typically runs on the PMU, and the FWSEC runs on the +/// GSP. +/// +/// This function waits for a signal indicating that core initialization is complete. Before this +/// signal is received, little can be done with the GPU. This signal is set by the FWSEC running on +/// the GSP in Heavy-secured mode. pub(crate) fn wait_gfw_boot_completion(bar: &Bar0) -> Result { + // Before accessing the completion status in `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05`, we must + // first check `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK`. 
This is because + // `NV_PGC6_AON_SECURE_SCRATCH_GROUP_05` becomes accessible only after the secure firmware + // (FWSEC) lowers the privilege level to allow CPU (LS/Light-secured) access. We can only + // safely read the status register from CPU (LS/Light-secured) once the mask indicates + // that the privilege level has been lowered. + // // TIMEOUT: arbitrarily large value. GFW starts running immediately after the GPU is put out of // reset, and should complete in less time than that. util::wait_on(Delta::from_secs(4), || { - // Check that FWSEC has lowered its protection level before reading the GFW_BOOT - // status. + // Check that FWSEC has lowered its protection level before reading the GFW_BOOT status. let gfw_booted = regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK::read(bar) .read_protection_level0() && regs::NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_0_GFW_BOOT::read(bar).completed(); diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index e8b8aabce3f3..3bb38197a890 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -104,9 +104,19 @@ pub(crate) fn higher_bound(self) -> u64 { } } -/* PGC6 */ +// PGC6 register space. +// +// `GC6` is a GPU low-power state where VRAM is in self-refresh and the GPU is powered down (except +// for power rails needed to keep self-refresh working and important registers and hardware +// blocks). +// +// These scratch registers remain powered on even in a low-power state and have a designated group +// number. -register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128 { +// Privilege level mask register. It dictates whether the host CPU has privilege to access the +// `PGC6_AON_SECURE_SCRATCH_GROUP_05` register (which it needs to read GFW_BOOT). 
+register!(NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK @ 0x00118128, + "Privilege level mask register" { 0:0 read_protection_level0 as bool, "Set after FWSEC lowers its protection level"; }); From 8ce560d8e1c6449adb9dc5517ef91404c7810c94 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 7 Jul 2025 00:49:13 +0000 Subject: [PATCH 108/358] drm/xe: Remove unused code in devcoredump_snapshot() The deleted code is no longer needed because patch "drm/xe/guc: Plumb GuC-capture into dev coredump" has removed the related usage code. Remove the code to tidy up the function. v2: s/bacause/because Reviewed-by: Zhanjun Dong Reviewed-by: Matthew Brost Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250707004911.3502904-5-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 7a8af2311318..94625010abc4 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -313,13 +313,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -335,14 +331,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref 
= xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); From 017ef1228d735965419ff118fe1b89089e772c42 Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Mon, 7 Jul 2025 00:49:14 +0000 Subject: [PATCH 109/358] drm/xe: Release runtime pm for error path of xe_devcoredump_read() xe_pm_runtime_put() is missed to be called for the error path in xe_devcoredump_read(). Add function description comments for xe_devcoredump_read() to help understand it. v2: more detail function comments and refine goto logic (Matt) Fixes: c4a2e5f865b7 ("drm/xe: Add devcoredump chunking") Cc: stable@vger.kernel.org Reviewed-by: Matthew Brost Signed-off-by: Shuicheng Lin Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250707004911.3502904-6-shuicheng.lin@intel.com --- drivers/gpu/drm/xe/xe_devcoredump.c | 38 +++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 94625010abc4..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) #define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. 
+ * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; u32 chunk_offset; ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) xe_pm_runtime_get(gt_to_xe(ss->gt)); mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; - } + if (offset >= ss->read.size) + goto unlock; new_chunk_position = div_u64_rem(offset, XE_DEVCOREDUMP_CHUNK_MAX, @@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, ss->read.size - offset; memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) + if (pm_needed) xe_pm_runtime_put(gt_to_xe(ss->gt)); - return byte_copied; + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) From 4d3e891388ee8fa6b35d8a528ec0f8f9239799df Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:42 +0900 Subject: [PATCH 110/358] gpu: nova-core: Clarify sysmembar operations sysmembar is a critical operation that the GSP falcon needs to perform in the reset sequence. Add some code comments to clarify. 
[acourbot@nvidia.com: move relevant documentation to SysmemFlush type] Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-2-9d188772c4c7@nvidia.com [ Minor grammar fix in the PFB register documentation. - Danilo ] Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/fb.rs | 10 ++++++++++ drivers/gpu/nova-core/gpu.rs | 3 +-- drivers/gpu/nova-core/regs.rs | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nova-core/fb.rs b/drivers/gpu/nova-core/fb.rs index 172b4a12ba2a..4a702525fff4 100644 --- a/drivers/gpu/nova-core/fb.rs +++ b/drivers/gpu/nova-core/fb.rs @@ -17,6 +17,16 @@ /// Type holding the sysmem flush memory page, a page of memory to be written into the /// `NV_PFB_NISO_FLUSH_SYSMEM_ADDR*` registers and used to maintain memory coherency. /// +/// A system memory page is required for `sysmembar`, which is a GPU-initiated hardware +/// memory-barrier operation that flushes all pending GPU-side memory writes that were done through +/// PCIE to system memory. It is required for falcons to be reset as the reset operation involves a +/// reset handshake. When the falcon acknowledges a reset, it writes into system memory. To ensure +/// this write is visible to the host and prevent driver timeouts, the falcon must perform a +/// sysmembar operation to flush its writes. +/// +/// Because of this, the sysmem flush memory page must be registered as early as possible during +/// driver initialization, and before any falcon is reset. +/// /// Users are responsible for manually calling [`Self::unregister`] before dropping this object, /// otherwise the GPU might still use it even after it has been freed. 
pub(crate) struct SysmemFlush { diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs index 8e32af16b669..72d40b0124f0 100644 --- a/drivers/gpu/nova-core/gpu.rs +++ b/drivers/gpu/nova-core/gpu.rs @@ -170,7 +170,7 @@ pub(crate) struct Gpu { bar: Devres, fw: Firmware, /// System memory page required for flushing all pending GPU-side memory writes done through - /// PCIE into system memory. + /// PCIE into system memory, via sysmembar (A GPU-initiated HW memory-barrier operation). sysmem_flush: SysmemFlush, } @@ -283,7 +283,6 @@ pub(crate) fn new( gfw::wait_gfw_boot_completion(bar) .inspect_err(|_| dev_err!(pdev.as_ref(), "GFW boot did not complete"))?; - // System memory page required for sysmembar to properly flush into system memory. let sysmem_flush = SysmemFlush::register(pdev.as_ref(), bar, spec.chipset)?; let gsp_falcon = Falcon::::new( diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 3bb38197a890..317da64afa91 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -51,6 +51,9 @@ pub(crate) fn chipset(self) -> Result { /* PFB */ +// The following two registers together hold the physical system memory address that is used by the +// GPU to perform sysmembar operations (see `fb::SysmemFlush`). + register!(NV_PFB_NISO_FLUSH_SYSMEM_ADDR @ 0x00100c10 { 31:0 adr_39_08 as u32; }); From 0b980688f630a69f510556eea6f8c21e250f1a02 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:43 +0900 Subject: [PATCH 111/358] gpu: nova-core: Clarify falcon code Add documentation strings, comments and AES mode for completeness to the Falcon signatures. 
Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-3-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/falcon.rs | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nova-core/falcon.rs b/drivers/gpu/nova-core/falcon.rs index c2c6f9eb380a..50437c67c14a 100644 --- a/drivers/gpu/nova-core/falcon.rs +++ b/drivers/gpu/nova-core/falcon.rs @@ -105,14 +105,23 @@ fn try_from(value: u8) -> Result { /// register. #[repr(u8)] #[derive(Debug, Default, Copy, Clone)] +/// Security mode of the Falcon microprocessor. +/// +/// See `falcon.rst` for more details. pub(crate) enum FalconSecurityModel { /// Non-Secure: runs unsigned code without privileges. #[default] None = 0, - /// Low-Secure: runs code with some privileges. Can only be entered from `Heavy` mode, which - /// will typically validate the LS code through some signature. + /// Light-Secured (LS): Runs signed code with some privileges. + /// Entry into this mode is only possible from 'Heavy-secure' mode, which verifies the code's + /// signature. + /// + /// Also known as Low-Secure, Privilege Level 2 or PL2. Light = 2, - /// High-Secure: runs signed code with full privileges. Signature is validated by boot ROM. + /// Heavy-Secured (HS): Runs signed code with full privileges. + /// The code's signature is verified by the Falcon Boot ROM (BROM). + /// + /// Also known as High-Secure, Privilege Level 3 or PL3. Heavy = 3, } impl_from_enum_to_u32!(FalconSecurityModel); @@ -136,10 +145,13 @@ fn try_from(value: u8) -> Result { } /// Signing algorithm for a given firmware, used in the [`crate::regs::NV_PFALCON2_FALCON_MOD_SEL`] -/// register. +/// register. It is passed to the Falcon Boot ROM (BROM) as a parameter. #[repr(u8)] #[derive(Debug, Default, Copy, Clone, PartialEq, Eq)] pub(crate) enum FalconModSelAlgo { + /// AES. + #[expect(dead_code)] + Aes = 0, /// RSA3K. 
#[default] Rsa3k = 1, @@ -209,15 +221,18 @@ pub(crate) enum FalconMem { Dmem, } -/// Target/source of a DMA transfer to/from falcon memory. +/// Defines the Framebuffer Interface (FBIF) aperture type. +/// This determines the memory type for external memory access during a DMA transfer, which is +/// performed by the Falcon's Framebuffer DMA (FBDMA) engine. See falcon.rst for more details. #[derive(Debug, Clone, Default)] pub(crate) enum FalconFbifTarget { /// VRAM. #[default] + /// Local Framebuffer (GPU's VRAM memory). LocalFb = 0, - /// Coherent system memory. + /// Coherent system memory (System DRAM). CoherentSysmem = 1, - /// Non-coherent system memory. + /// Non-coherent system memory (System DRAM). NoncoherentSysmem = 2, } impl_from_enum_to_u32!(FalconFbifTarget); From 7feefbd2a28bf125c4155e49f6c5f7dcd18f9a1b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 8 Jul 2025 15:49:44 +0900 Subject: [PATCH 112/358] gpu: nova-core: convert `/*` comments to `//` The second form is preferred, and there was no reason to use the first. Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-4-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- drivers/gpu/nova-core/regs.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/nova-core/regs.rs b/drivers/gpu/nova-core/regs.rs index 317da64afa91..5ccfb61f850a 100644 --- a/drivers/gpu/nova-core/regs.rs +++ b/drivers/gpu/nova-core/regs.rs @@ -14,7 +14,7 @@ use crate::gpu::{Architecture, Chipset}; use kernel::prelude::*; -/* PMC */ +// PMC register!(NV_PMC_BOOT_0 @ 0x00000000, "Basic revision information about the GPU" { 3:0 minor_revision as u8, "Minor revision of the chip"; @@ -42,14 +42,14 @@ pub(crate) fn chipset(self) -> Result { } } -/* PBUS */ +// PBUS // TODO[REGA]: this is an array of registers. 
register!(NV_PBUS_SW_SCRATCH_0E@0x00001438 { 31:16 frts_err_code as u16; }); -/* PFB */ +// PFB // The following two registers together hold the physical system memory address that is used by the // GPU to perform sysmembar operations (see `fb::SysmemFlush`). @@ -160,7 +160,7 @@ pub(crate) fn usable_fb_size(self) -> u64 { } } -/* PDISP */ +// PDISP register!(NV_PDISP_VGA_WORKSPACE_BASE @ 0x00625f04 { 3:3 status_valid as bool, "Set if the `addr` field is valid"; @@ -178,7 +178,7 @@ pub(crate) fn vga_workspace_addr(self) -> Option { } } -/* FUSE */ +// FUSE register!(NV_FUSE_OPT_FPF_NVDEC_UCODE1_VERSION @ 0x00824100 { 15:0 data as u16; @@ -192,7 +192,7 @@ pub(crate) fn vga_workspace_addr(self) -> Option { 15:0 data as u16; }); -/* PFALCON */ +// PFALCON register!(NV_PFALCON_FALCON_IRQSCLR @ +0x00000004 { 4:4 halt as bool; @@ -312,7 +312,7 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { 31:0 value as u32; }); -/* PRISCV */ +// PRISCV register!(NV_PRISCV_RISCV_BCR_CTRL @ +0x00001668 { 0:0 valid as bool; @@ -324,7 +324,7 @@ pub(crate) fn mem_scrubbing_done(self) -> bool { // only be used in HAL modules. pub(crate) mod gm107 { - /* FUSE */ + // FUSE register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00021c04 { 0:0 display_disabled as bool; @@ -332,7 +332,7 @@ pub(crate) mod gm107 { } pub(crate) mod ga100 { - /* FUSE */ + // FUSE register!(NV_FUSE_STATUS_OPT_DISPLAY @ 0x00820c04 { 0:0 display_disabled as bool; From 5ec879e29948d57f71a54cc943296e5ddc6e8fcb Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:45 +0900 Subject: [PATCH 113/358] Documentation: gpu: nova-core: Document vbios layout Add detailed explanation and block diagrams of the layout of the vBIOS on Nvidia GPUs. This is important to understand how nova-core boots an Nvidia GPU. [ Applied Timur Tabi's feedback on providing link to BIT documentation. 
] Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-5-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/vbios.rst | 181 ++++++++++++++++++++++++++ Documentation/gpu/nova/index.rst | 1 + 2 files changed, 182 insertions(+) create mode 100644 Documentation/gpu/nova/core/vbios.rst diff --git a/Documentation/gpu/nova/core/vbios.rst b/Documentation/gpu/nova/core/vbios.rst new file mode 100644 index 000000000000..efd40087480c --- /dev/null +++ b/Documentation/gpu/nova/core/vbios.rst @@ -0,0 +1,181 @@ +.. SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========== +VBIOS +========== +This document describes the layout of the VBIOS image which is a series of concatenated +images in the ROM of the GPU. The VBIOS is mirrored onto the BAR 0 space and is read +by both Boot ROM firmware (also known as IFR or init-from-rom firmware) on the GPU to +bootstrap various microcontrollers (PMU, SEC, GSP) with critical initialization before +the driver loads, as well as by the nova-core driver in the kernel to boot the GSP. + +The format of the images in the ROM follow the "BIOS Specification" part of the +PCI specification, with Nvidia-specific extensions. The ROM images of type FwSec +are the ones that contain Falcon ucode and what we are mainly looking for. + +As an example, the following are the different image types that can be found in the +VBIOS of an Ampere GA102 GPU which is supported by the nova-core driver. + +- PciAt Image (Type 0x00) - This is the standard PCI BIOS image, whose name + likely comes from the "IBM PC/AT" architecture. + +- EFI Image (Type 0x03) - This is the EFI BIOS image. It contains the UEFI GOP + driver that is used to display UEFI graphics output. 
+ +- First FwSec Image (Type 0xE0) - The first FwSec image (Secure Firmware) + +- Second FwSec Image (Type 0xE0) - The second FwSec image (Secure Firmware) + contains various microcodes (also known as applications) that do a range + of different functions. The FWSEC ucode is run in heavy-secure mode and + typically runs directly on the GSP (it could be running on a different + designated processor in future generations but as of Ampere, it is the GSP). + This firmware then loads other firmware ucodes onto the PMU and SEC2 + microcontrollers for gfw initialization after GPU reset and before the driver + loads (see devinit.rst). The DEVINIT ucode is itself another ucode that is + stored in this ROM partition. + +Once located, the Falcon ucodes have "Application Interfaces" in their data +memory (DMEM). For FWSEC, the application interface we use is the +"DMEM mapper" interface which is configured to run the "FRTS" command. This +command carves out the WPR2 (Write-Protected Region) in VRAM. It then places +important power-management data, called 'FRTS', into this region. The WPR2 +region is only accessible to heavy-secure ucode. + +.. note:: + It is not clear why FwSec has 2 different partitions in the ROM, but they both + are of type 0xE0 and can be identified as such. This could be subject to change + in future generations. 
+ +VBIOS ROM Layout +---------------- +The VBIOS layout is roughly a series of concatenated images laid out as follows:: + + +----------------------------------------------------------------------------+ + | VBIOS (Starting at ROM_OFFSET: 0x300000) | + +----------------------------------------------------------------------------+ + | +-----------------------------------------------+ | + | | PciAt Image (Type 0x00) | | + | +-----------------------------------------------+ | + | | +-------------------+ | | + | | | ROM Header | | | + | | | (Signature 0xAA55)| | | + | | +-------------------+ | | + | | | rom header's pci_data_struct_offset | | + | | | points to the PCIR structure | | + | | V | | + | | +-------------------+ | | + | | | PCIR Structure | | | + | | | (Signature "PCIR")| | | + | | | last_image: 0x80 | | | + | | | image_len: size | | | + | | | in 512-byte units | | | + | | +-------------------+ | | + | | | | | + | | | NPDE immediately follows PCIR | | + | | V | | + | | +-------------------+ | | + | | | NPDE Structure | | | + | | | (Signature "NPDE")| | | + | | | last_image: 0x00 | | | + | | +-------------------+ | | + | | | | + | | +-------------------+ | | + | | | BIT Header | (Signature scanning | | + | | | (Signature "BIT") | provides the location | | + | | +-------------------+ of the BIT table) | | + | | | header is | | + | | | followed by a table of tokens | | + | | V one of which is for falcon data. 
| | + | | +-------------------+ | | + | | | BIT Tokens | | | + | | | ______________ | | | + | | | | Falcon Data | | | | + | | | | Token (0x70)|---+------------>------------+--+ | + | | | +-------------+ | falcon_data_ptr() | | | + | | +-------------------+ | V | + | +-----------------------------------------------+ | | + | (no gap between images) | | + | +-----------------------------------------------+ | | + | | EFI Image (Type 0x03) | | | + | +-----------------------------------------------+ | | + | | Contains the UEFI GOP driver (Graphics Output)| | | + | | +-------------------+ | | | + | | | ROM Header | | | | + | | +-------------------+ | | | + | | | PCIR Structure | | | | + | | +-------------------+ | | | + | | | NPDE Structure | | | | + | | +-------------------+ | | | + | | | Image data | | | | + | | +-------------------+ | | | + | +-----------------------------------------------+ | | + | (no gap between images) | | + | +-----------------------------------------------+ | | + | | First FwSec Image (Type 0xE0) | | | + | +-----------------------------------------------+ | | + | | +-------------------+ | | | + | | | ROM Header | | | | + | | +-------------------+ | | | + | | | PCIR Structure | | | | + | | +-------------------+ | | | + | | | NPDE Structure | | | | + | | +-------------------+ | | | + | | | Image data | | | | + | | +-------------------+ | | | + | +-----------------------------------------------+ | | + | (no gap between images) | | + | +-----------------------------------------------+ | | + | | Second FwSec Image (Type 0xE0) | | | + | +-----------------------------------------------+ | | + | | +-------------------+ | | | + | | | ROM Header | | | | + | | +-------------------+ | | | + | | | PCIR Structure | | | | + | | +-------------------+ | | | + | | | NPDE Structure | | | | + | | +-------------------+ | | | + | | | | | + | | +-------------------+ | | | + | | | PMU Lookup Table | <- falcon_data_offset <----+ | + | | | +-------------+ | 
pmu_lookup_table | | + | | | | Entry 0x85 | | | | + | | | | FWSEC_PROD | | | | + | | | +-------------+ | | | + | | +-------------------+ | | + | | | | | + | | | points to | | + | | V | | + | | +-------------------+ | | + | | | FalconUCodeDescV3 | <- falcon_ucode_offset | | + | | | (FWSEC Firmware) | fwsec_header() | | + | | +-------------------+ | | + | | | immediately followed by... | | + | | V | | + | | +----------------------------+ | | + | | | Signatures + FWSEC Ucode | | | + | | | fwsec_sigs(), fwsec_ucode()| | | + | | +----------------------------+ | | + | +-----------------------------------------------+ | + | | + +----------------------------------------------------------------------------+ + +.. note:: + This diagram is created based on an GA-102 Ampere GPU as an example and could + vary for future or other GPUs. + +.. note:: + For more explanations of acronyms, see the detailed descriptions in `vbios.rs`. + +Falcon data Lookup +------------------ +A key part of the VBIOS extraction code (vbios.rs) is to find the location of the +Falcon data in the VBIOS which contains the PMU lookup table. This lookup table is +used to find the required Falcon ucode based on an application ID. + +The location of the PMU lookup table is found by scanning the BIT (`BIOS Information Table`_) +tokens for a token with the id `BIT_TOKEN_ID_FALCON_DATA` (0x70) which indicates the +offset of the same from the start of the VBIOS image. Unfortunately, the offset +does not account for the EFI image located between the PciAt and FwSec images. +The `vbios.rs` code compensates for this with appropriate arithmetic. + +.. 
_`BIOS Information Table`: https://download.nvidia.com/open-gpu-doc/BIOS-Information-Table/1/BIOS-Information-Table.html diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst index 2701b3f4af35..f38041fcd595 100644 --- a/Documentation/gpu/nova/index.rst +++ b/Documentation/gpu/nova/index.rst @@ -28,3 +28,4 @@ vGPU manager VFIO driver and the nova-drm driver. core/guidelines core/todo + core/vbios From 952e62240738437dff23f9561e9f3d92a2fc8b99 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:46 +0900 Subject: [PATCH 114/358] Documentation: gpu: nova-core: Document devinit process devinit is mentioned in the code. This patch explains it so it is clear what it does. devinit is not only essential at boot-time, but also at runtime due to suspend-resume and things like re-clocking. Signed-off-by: Joel Fernandes Reviewed-by: Bagas Sanjaya Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-6-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/devinit.rst | 61 +++++++++++++++++++++++++ Documentation/gpu/nova/index.rst | 1 + 2 files changed, 62 insertions(+) create mode 100644 Documentation/gpu/nova/core/devinit.rst diff --git a/Documentation/gpu/nova/core/devinit.rst b/Documentation/gpu/nova/core/devinit.rst new file mode 100644 index 000000000000..70c819a96a00 --- /dev/null +++ b/Documentation/gpu/nova/core/devinit.rst @@ -0,0 +1,61 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Device Initialization (devinit) +================================== +The devinit process is complex and subject to change. This document provides a high-level +overview using the Ampere GPU family as an example. The goal is to provide a conceptual +overview of the process to aid in understanding the corresponding kernel code. + +Device initialization (devinit) is a crucial sequence of register read/write operations +that occur after a GPU reset. 
The devinit sequence is essential for properly configuring +the GPU hardware before it can be used. + +The devinit engine is an interpreter program that typically runs on the PMU (Power Management +Unit) microcontroller of the GPU. This interpreter executes a "script" of initialization +commands. The devinit engine itself is part of the VBIOS ROM in the same ROM image as the +FWSEC (Firmware Security) image (see fwsec.rst and vbios.rst) and it runs before the +nova-core driver is even loaded. On an Ampere GPU, the devinit ucode is separate from the +FWSEC ucode. It is launched by FWSEC, which runs on the GSP in 'heavy-secure' mode, while +devinit runs on the PMU in 'light-secure' mode. + +Key Functions of devinit +------------------------ +devinit performs several critical tasks: + +1. Programming VRAM memory controller timings +2. Power sequencing +3. Clock and PLL (Phase-Locked Loop) configuration +4. Thermal management + +Low-level Firmware Initialization Flow +-------------------------------------- +Upon reset, several microcontrollers on the GPU (such as PMU, SEC2, GSP, etc.) run GPU +firmware (gfw) code to set up the GPU and its core parameters. Most of the GPU is +considered unusable until this initialization process completes. + +These low-level GPU firmware components are typically: + +1. Located in the VBIOS ROM in the same ROM partition (see vbios.rst and fwsec.rst). +2. Executed in sequence on different microcontrollers: + + - The devinit engine typically but not necessarily runs on the PMU. + - On an Ampere GPU, the FWSEC typically runs on the GSP (GPU System Processor) in + heavy-secure mode. + +Before the driver can proceed with further initialization, it must wait for a signal +indicating that core initialization is complete (known as GFW_BOOT). This signal is +asserted by the FWSEC running on the GSP in heavy-secure mode. 
+ +Runtime Considerations +---------------------- +It's important to note that the devinit sequence also needs to run during suspend/resume +operations at runtime, not just during initial boot, as it is critical to power management. + +Security and Access Control +--------------------------- +The initialization process involves careful privilege management. For example, before +accessing certain completion status registers, the driver must check privilege level +masks. Some registers are only accessible after secure firmware (FWSEC) lowers the +privilege level to allow CPU (LS/low-secure) access. This is the case, for example, +when receiving the GFW_BOOT signal. \ No newline at end of file diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst index f38041fcd595..e4e017d92676 100644 --- a/Documentation/gpu/nova/index.rst +++ b/Documentation/gpu/nova/index.rst @@ -29,3 +29,4 @@ vGPU manager VFIO driver and the nova-drm driver. core/guidelines core/todo core/vbios + core/devinit From e5e716dbfc8200be57408b2b3294f152913bd8bd Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:47 +0900 Subject: [PATCH 115/358] Documentation: gpu: nova-core: Document fwsec operation and layout Add explanation of fwsec with diagrams. This helps clarify how the nova-core falcon boot works. Signed-off-by: Joel Fernandes Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-7-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/fwsec.rst | 181 ++++++++++++++++++++++++++ Documentation/gpu/nova/index.rst | 1 + 2 files changed, 182 insertions(+) create mode 100644 Documentation/gpu/nova/core/fwsec.rst diff --git a/Documentation/gpu/nova/core/fwsec.rst b/Documentation/gpu/nova/core/fwsec.rst new file mode 100644 index 000000000000..c440edbe420c --- /dev/null +++ b/Documentation/gpu/nova/core/fwsec.rst @@ -0,0 +1,181 @@ +.. 
SPDX-License-Identifier: (GPL-2.0+ OR MIT) + +========================= +FWSEC (Firmware Security) +========================= +This document briefly/conceptually describes the FWSEC (Firmware Security) image +and its role in the GPU boot sequence. As such, this information is subject to +change in the future and is only current as of the Ampere GPU family. However, +hopefully the concepts described will be useful for understanding the kernel code +that deals with it. All the information is derived from publicly available +sources such as public drivers and documentation. + +The role of FWSEC is to provide a secure boot process. It runs in +'Heavy-secure' mode, and performs firmware verification after a GPU reset +before loading various ucode images onto other microcontrollers on the GPU, +such as the PMU and GSP. + +FWSEC itself is an application stored in the VBIOS ROM in the FWSEC partition of +ROM (see vbios.rst for more details). It contains different commands like FRTS +(Firmware Runtime Services) and SB (Secure Booting other microcontrollers after +reset and loading them with other non-FWSEC ucode). The kernel driver only needs +to perform FRTS, since Secure Boot (SB) has already completed by the time the driver +is loaded. + +The FRTS command carves out the WPR2 region (Write protected region) which contains +data required for power management. Once set up, only HS mode ucode can access it +(see falcon.rst for privilege levels). + +The FWSEC image is located in the VBIOS ROM in the partition of the ROM that contains +various ucode images (also known as applications) -- one of them being FWSEC. For how +it is extracted, see vbios.rst and the vbios.rs source code. + +The Falcon data for each ucode image (including the FWSEC image) is a combination +of headers, data sections (DMEM) and instruction code sections (IMEM).
All these +ucode images are stored in the same ROM partition and the PMU table is used to look +up the application to load it based on its application ID (see vbios.rs). + +For the nova-core driver, the FWSEC contains an 'application interface' called +DMEMMAPPER. This interface is used to execute the 'FWSEC-FRTS' command, among others. +For Ampere, FWSEC is running on the GSP in Heavy-secure mode and runs FRTS. + +FWSEC Memory Layout +------------------- +The memory layout of the FWSEC image is as follows:: + + +---------------------------------------------------------------+ + | FWSEC ROM image (type 0xE0) | + | | + | +---------------------------------+ | + | | PMU Falcon Ucode Table | | + | | (PmuLookupTable) | | + | | +-------------------------+ | | + | | | Table Header | | | + | | | - version: 0x01 | | | + | | | - header_size: 6 | | | + | | | - entry_size: 6 | | | + | | | - entry_count: N | | | + | | | - desc_version:3(unused)| | | + | | +-------------------------+ | | + | | ... | | + | | +-------------------------+ | | + | | | Entry for FWSEC (0x85) | | | + | | | (PmuLookupTableEntry) | | | + | | | - app_id: 0x85 (FWSEC) |----|----+ | + | | | - target_id: 0x01 (PMU) | | | | + | | | - data: offset ---------|----|----|---+ look up FWSEC | + | | +-------------------------+ | | | | + | +---------------------------------+ | | | + | | | | + | | | | + | +---------------------------------+ | | | + | | FWSEC Ucode Component |<---+ | | + | | (aka Falcon data) | | | + | | +-------------------------+ | | | + | | | FalconUCodeDescV3 |<---|--------+ | + | | | - hdr | | | + | | | - stored_size | | | + | | | - pkc_data_offset | | | + | | | - interface_offset -----|----|----------------+ | + | | | - imem_phys_base | | | | + | | | - imem_load_size | | | | + | | | - imem_virt_base | | | | + | | | - dmem_phys_base | | | | + | | | - dmem_load_size | | | | + | | | - engine_id_mask | | | | + | | | - ucode_id | | | | + | | | - signature_count | | look up sig | | + | | | - 
signature_versions --------------+ | | + | | +-------------------------+ | | | | + | | (no gap) | | | | + | | +-------------------------+ | | | | + | | | Signatures Section |<---|-----+ | | + | | | (384 bytes per sig) | | | | + | | | - RSA-3K Signature 1 | | | | + | | | - RSA-3K Signature 2 | | | | + | | | ... | | | | + | | +-------------------------+ | | | + | | | | | + | | +-------------------------+ | | | + | | | IMEM Section (Code) | | | | + | | | | | | | + | | | Contains instruction | | | | + | | | code etc. | | | | + | | +-------------------------+ | | | + | | | | | + | | +-------------------------+ | | | + | | | DMEM Section (Data) | | | | + | | | | | | | + | | | +---------------------+ | | | | + | | | | Application | |<---|----------------+ | + | | | | Interface Table | | | | + | | | | (FalconAppifHdrV1) | | | | + | | | | Header: | | | | + | | | | - version: 0x01 | | | | + | | | | - header_size: 4 | | | | + | | | | - entry_size: 8 | | | | + | | | | - entry_count: N | | | | + | | | | | | | | + | | | | Entries: | | | | + | | | | +-----------------+ | | | | + | | | | | DEVINIT (ID 1) | | | | | + | | | | | - id: 0x01 | | | | | + | | | | | - dmemOffset X -|-|-|----+ | + | | | | +-----------------+ | | | | + | | | | +-----------------+ | | | | + | | | | | DMEMMAPPER(ID 4)| | | | | + | | | | | - id: 0x04 | | | | Used only for DevInit | + | | | | | (NVFW_FALCON_ | | | | application (not FWSEC) | + | | | | | APPIF_ID_DMEMMAPPER) | | + | | | | | - dmemOffset Y -|-|-|----|-----+ | + | | | | +-----------------+ | | | | | + | | | +---------------------+ | | | | + | | | | | | | + | | | +---------------------+ | | | | + | | | | DEVINIT Engine |<|----+ | Used by FWSEC | + | | | | Interface | | | | app. 
| + | | | +---------------------+ | | | | + | | | | | | | + | | | +---------------------+ | | | | + | | | | DMEM Mapper (ID 4) |<|----+-----+ | + | | | | (FalconAppifDmemmapperV3) | | + | | | | - signature: "DMAP" | | | | + | | | | - version: 0x0003 | | | | + | | | | - Size: 64 bytes | | | | + | | | | - cmd_in_buffer_off | |----|------------+ | + | | | | - cmd_in_buffer_size| | | | | + | | | | - cmd_out_buffer_off| |----|------------|-----+ | + | | | | - cmd_out_buffer_sz | | | | | | + | | | | - init_cmd | | | | | | + | | | | - features | | | | | | + | | | | - cmd_mask0/1 | | | | | | + | | | +---------------------+ | | | | | + | | | | | | | | + | | | +---------------------+ | | | | | + | | | | Command Input Buffer|<|----|------------+ | | + | | | | - Command data | | | | | + | | | | - Arguments | | | | | + | | | +---------------------+ | | | | + | | | | | | | + | | | +---------------------+ | | | | + | | | | Command Output |<|----|------------------+ | + | | | | Buffer | | | | + | | | | - Results | | | | + | | | | - Status | | | | + | | | +---------------------+ | | | + | | +-------------------------+ | | + | +---------------------------------+ | + | | + +---------------------------------------------------------------+ + +.. note:: + This is using an GA-102 Ampere GPU as an example and could vary for future GPUs. + +.. note:: + The FWSEC image also plays a role in memory scrubbing (ECC initialization) and VPR + (Video Protected Region) initialization as well. Before the nova-core driver is even + loaded, the FWSEC image is running on the GSP in heavy-secure mode. After the devinit + sequence completes, it does VRAM memory scrubbing (ECC initialization). On consumer + GPUs, it scrubs only part of memory and then initiates 'async scrubbing'. Before this + async scrubbing completes, the unscrubbed VRAM cannot be used for allocation (thus DRM + memory allocators need to wait for this scrubbing to complete). 
diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst index e4e017d92676..e3650f53ff53 100644 --- a/Documentation/gpu/nova/index.rst +++ b/Documentation/gpu/nova/index.rst @@ -30,3 +30,4 @@ vGPU manager VFIO driver and the nova-drm driver. core/todo core/vbios core/devinit + core/fwsec From 215a3f91713383a3c0d2da82d223a608a3c17ac1 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 8 Jul 2025 15:49:48 +0900 Subject: [PATCH 116/358] Documentation: gpu: nova-core: Document basics of the Falcon Instances of the Falcon microcontroller appear in modern Nvidia GPUs and are crucial to the GPU boot process. Document some concepts which will make nova-core boot code easier to digest. All the information is derived from public sources such as public documents, OpenRM and Nouveau code. Signed-off-by: Joel Fernandes Reviewed-by: Bagas Sanjaya Signed-off-by: Alexandre Courbot Link: https://lore.kernel.org/r/20250708-nova-docs-v4-8-9d188772c4c7@nvidia.com Signed-off-by: Danilo Krummrich --- Documentation/gpu/nova/core/falcon.rst | 158 +++++++++++++++++++++++++ Documentation/gpu/nova/index.rst | 1 + 2 files changed, 159 insertions(+) create mode 100644 Documentation/gpu/nova/core/falcon.rst diff --git a/Documentation/gpu/nova/core/falcon.rst b/Documentation/gpu/nova/core/falcon.rst new file mode 100644 index 000000000000..33137082eb6c --- /dev/null +++ b/Documentation/gpu/nova/core/falcon.rst @@ -0,0 +1,158 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============================== +Falcon (FAst Logic Controller) +============================== +The following sections describe the Falcon core and the ucode running on it. +The descriptions are based on the Ampere GPU or earlier designs; however, they +should mostly apply to future designs as well, but everything is subject to +change. The overview provided here is mainly tailored towards understanding the +interactions of nova-core driver with the Falcon. 
+ +NVIDIA GPUs embed small RISC-like microcontrollers called Falcon cores, which +handle secure firmware tasks, initialization, and power management. Modern +NVIDIA GPUs may have multiple such Falcon instances (e.g., GSP (the GPU system +processor) and SEC2 (the security engine)) and also may integrate a RISC-V core. +This core is capable of running both RISC-V and Falcon code. + +The code running on the Falcon cores is also called 'ucode', and will be +referred to as such in the following sections. + +Falcons have separate instruction and data memories (IMEM/DMEM) and provide a +small DMA engine (via the FBIF - "Frame Buffer Interface") to load code from +system memory. The nova-core driver must reset and configure the Falcon, load +its firmware via DMA, and start its CPU. + +Falcon security levels +====================== +Falcons can run in Non-secure (NS), Light Secure (LS), or Heavy Secure (HS) +modes. + +Heavy Secured (HS) also known as Privilege Level 3 (PL3) +-------------------------------------------------------- +HS ucode is the most trusted code and has access to pretty much everything on +the chip. The HS binary includes a signature in it which is verified at boot. +This signature verification is done by the hardware itself, thus establishing a +root of trust. For example, the FWSEC-FRTS command (see fwsec.rst) runs on the +GSP in HS mode. FRTS, which involves setting up and loading content into the WPR +(Write Protect Region), has to be done by the HS ucode and cannot be done by the +host CPU or LS ucode. + +Light Secured (LS or PL2) and Non Secured (NS or PL0) +----------------------------------------------------- +These modes are less secure than HS. Like HS, the LS or NS ucode binary also +typically includes a signature in it. To load firmware in LS or NS mode onto a +Falcon, another Falcon needs to be running in HS mode, which also establishes the +root of trust. 
For example, in the case of an Ampere GPU, the CPU runs the "Booter" +ucode in HS mode on the SEC2 Falcon, which then authenticates and runs the +run-time GSP binary (GSP-RM) in LS mode on the GSP Falcon. Similarly, as an +example, after reset on an Ampere, FWSEC runs on the GSP which then loads the +devinit engine onto the PMU in LS mode. + +Root of trust establishment +--------------------------- +To establish a root of trust, the code running on a Falcon must be immutable and +hardwired into a read-only memory (ROM). This follows industry norms for +verification of firmware. This code is called the Boot ROM (BROM). The nova-core +driver on the CPU communicates with the Falcon's Boot ROM through various Falcon +registers prefixed with "BROM" (see regs.rs). + +After the nova-core driver reads the necessary ucode from VBIOS, it programs the +BROM and DMA registers to trigger the Falcon to load the HS ucode from the system +memory into the Falcon's IMEM/DMEM. Once the HS ucode is loaded, it is verified +by the Falcon's Boot ROM. + +Once the verified HS code is running on a Falcon, it can verify and load other +LS/NS ucode binaries onto other Falcons and start them. The process of signature +verification is the same as HS; just in this case, the hardware (BROM) doesn't +compute the signature, but the HS ucode does. + +The root of trust is therefore established as follows: + Hardware (Boot ROM running on the Falcon) -> HS ucode -> LS/NS ucode. + +On an Ampere GPU, for example, the boot verification flow is: + Hardware (Boot ROM running on the SEC2) -> + HS ucode (Booter running on the SEC2) -> + LS ucode (GSP-RM running on the GSP) + +.. note:: + While the CPU can load HS ucode onto a Falcon microcontroller and have it + verified by the hardware and run, the CPU itself typically does not load + LS or NS ucode and run it. Loading of LS or NS ucode is done mainly by the + HS ucode.
For example, on an Ampere GPU, after the Booter ucode runs on the + SEC2 in HS mode and loads the GSP-RM binary onto the GSP, it needs to run + the "SEC2-RTOS" ucode at runtime. This presents a problem: there is no + component to load the SEC2-RTOS ucode onto the SEC2. The CPU cannot load + LS code, and GSP-RM must run in LS mode. To overcome this, the GSP is + temporarily made to run HS ucode (which is itself loaded by the CPU via + the nova-core driver using a "GSP-provided sequencer") which then loads + the SEC2-RTOS ucode onto the SEC2 in LS mode. The GSP then resumes + running its own GSP-RM LS ucode. + +Falcon memory subsystem and DMA engine +====================================== +Falcons have separate instruction and data memories (IMEM/DMEM) +and contain a small DMA engine called FBDMA (Framebuffer DMA) which does +DMA transfers to/from the IMEM/DMEM memory inside the Falcon via the FBIF +(Framebuffer Interface), to external memory. + +DMA transfers are possible from the Falcon's memory to both the system memory +and the framebuffer memory (VRAM). + +To perform a DMA via the FBDMA, the FBIF is configured to decide how the memory +is accessed (also known as aperture type). In the nova-core driver, this is +determined by the `FalconFbifTarget` enum. + +The IO-PMP (Input/Output Physical Memory Protection) unit in the Falcon +controls access by the FBDMA to the external memory.
+ +Conceptual diagram (not exact) of the Falcon and its memory subsystem is as follows:: + + External Memory (Framebuffer / System DRAM) + ^ | + | | + | v + +-----------------------------------------------------+ + | | | + | +---------------+ | | + | | FBIF |-------+ | FALCON + | | (FrameBuffer | Memory Interface | PROCESSOR + | | InterFace) | | + | | Apertures | | + | | Configures | | + | | mem access | | + | +-------^-------+ | + | | | + | | FBDMA uses configured FBIF apertures | + | | to access External Memory + | | + | +-------v--------+ +---------------+ + | | FBDMA | cfg | RISC | + | | (FrameBuffer |<---->| CORE |----->. Direct Core Access + | | DMA Engine) | | | | + | | - Master dev. | | (can run both | | + | +-------^--------+ | Falcon and | | + | | cfg--->| RISC-V code) | | + | | / | | | + | | | +---------------+ | +------------+ + | | | | | BROM | + | | | <--->| (Boot ROM) | + | | / | +------------+ + | | v | + | +---------------+ | + | | IO-PMP | Controls access by FBDMA | + | | (IO Physical | and other IO Masters | + | | Memory Protect) | + | +-------^-------+ | + | | | + | | Protected Access Path for FBDMA | + | v | + | +---------------------------------------+ | + | | Memory | | + | | +---------------+ +------------+ | | + | | | IMEM | | DMEM | |<-----+ + | | | (Instruction | | (Data | | + | | | Memory) | | Memory) | | + | | +---------------+ +------------+ | + | +---------------------------------------+ + +-----------------------------------------------------+ diff --git a/Documentation/gpu/nova/index.rst b/Documentation/gpu/nova/index.rst index e3650f53ff53..e39cb3163581 100644 --- a/Documentation/gpu/nova/index.rst +++ b/Documentation/gpu/nova/index.rst @@ -31,3 +31,4 @@ vGPU manager VFIO driver and the nova-drm driver. 
core/vbios core/devinit core/fwsec + core/falcon From cdc36b66cd41d0f6e18e86d7aa50554c852f97e2 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Wed, 9 Jul 2025 22:12:24 +0530 Subject: [PATCH 117/358] drm/xe: Expose fan control and voltage regulator version Add sysfs attributes for late binding features which expose bound version to the user. v2: Rework attribute and macro naming (Badal) v3: Drop fancy formatting (Rodrigo) v4: Form version string using local variables (Rodrigo) Signed-off-by: Raag Jadav Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250709164224.2676086-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 143 ++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_pcode_api.h | 15 +++ 2 files changed, 157 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index b9440f8c781e..e5fd0cd537bc 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -24,6 +24,12 @@ * * vram_d3cold_threshold - Report/change vram used threshold(in MB) below * which vram save/restore is permissible during runtime D3cold entry/exit. + * + * lb_fan_control_version - Fan control version provisioned by late binding. + * Exposed only if supported by the device. + * + * lb_voltage_regulator_version - Voltage regulator version provisioned by late + * binding. Exposed only if supported by the device. 
*/ static ssize_t @@ -65,6 +71,135 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RW(vram_d3cold_threshold); +static ssize_t +lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = FAN_TABLE, ver_high = FAN_TABLE; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version); + +static ssize_t +lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap, ver_low = VR_CONFIG, ver_high = VR_CONFIG; + u16 major = 0, minor = 0, hotfix = 0, build = 0; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) { + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0), + &ver_low, 
NULL); + if (ret) + goto out; + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0), + &ver_high, NULL); + if (ret) + goto out; + + major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low); + minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low); + hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high); + build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high); + } +out: + xe_pm_runtime_put(xe); + + return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build); +} +static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version); + +static int late_bind_create_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + if (ret) + goto out; + } + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); + + return ret; +} + +static void late_bind_remove_files(struct device *dev) +{ + struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev)); + struct xe_tile *root = xe_device_get_root_tile(xe); + u32 cap; + int ret; + + xe_pm_runtime_get(xe); + + ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), + &cap, NULL); + if (ret) + goto out; + + if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); + + if (REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap)) + sysfs_remove_file(&dev->kobj, &dev_attr_lb_voltage_regulator_version.attr); +out: + xe_pm_runtime_put(xe); +} + /** * DOC: PCIe Gen5 Limitations * @@ -151,8 +286,10 @@ static void xe_device_sysfs_fini(void *arg) if (xe->d3cold.capable) 
sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr); - if (xe->info.platform == XE_BATTLEMAGE) + if (xe->info.platform == XE_BATTLEMAGE) { sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs); + late_bind_remove_files(xe->drm.dev); + } } int xe_device_sysfs_init(struct xe_device *xe) @@ -170,6 +307,10 @@ int xe_device_sysfs_init(struct xe_device *xe) ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs); if (ret) return ret; + + ret = late_bind_create_files(dev); + if (ret) + return ret; } return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe); diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h index 0befdea77db1..92bfcba51e19 100644 --- a/drivers/gpu/drm/xe/xe_pcode_api.h +++ b/drivers/gpu/drm/xe/xe_pcode_api.h @@ -50,6 +50,21 @@ #define READ_PL_FROM_FW 0x1 #define READ_PL_FROM_PCODE 0x0 +#define PCODE_LATE_BINDING 0x5C +#define GET_CAPABILITY_STATUS 0x0 +#define V1_FAN_SUPPORTED REG_BIT(0) +#define VR_PARAMS_SUPPORTED REG_BIT(3) +#define V1_FAN_PROVISIONED REG_BIT(16) +#define VR_PARAMS_PROVISIONED REG_BIT(19) +#define GET_VERSION_LOW 0x1 +#define GET_VERSION_HIGH 0x2 +#define MAJOR_VERSION_MASK REG_GENMASK(31, 16) +#define MINOR_VERSION_MASK REG_GENMASK(15, 0) +#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16) +#define BUILD_VERSION_MASK REG_GENMASK(15, 0) +#define FAN_TABLE 1 +#define VR_CONFIG 2 + #define PCODE_FREQUENCY_CONFIG 0x6e /* Frequency Config Sub Commands (param1) */ #define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0 From 89cd027c94ab8ede68f61920c84478c5becf07ca Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 4 Jun 2025 21:00:20 +0200 Subject: [PATCH 118/358] drm/xe/pf: Print runtime registers using debug printer While we already print VF's runtime registers only under DEBUG_SRIOV config, we should be still doing it at debug level, not info. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Lukasz Laguna Link: https://lore.kernel.org/r/20250604190021.725-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index 821cfcc34e6b..fa74b3e1a964 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -266,7 +266,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt) read_many(gt, size, regs, values); if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); xe_gt_sriov_pf_service_print_runtime(gt, &p); } From 1fbe023d30da84d11299dfab496f40daae423940 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 3 Jul 2025 16:57:09 +0200 Subject: [PATCH 119/358] drm/xe/pf: Print configuration KLVs using debug printer While we print VF's configuration KLVs only under DEBUG_SRIOV config, we should be doing it at debug level, not info level. 
Signed-off-by: Michal Wajdeczko Cc: Lukasz Laguna Reviewed-by: Lukasz Laguna Link: https://lore.kernel.org/r/20250703145709.1832-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index d186f780885d..d0cf1d80be07 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -104,13 +104,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs } if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); void *klvs = xe_guc_buf_cpu_ptr(buf); char name[8]; - xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n", - xe_sriov_function_name(vfid, name, sizeof(name)), - num_klvs, str_plural(num_klvs)); + xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n", + xe_sriov_function_name(vfid, name, sizeof(name)), + num_klvs, str_plural(num_klvs)); xe_guc_klv_print(klvs, num_dwords, &p); } @@ -2349,7 +2349,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return -EINVAL; if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) { - struct drm_printer p = xe_gt_info_printer(gt); + struct drm_printer p = xe_gt_dbg_printer(gt); drm_printf(&p, "restoring VF%u config:\n", vfid); xe_guc_klv_print(buf, size / sizeof(u32), &p); From 621a4220793b6d7bf29c66e2d82b9290225b2cf6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 2 Jul 2025 16:25:04 +0200 Subject: [PATCH 120/358] drm/xe/guc: Don't allocate temporary policies object Since we are already using reusable buffer objects from the GuC buffer cache, we can directly write into their CPU pointers and spare unnecessary temporary allocation. While around, also make sure to clear obtained buffer, to avoid sending some stale data. 
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250702142504.1656-1-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ads.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index b4d81f4bd548..131cfc56be00 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -995,16 +995,6 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off return xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0); } -static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_policies *policies) -{ - CLASS(xe_guc_buf_from_data, buf)(&ads_to_guc(ads)->buf, policies, sizeof(*policies)); - - if (!xe_guc_buf_is_valid(buf)) - return -ENOBUFS; - - return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); -} - /** * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy * @ads: Additional data structures object @@ -1015,13 +1005,16 @@ static int guc_ads_update_policies(struct xe_guc_ads *ads, const struct guc_poli */ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) { - struct xe_device *xe = ads_to_xe(ads); struct guc_policies *policies; - int ret; + struct xe_guc *guc = ads_to_guc(ads); + struct xe_device *xe = ads_to_xe(ads); + CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies)); - policies = kmalloc(sizeof(*policies), GFP_KERNEL); - if (!policies) - return -ENOMEM; + if (!xe_guc_buf_is_valid(buf)) + return -ENOBUFS; + + policies = xe_guc_buf_cpu_ptr(buf); + memset(policies, 0, sizeof(*policies)); policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time); policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items); @@ -1031,7 +1024,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads) else policies->global_flags &= 
~GLOBAL_POLICY_DISABLE_ENGINE_RESET; - ret = guc_ads_update_policies(ads, policies); - kfree(policies); - return ret; + return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf)); } From 22290cc904d9f5b882748573a16ccf8c6d632d92 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 1 Jul 2025 15:22:48 +0300 Subject: [PATCH 121/358] i2c: designware: Use polling by default when there is no irq resource The irq resource itself can be used as a generic way to determine when polling is needed. This not only removes the need for special additional device properties that would soon be needed when the platform may or may not have the irq, but it also removes the need to check the platform in the first place in order to determine whether polling is needed or not. Signed-off-by: Heikki Krogerus Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250701122252.2590230-2-heikki.krogerus@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/i2c/busses/i2c-designware-platdrv.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 879719e91df2..3104f52e32be 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -205,25 +205,28 @@ static void i2c_dw_remove_lock_support(struct dw_i2c_dev *dev) static int dw_i2c_plat_probe(struct platform_device *pdev) { + u32 flags = (uintptr_t)device_get_match_data(&pdev->dev); struct device *device = &pdev->dev; struct i2c_adapter *adap; struct dw_i2c_dev *dev; int irq, ret; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq == -ENXIO) + flags |= ACCESS_POLLING; + else if (irq < 0) return irq; dev = devm_kzalloc(device, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; - dev->flags = (uintptr_t)device_get_match_data(device); if (device_property_present(device, "wx,i2c-snps-model")) - dev->flags = 
MODEL_WANGXUN_SP | ACCESS_POLLING; + flags = MODEL_WANGXUN_SP | ACCESS_POLLING; dev->dev = device; dev->irq = irq; + dev->flags = flags; platform_set_drvdata(pdev, dev); ret = dw_i2c_plat_request_regs(dev); From f6a8e9f3de4567c71ef9f5f13719df69a8b96081 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 1 Jul 2025 15:22:49 +0300 Subject: [PATCH 122/358] i2c: designware: Add quirk for Intel Xe The regmap is coming from the parent also in case of Xe GPUs. Reusing the Wangxun quirk for that. Acked-by: Jarkko Nikula Co-developed-by: Michael J. Ruhl Signed-off-by: Michael J. Ruhl Signed-off-by: Heikki Krogerus Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250701122252.2590230-3-heikki.krogerus@linux.intel.com Signed-off-by: Rodrigo Vivi [Rodrigo fixed the co-developed tags while merging] --- drivers/i2c/busses/i2c-designware-platdrv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 3104f52e32be..a35e4c64a1d4 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -101,7 +101,7 @@ static int bt1_i2c_request_regs(struct dw_i2c_dev *dev) } #endif -static int txgbe_i2c_request_regs(struct dw_i2c_dev *dev) +static int dw_i2c_get_parent_regmap(struct dw_i2c_dev *dev) { dev->map = dev_get_regmap(dev->dev->parent, NULL); if (!dev->map) @@ -123,12 +123,15 @@ static int dw_i2c_plat_request_regs(struct dw_i2c_dev *dev) struct platform_device *pdev = to_platform_device(dev->dev); int ret; + if (device_is_compatible(dev->dev, "intel,xe-i2c")) + return dw_i2c_get_parent_regmap(dev); + switch (dev->flags & MODEL_MASK) { case MODEL_BAIKAL_BT1: ret = bt1_i2c_request_regs(dev); break; case MODEL_WANGXUN_SP: - ret = txgbe_i2c_request_regs(dev); + ret = dw_i2c_get_parent_regmap(dev); break; default: dev->base = devm_platform_ioremap_resource(pdev, 0); From f0e53aadd702c64b2c2090996751c9be043f9e80 Mon 
Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 1 Jul 2025 15:22:50 +0300 Subject: [PATCH 123/358] drm/xe: Support for I2C attached MCUs Adding adaption/glue layer where the I2C host adapter (Synopsys DesignWare I2C adapter) and the I2C clients (the microcontroller units) are enumerated. The microcontroller units (MCU) that are attached to the GPU depend on the OEM. The initially supported MCU will be the Add-In Management Controller (AMC). Co-developed-by: Michael J. Ruhl Signed-off-by: Michael J. Ruhl Signed-off-by: Heikki Krogerus Reviewed-by: Rodrigo Vivi Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250701122252.2590230-4-heikki.krogerus@linux.intel.com Signed-off-by: Rodrigo Vivi [Rodrigo fixed the co-developed tags and SPDX format in the .c file] --- drivers/gpu/drm/xe/Kconfig | 1 + drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/regs/xe_i2c_regs.h | 15 ++ drivers/gpu/drm/xe/regs/xe_irq_regs.h | 1 + drivers/gpu/drm/xe/regs/xe_pmt.h | 2 +- drivers/gpu/drm/xe/regs/xe_regs.h | 2 + drivers/gpu/drm/xe/xe_device.c | 5 + drivers/gpu/drm/xe/xe_device_types.h | 4 + drivers/gpu/drm/xe/xe_i2c.c | 300 ++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_i2c.h | 58 +++++ drivers/gpu/drm/xe/xe_irq.c | 2 + 11 files changed, 390 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/regs/xe_i2c_regs.h create mode 100644 drivers/gpu/drm/xe/xe_i2c.c create mode 100644 drivers/gpu/drm/xe/xe_i2c.h diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig index f66e6d39e319..2bb2bc052120 100644 --- a/drivers/gpu/drm/xe/Kconfig +++ b/drivers/gpu/drm/xe/Kconfig @@ -45,6 +45,7 @@ config DRM_XE select WANT_DEV_COREDUMP select AUXILIARY_BUS select HMM_MIRROR + select REGMAP if I2C help Driver for Intel Xe2 series GPUs and later. Experimental support for Xe series is also available. 
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 7c039caefd00..d52cf5808d6f 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -125,6 +125,7 @@ xe-y += xe_bb.o \ xe_wait_user_fence.o \ xe_wopcm.o +xe-$(CONFIG_I2C) += xe_i2c.o xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h new file mode 100644 index 000000000000..92dae4487614 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_REGS_H_ +#define _XE_I2C_REGS_H_ + +#include "xe_reg_defs.h" +#include "xe_regs.h" + +#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000) +#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000) +#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400) + +#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) +#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) + +#endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h index f0ecfcac4003..13635e4331d4 100644 --- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h @@ -19,6 +19,7 @@ #define MASTER_IRQ REG_BIT(31) #define GU_MISC_IRQ REG_BIT(29) #define DISPLAY_IRQ REG_BIT(16) +#define I2C_IRQ REG_BIT(12) #define GT_DW_IRQ(x) REG_BIT(x) /* diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h index b0efd9b48d1e..2995d72c3f78 100644 --- a/drivers/gpu/drm/xe/regs/xe_pmt.h +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -5,7 +5,7 @@ #ifndef _XE_PMT_H_ #define _XE_PMT_H_ -#define SOC_BASE 0x280000 +#include "xe_regs.h" #define BMG_PMT_BASE_OFFSET 0xDB000 #define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h index 3abb17d2ca33..1926b4044314 100644 --- a/drivers/gpu/drm/xe/regs/xe_regs.h +++ 
b/drivers/gpu/drm/xe/regs/xe_regs.h @@ -7,6 +7,8 @@ #include "regs/xe_reg_defs.h" +#define SOC_BASE 0x280000 + #define GU_CNTL_PROTECTED XE_REG(0x10100C) #define DRIVERINT_FLR_DIS REG_BIT(31) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 0b73cb72bad1..6db09cfc8eb8 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -43,6 +43,7 @@ #include "xe_guc_pc.h" #include "xe_hw_engine_group.h" #include "xe_hwmon.h" +#include "xe_i2c.h" #include "xe_irq.h" #include "xe_mmio.h" #include "xe_module.h" @@ -902,6 +903,10 @@ int xe_device_probe(struct xe_device *xe) if (err) goto err_unregister_display; + err = xe_i2c_probe(xe); + if (err) + goto err_unregister_display; + for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 78c4acafd268..decc749fbf70 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -34,6 +34,7 @@ struct dram_info; struct intel_display; struct intel_dg_nvm_dev; struct xe_ggtt; +struct xe_i2c; struct xe_pat_ops; struct xe_pxp; @@ -585,6 +586,9 @@ struct xe_device { /** @pmu: performance monitoring unit */ struct xe_pmu pmu; + /** @i2c: I2C host controller */ + struct xe_i2c *i2c; + /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */ u32 atomic_svm_timeslice_ms; diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c new file mode 100644 index 000000000000..6f05142a8abf --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Intel Xe I2C attached Microcontroller Units (MCU) + * + * Copyright (C) 2025 Intel Corporation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "regs/xe_i2c_regs.h" +#include "regs/xe_irq_regs.h" + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_i2c.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" + +/** + * DOC: Xe I2C devices + * + * Register a platform device for the I2C host controller (Synopsys DesignWare + * I2C) if the registers of that controller are mapped to the MMIO, and also the + * I2C client device for the Add-In Management Controller (the MCU) attached to + * the host controller. + * + * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host + * controller. + */ + +static const char adapter_name[] = "i2c_designware"; + +static const struct property_entry xe_i2c_adapter_properties[] = { + PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"), + PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ), + { } +}; + +static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep) +{ + u32 *val = ep; + + val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX); + val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX); +} + +static void xe_i2c_client_work(struct work_struct *work) +{ + struct xe_i2c *i2c = container_of(work, struct xe_i2c, work); + struct i2c_board_info info = { + .type = "amc", + .flags = I2C_CLIENT_HOST_NOTIFY, + .addr = i2c->ep.addr[1], + }; + + i2c->client[0] = i2c_new_client_device(i2c->adapter, &info); +} + +static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data) +{ + struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier); + struct i2c_adapter *adapter = i2c_verify_adapter(data); + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE && + adapter && dev->parent == &i2c->pdev->dev) { + i2c->adapter = adapter; + schedule_work(&i2c->work); + return NOTIFY_OK; + } + + 
return NOTIFY_DONE; +} + +static int xe_i2c_register_adapter(struct xe_i2c *i2c) +{ + struct pci_dev *pci = to_pci_dev(i2c->drm_dev); + struct platform_device *pdev; + struct fwnode_handle *fwnode; + int ret; + + fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); + if (!fwnode) + return -ENOMEM; + + /* + * Not using platform_device_register_full() here because we don't have + * a handle to the platform_device before it returns. xe_i2c_notifier() + * uses that handle, but it may be called before + * platform_device_register_full() is done. + */ + pdev = platform_device_alloc(adapter_name, pci_dev_id(pci)); + if (!pdev) { + ret = -ENOMEM; + goto err_fwnode_remove; + } + + if (i2c->adapter_irq) { + struct resource res; + + res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c"); + + ret = platform_device_add_resources(pdev, &res, 1); + if (ret) + goto err_pdev_put; + } + + pdev->dev.parent = i2c->drm_dev; + pdev->dev.fwnode = fwnode; + i2c->adapter_node = fwnode; + i2c->pdev = pdev; + + ret = platform_device_add(pdev); + if (ret) + goto err_pdev_put; + + return 0; + +err_pdev_put: + platform_device_put(pdev); +err_fwnode_remove: + fwnode_remove_software_node(fwnode); + + return ret; +} + +static void xe_i2c_unregister_adapter(struct xe_i2c *i2c) +{ + platform_device_unregister(i2c->pdev); + fwnode_remove_software_node(i2c->adapter_node); +} + +/** + * xe_i2c_irq_handler: Handler for I2C interrupts + * @xe: xe device instance + * @master_ctl: interrupt register + * + * Forward interrupts generated by the I2C host adapter to the I2C host adapter + * driver. 
+ */ +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) +{ + if (!xe->i2c || !xe->i2c->adapter_irq) + return; + + if (master_ctl & I2C_IRQ) + generic_handle_irq_safe(xe->i2c->adapter_irq); +} + +static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq, + irq_hw_number_t hw_irq_num) +{ + irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); + return 0; +} + +static const struct irq_domain_ops xe_i2c_irq_ops = { + .map = xe_i2c_irq_map, +}; + +static int xe_i2c_create_irq(struct xe_i2c *i2c) +{ + struct irq_domain *domain; + + if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ)) + return 0; + + domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL); + if (!domain) + return -ENOMEM; + + i2c->adapter_irq = irq_create_mapping(domain, 0); + i2c->irqdomain = domain; + + return 0; +} + +static void xe_i2c_remove_irq(struct xe_i2c *i2c) +{ + if (!i2c->irqdomain) + return; + + irq_dispose_mapping(i2c->adapter_irq); + irq_domain_remove(i2c->irqdomain); +} + +static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct xe_i2c *i2c = context; + + *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET)); + + return 0; +} + +static int xe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct xe_i2c *i2c = context; + + xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = xe_i2c_read, + .reg_write = xe_i2c_write, + .fast_io = true, +}; + +static void xe_i2c_remove(void *data) +{ + struct xe_i2c *i2c = data; + unsigned int i; + + for (i = 0; i < XE_I2C_MAX_CLIENTS; i++) + i2c_unregister_device(i2c->client[i]); + + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + xe_i2c_unregister_adapter(i2c); + xe_i2c_remove_irq(i2c); +} + +/** + * xe_i2c_probe: Probe the I2C host adapter and the I2C clients attached to it + * 
@xe: xe device instance + * + * Register all the I2C devices described in the I2C Endpoint data structure. + * + * Return: 0 on success, error code on failure + */ +int xe_i2c_probe(struct xe_device *xe) +{ + struct device *drm_dev = xe->drm.dev; + struct xe_i2c_endpoint ep; + struct regmap *regmap; + struct xe_i2c *i2c; + int ret; + + if (xe->info.platform != XE_BATTLEMAGE) + return 0; + + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); + if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return 0; + + i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL); + if (!i2c) + return -ENOMEM; + + INIT_WORK(&i2c->work, xe_i2c_client_work); + i2c->mmio = xe_root_tile_mmio(xe); + i2c->drm_dev = drm_dev; + i2c->ep = ep; + + regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + i2c->bus_notifier.notifier_call = xe_i2c_notifier; + ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier); + if (ret) + return ret; + + ret = xe_i2c_create_irq(i2c); + if (ret) + goto err_unregister_notifier; + + ret = xe_i2c_register_adapter(i2c); + if (ret) + goto err_remove_irq; + + return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c); + +err_remove_irq: + xe_i2c_remove_irq(i2c); + +err_unregister_notifier: + bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier); + + return ret; +} diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h new file mode 100644 index 000000000000..7ea40f4e4aa4 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef _XE_I2C_H_ +#define _XE_I2C_H_ + +#include +#include +#include +#include + +struct device; +struct fwnode_handle; +struct i2c_adapter; +struct i2c_client; +struct irq_domain; +struct platform_device; +struct xe_device; +struct xe_mmio; + +#define XE_I2C_MAX_CLIENTS 3 + +#define XE_I2C_EP_COOKIE_DEVICE 0xde + +/* Endpoint Capabilities */ +#define XE_I2C_EP_CAP_IRQ BIT(0) + +struct xe_i2c_endpoint { + u8 cookie; 
+ u8 capabilities; + u16 addr[XE_I2C_MAX_CLIENTS]; +}; + +struct xe_i2c { + struct fwnode_handle *adapter_node; + struct platform_device *pdev; + struct i2c_adapter *adapter; + struct i2c_client *client[XE_I2C_MAX_CLIENTS]; + + struct notifier_block bus_notifier; + struct work_struct work; + + struct irq_domain *irqdomain; + int adapter_irq; + + struct xe_i2c_endpoint ep; + struct device *drm_dev; + + struct xe_mmio *mmio; +}; + +#if IS_ENABLED(CONFIG_I2C) +int xe_i2c_probe(struct xe_device *xe); +void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +#else +static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } +static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +#endif + +#endif diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 5362d3174b06..c43e62dc692e 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -18,6 +18,7 @@ #include "xe_gt.h" #include "xe_guc.h" #include "xe_hw_engine.h" +#include "xe_i2c.h" #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_pxp.h" @@ -476,6 +477,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); + xe_i2c_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); } } From 0ea07b69517a16808ea700b3226ddfa4484f23b9 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 1 Jul 2025 15:22:51 +0300 Subject: [PATCH 124/358] drm/xe/pm: Wire up suspend/resume for I2C controller Wire up suspend/resume handles for I2C controller to match its power state with SGUnit. 
Signed-off-by: Raag Jadav Signed-off-by: Heikki Krogerus Reviewed-by: Karthik Poosa Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250701122252.2590230-5-heikki.krogerus@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_i2c_regs.h | 5 +++++ drivers/gpu/drm/xe/xe_i2c.c | 29 +++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_i2c.h | 4 ++++ drivers/gpu/drm/xe/xe_pm.c | 9 +++++++++ 4 files changed, 47 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h index 92dae4487614..af781c8e4a80 100644 --- a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h @@ -2,6 +2,8 @@ #ifndef _XE_I2C_REGS_H_ #define _XE_I2C_REGS_H_ +#include + #include "xe_reg_defs.h" #include "xe_regs.h" @@ -12,4 +14,7 @@ #define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164) #define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168) +#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND) +#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84) + #endif /* _XE_I2C_REGS_H_ */ diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 6f05142a8abf..db9c0340be5c 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -226,6 +226,31 @@ static const struct regmap_config i2c_regmap_config = { .fast_io = true, }; +void xe_i2c_pm_suspend(struct xe_device *xe) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) +{ + struct xe_mmio *mmio = xe_root_tile_mmio(xe); + + if (!xe->i2c || xe->i2c->ep.cookie != XE_I2C_EP_COOKIE_DEVICE) + return; + + if (d3cold) + xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY); + + 
xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0); + drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR)); +} + static void xe_i2c_remove(void *data) { struct xe_i2c *i2c = data; @@ -270,6 +295,10 @@ int xe_i2c_probe(struct xe_device *xe) i2c->mmio = xe_root_tile_mmio(xe); i2c->drm_dev = drm_dev; i2c->ep = ep; + xe->i2c = i2c; + + /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */ + xe_i2c_pm_resume(xe, true); regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config); if (IS_ERR(regmap)) diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h index 7ea40f4e4aa4..b767ed8ce52b 100644 --- a/drivers/gpu/drm/xe/xe_i2c.h +++ b/drivers/gpu/drm/xe/xe_i2c.h @@ -50,9 +50,13 @@ struct xe_i2c { #if IS_ENABLED(CONFIG_I2C) int xe_i2c_probe(struct xe_device *xe); void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl); +void xe_i2c_pm_suspend(struct xe_device *xe); +void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold); #else static inline int xe_i2c_probe(struct xe_device *xe) { return 0; } static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { } +static inline void xe_i2c_pm_suspend(struct xe_device *xe) { } +static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { } #endif #endif diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index bcfda545e74f..f171a91b849c 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -19,6 +19,7 @@ #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_guc.h" +#include "xe_i2c.h" #include "xe_irq.h" #include "xe_pcode.h" #include "xe_pxp.h" @@ -146,6 +147,8 @@ int xe_pm_suspend(struct xe_device *xe) xe_display_pm_suspend_late(xe); + xe_i2c_pm_suspend(xe); + drm_dbg(&xe->drm, "Device suspended\n"); return 0; @@ -190,6 +193,8 @@ int xe_pm_resume(struct xe_device *xe) if (err) goto err; + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); 
for_each_gt(gt, xe, id) @@ -487,6 +492,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe) xe_display_pm_runtime_suspend_late(xe); + xe_i2c_pm_suspend(xe); + xe_rpm_lockmap_release(xe); xe_pm_write_callback_task(xe, NULL); return 0; @@ -534,6 +541,8 @@ int xe_pm_runtime_resume(struct xe_device *xe) goto out; } + xe_i2c_pm_resume(xe, xe->d3cold.allowed); + xe_irq_resume(xe); for_each_gt(gt, xe, id) From f5c5d29522ecb3b6797130995e74e4774caf4548 Mon Sep 17 00:00:00 2001 From: Riana Tauro Date: Tue, 1 Jul 2025 15:22:52 +0300 Subject: [PATCH 125/358] drm/xe/xe_i2c: Add support for i2c in survivability mode Initialize i2c in survivability mode to allow firmware update of Add-In Management Controller (AMC) in survivability mode. Signed-off-by: Riana Tauro Signed-off-by: Heikki Krogerus Reviewed-by: Raag Jadav Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20250701122252.2590230-6-heikki.krogerus@linux.intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_survivability_mode.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c index 1f710b3fc599..41705f5d52e3 100644 --- a/drivers/gpu/drm/xe/xe_survivability_mode.c +++ b/drivers/gpu/drm/xe/xe_survivability_mode.c @@ -14,6 +14,7 @@ #include "xe_device.h" #include "xe_gt.h" #include "xe_heci_gsc.h" +#include "xe_i2c.h" #include "xe_mmio.h" #include "xe_pcode_api.h" #include "xe_vsec.h" @@ -173,20 +174,22 @@ static int enable_survivability_mode(struct pci_dev *pdev) survivability->mode = true; ret = xe_heci_gsc_init(xe); - if (ret) { - /* - * But if it fails, device can't enter survivability - * so move it back for correct error handling - */ - survivability->mode = false; - return ret; - } + if (ret) + goto err; xe_vsec_init(xe); + ret = xe_i2c_probe(xe); + if (ret) + goto err; + dev_err(dev, "In Survivability Mode\n"); return 0; + +err: + survivability->mode = false; + return ret; } 
/** From ce3d39fae3d35fc9f09a7d65bffc218fbdebd002 Mon Sep 17 00:00:00 2001 From: Juston Li Date: Wed, 9 Jul 2025 12:23:14 -0700 Subject: [PATCH 126/358] drm/xe/bo: add GPU memory trace points Add TRACE_GPU_MEM tracepoints for tracking global GPU memory usage. These are required by VSR on Android 12+ for reporting GPU driver memory allocations. v5: - Drop process_mem tracking - Set the gpu_id field to dev->primary->index (Lucas, Tvrtko) - Formatting cleanup under 80 columns v3: - Use now configurable CONFIG_TRACE_GPU_MEM instead of adding a per-driver Kconfig (Lucas) v2: - Use u64 as preferred by checkpatch (Tvrtko) - Fix errors in comments/Kconfig description (Tvrtko) - drop redundant "CONFIG" in Kconfig Signed-off-by: Juston Li Reviewed-by: Tvrtko Ursulin Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250709192313.479336-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 17 +++++++++++++++++ drivers/gpu/drm/xe/xe_device_types.h | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 7f8470b22dc9..4e0355d0f406 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -19,6 +19,8 @@ #include +#include + #include "xe_device.h" #include "xe_dma_buf.h" #include "xe_drm_client.h" @@ -418,6 +420,19 @@ static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt) xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0); } +static void update_global_total_pages(struct ttm_device *ttm_dev, + long num_pages) +{ +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + struct xe_device *xe = ttm_to_xe_device(ttm_dev); + u64 global_total_pages = + atomic64_add_return(num_pages, &xe->global_total_pages); + + trace_gpu_mem_total(xe->drm.primary->index, 0, + global_total_pages << PAGE_SHIFT); +#endif +} + static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, u32 page_flags) { @@ -525,6 +540,7 @@ static int 
xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt, xe_tt->purgeable = false; xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt); + update_global_total_pages(ttm_dev, tt->num_pages); return 0; } @@ -541,6 +557,7 @@ static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt) ttm_pool_free(&ttm_dev->pool, tt); xe_ttm_tt_account_subtract(xe, tt); + update_global_total_pages(ttm_dev, -(long)tt->num_pages); } static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index decc749fbf70..d15b2793629e 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -600,6 +600,14 @@ struct xe_device { u8 vm_inject_error_position; #endif +#if IS_ENABLED(CONFIG_TRACE_GPU_MEM) + /** + * @global_total_pages: global GPU page usage tracked for gpu_mem + * tracepoints + */ + atomic64_t global_total_pages; +#endif + /* private: */ #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY) From bf8bbaefaa6ae0a07971ea57b3208df60e8ad0a4 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:06 +0200 Subject: [PATCH 127/358] drm/sched: Avoid memory leaks with cancel_job() callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since its inception, the GPU scheduler can leak memory if the driver calls drm_sched_fini() while there are still jobs in flight. The simplest way to solve this in a backwards compatible manner is by adding a new callback, drm_sched_backend_ops.cancel_job(), which instructs the driver to signal the hardware fence associated with the job. Afterwards, the scheduler can safely use the established free_job() callback for freeing the job. Implement the new backend_ops callback cancel_job(). 
Suggested-by: Tvrtko Ursulin Link: https://lore.kernel.org/dri-devel/20250418113211.69956-1-tvrtko.ursulin@igalia.com/ Reviewed-by: Maíra Canal Acked-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-4-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_main.c | 34 ++++++++++++++++---------- include/drm/gpu_scheduler.h | 18 ++++++++++++++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 81ad40d9582b..a971f0c9e6e0 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1352,6 +1352,18 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_ } EXPORT_SYMBOL(drm_sched_init); +static void drm_sched_cancel_remaining_jobs(struct drm_gpu_scheduler *sched) +{ + struct drm_sched_job *job, *tmp; + + /* All other accessors are stopped. No locking necessary. */ + list_for_each_entry_safe_reverse(job, tmp, &sched->pending_list, list) { + sched->ops->cancel_job(job); + list_del(&job->list); + sched->ops->free_job(job); + } +} + /** * drm_sched_fini - Destroy a gpu scheduler * @@ -1359,19 +1371,11 @@ EXPORT_SYMBOL(drm_sched_init); * * Tears down and cleans up the scheduler. * - * This stops submission of new jobs to the hardware through - * drm_sched_backend_ops.run_job(). Consequently, drm_sched_backend_ops.free_job() - * will not be called for all jobs still in drm_gpu_scheduler.pending_list. - * There is no solution for this currently. Thus, it is up to the driver to make - * sure that: - * - * a) drm_sched_fini() is only called after for all submitted jobs - * drm_sched_backend_ops.free_job() has been called or that - * b) the jobs for which drm_sched_backend_ops.free_job() has not been called - * after drm_sched_fini() ran are freed manually. 
- * - * FIXME: Take care of the above problem and prevent this function from leaking - * the jobs in drm_gpu_scheduler.pending_list under any circumstances. + * This stops submission of new jobs to the hardware through &struct + * drm_sched_backend_ops.run_job. If &struct drm_sched_backend_ops.cancel_job + * is implemented, all jobs will be canceled through it and afterwards cleaned + * up through &struct drm_sched_backend_ops.free_job. If cancel_job is not + * implemented, memory could leak. */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { @@ -1401,6 +1405,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); + /* Avoid memory leaks if supported by the driver. */ + if (sched->ops->cancel_job) + drm_sched_cancel_remaining_jobs(sched); + if (sched->own_submit_wq) destroy_workqueue(sched->submit_wq); sched->ready = false; diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index e62a7214e052..190844370f48 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -512,6 +512,24 @@ struct drm_sched_backend_ops { * and it's time to clean it up. */ void (*free_job)(struct drm_sched_job *sched_job); + + /** + * @cancel_job: Used by the scheduler to guarantee remaining jobs' fences + * get signaled in drm_sched_fini(). + * + * Used by the scheduler to cancel all jobs that have not been executed + * with &struct drm_sched_backend_ops.run_job by the time + * drm_sched_fini() gets invoked. + * + * Drivers need to signal the passed job's hardware fence with an + * appropriate error code (e.g., -ECANCELED) in this callback. They + * must not free the job. + * + * The scheduler will only call this callback once it stopped calling + * all other callbacks forever, with the exception of &struct + * drm_sched_backend_ops.free_job. 
+ */ + void (*cancel_job)(struct drm_sched_job *sched_job); }; /** From 4576de9b79779a6f49ecac829ff5d8984eeb5d0b Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:07 +0200 Subject: [PATCH 128/358] drm/sched/tests: Implement cancel_job() callback The GPU Scheduler now supports a new callback, cancel_job(), which lets the scheduler cancel all jobs which might not yet be freed when drm_sched_fini() runs. Using this callback allows for significantly simplifying the mock scheduler teardown code. Implement the cancel_job() callback and adjust the code where necessary. Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-5-phasta@kernel.org --- .../gpu/drm/scheduler/tests/mock_scheduler.c | 68 +++++++------------ drivers/gpu/drm/scheduler/tests/sched_tests.h | 1 - 2 files changed, 25 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index 49d067fecd67..0d1d57213e05 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -63,7 +63,7 @@ static void drm_mock_sched_job_complete(struct drm_mock_sched_job *job) lockdep_assert_held(&sched->lock); job->flags |= DRM_MOCK_SCHED_JOB_DONE; - list_move_tail(&job->link, &sched->done_list); + list_del(&job->link); dma_fence_signal_locked(&job->hw_fence); complete(&job->done); } @@ -236,26 +236,41 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job) static void mock_sched_free_job(struct drm_sched_job *sched_job) { - struct drm_mock_scheduler *sched = - drm_sched_to_mock_sched(sched_job->sched); struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); - unsigned long flags; - /* Remove from the scheduler done list. 
*/ - spin_lock_irqsave(&sched->lock, flags); - list_del(&job->link); - spin_unlock_irqrestore(&sched->lock, flags); dma_fence_put(&job->hw_fence); - drm_sched_job_cleanup(sched_job); /* Mock job itself is freed by the kunit framework. */ } +static void mock_sched_cancel_job(struct drm_sched_job *sched_job) +{ + struct drm_mock_scheduler *sched = drm_sched_to_mock_sched(sched_job->sched); + struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); + unsigned long flags; + + hrtimer_cancel(&job->timer); + + spin_lock_irqsave(&sched->lock, flags); + if (!dma_fence_is_signaled_locked(&job->hw_fence)) { + list_del(&job->link); + dma_fence_set_error(&job->hw_fence, -ECANCELED); + dma_fence_signal_locked(&job->hw_fence); + } + spin_unlock_irqrestore(&sched->lock, flags); + + /* + * The GPU Scheduler will call drm_sched_backend_ops.free_job(), still. + * Mock job itself is freed by the kunit framework. + */ +} + static const struct drm_sched_backend_ops drm_mock_scheduler_ops = { .run_job = mock_sched_run_job, .timedout_job = mock_sched_timedout_job, - .free_job = mock_sched_free_job + .free_job = mock_sched_free_job, + .cancel_job = mock_sched_cancel_job, }; /** @@ -289,7 +304,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) sched->hw_timeline.context = dma_fence_context_alloc(1); atomic_set(&sched->hw_timeline.next_seqno, 0); INIT_LIST_HEAD(&sched->job_list); - INIT_LIST_HEAD(&sched->done_list); spin_lock_init(&sched->lock); return sched; @@ -304,38 +318,6 @@ struct drm_mock_scheduler *drm_mock_sched_new(struct kunit *test, long timeout) */ void drm_mock_sched_fini(struct drm_mock_scheduler *sched) { - struct drm_mock_sched_job *job, *next; - unsigned long flags; - LIST_HEAD(list); - - drm_sched_wqueue_stop(&sched->base); - - /* Force complete all unfinished jobs. 
*/ - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &sched->job_list, link) - list_move_tail(&job->link, &list); - spin_unlock_irqrestore(&sched->lock, flags); - - list_for_each_entry(job, &list, link) - hrtimer_cancel(&job->timer); - - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &list, link) - drm_mock_sched_job_complete(job); - spin_unlock_irqrestore(&sched->lock, flags); - - /* - * Free completed jobs and jobs not yet processed by the DRM scheduler - * free worker. - */ - spin_lock_irqsave(&sched->lock, flags); - list_for_each_entry_safe(job, next, &sched->done_list, link) - list_move_tail(&job->link, &list); - spin_unlock_irqrestore(&sched->lock, flags); - - list_for_each_entry_safe(job, next, &list, link) - mock_sched_free_job(&job->base); - drm_sched_fini(&sched->base); } diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index fbba38137f0c..0eddfb8d89e6 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -49,7 +49,6 @@ struct drm_mock_scheduler { spinlock_t lock; struct list_head job_list; - struct list_head done_list; struct { u64 context; From c2668a0e03501a26cedc82e32fe9f3f692d330db Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:08 +0200 Subject: [PATCH 129/358] drm/sched/tests: Add unit test for cancel_job() The scheduler unit tests now provide a new callback, cancel_job(). This callback gets used by drm_sched_fini() for all still pending jobs to cancel them. Implement a new unit test to test this. 
Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-6-phasta@kernel.org --- drivers/gpu/drm/scheduler/tests/tests_basic.c | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 7230057e0594..b1ae10c6bb37 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -204,6 +204,47 @@ static struct kunit_suite drm_sched_basic = { .test_cases = drm_sched_basic_tests, }; +static void drm_sched_basic_cancel(struct kunit *test) +{ + struct drm_mock_sched_entity *entity; + struct drm_mock_scheduler *sched; + struct drm_mock_sched_job *job; + bool done; + + /* + * Check that drm_sched_fini() uses the cancel_job() callback to cancel + * jobs that are still pending. + */ + + sched = drm_mock_sched_new(test, MAX_SCHEDULE_TIMEOUT); + entity = drm_mock_sched_entity_new(test, DRM_SCHED_PRIORITY_NORMAL, + sched); + + job = drm_mock_sched_job_new(test, entity); + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); + drm_mock_sched_fini(sched); + + KUNIT_ASSERT_EQ(test, job->hw_fence.error, -ECANCELED); +} + +static struct kunit_case drm_sched_cancel_tests[] = { + KUNIT_CASE(drm_sched_basic_cancel), + {} +}; + +static struct kunit_suite drm_sched_cancel = { + .name = "drm_sched_basic_cancel_tests", + .init = drm_sched_basic_init, + .exit = drm_sched_basic_exit, + .test_cases = drm_sched_cancel_tests, +}; + static void drm_sched_basic_timeout(struct kunit *test) { struct drm_mock_scheduler *sched = test->priv; @@ -471,6 +512,7 @@ static struct kunit_suite drm_sched_credits = { kunit_test_suites(&drm_sched_basic, &drm_sched_timeout, + &drm_sched_cancel, &drm_sched_priority, &drm_sched_modify_sched, &drm_sched_credits); From 
d1e5ba835f786fe83ecfbf09ba5c1a0060e87b6e Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:09 +0200 Subject: [PATCH 130/358] drm/sched: Warn if pending_list is not empty drm_sched_fini() can leak jobs under certain circumstances. Warn if that happens. Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-7-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index a971f0c9e6e0..d94270cf8bf5 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -1414,6 +1414,9 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) sched->ready = false; kfree(sched->sched_rq); sched->sched_rq = NULL; + + if (!list_empty(&sched->pending_list)) + dev_warn(sched->dev, "Tearing down scheduler while jobs are pending!\n"); } EXPORT_SYMBOL(drm_sched_fini); From 89b2675198abf1879b68c65dc0256d92c9eabb04 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:10 +0200 Subject: [PATCH 131/358] drm/nouveau: Make fence container helper usable driver-wide In order to implement a new DRM GPU scheduler callback in Nouveau, a helper for obtaining a nouveau_fence from a dma_fence is necessary. Such a helper exists already inside nouveau_fence.c, called from_fence(). Make that helper available to other C files with a more precise name. 
Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-8-phasta@kernel.org --- drivers/gpu/drm/nouveau/nouveau_fence.c | 20 +++++++------------- drivers/gpu/drm/nouveau/nouveau_fence.h | 6 ++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index d5654e26d5bc..869d4335c0f4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -38,12 +38,6 @@ static const struct dma_fence_ops nouveau_fence_ops_uevent; static const struct dma_fence_ops nouveau_fence_ops_legacy; -static inline struct nouveau_fence * -from_fence(struct dma_fence *fence) -{ - return container_of(fence, struct nouveau_fence, base); -} - static inline struct nouveau_fence_chan * nouveau_fctx(struct nouveau_fence *fence) { @@ -77,7 +71,7 @@ nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) fence->ops != &nouveau_fence_ops_uevent) return NULL; - return from_fence(fence); + return to_nouveau_fence(fence); } void @@ -268,7 +262,7 @@ nouveau_fence_done(struct nouveau_fence *fence) static long nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); unsigned long sleep_time = NSEC_PER_MSEC / 1000; unsigned long t = jiffies, timeout = t + wait; @@ -448,7 +442,7 @@ static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence) static const char *nouveau_fence_get_timeline_name(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); return !fctx->dead ? 
fctx->name : "dead channel"; @@ -462,7 +456,7 @@ static const char *nouveau_fence_get_timeline_name(struct dma_fence *f) */ static bool nouveau_fence_is_signaled(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); struct nouveau_channel *chan; bool ret = false; @@ -478,7 +472,7 @@ static bool nouveau_fence_is_signaled(struct dma_fence *f) static bool nouveau_fence_no_signaling(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); /* * caller should have a reference on the fence, @@ -503,7 +497,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f) static void nouveau_fence_release(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); kref_put(&fctx->fence_ref, nouveau_fence_context_put); @@ -521,7 +515,7 @@ static const struct dma_fence_ops nouveau_fence_ops_legacy = { static bool nouveau_fence_enable_signaling(struct dma_fence *f) { - struct nouveau_fence *fence = from_fence(f); + struct nouveau_fence *fence = to_nouveau_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 6a983dd9f7b9..183dd43ecfff 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -17,6 +17,12 @@ struct nouveau_fence { unsigned long timeout; }; +static inline struct nouveau_fence * +to_nouveau_fence(struct dma_fence *fence) +{ + return container_of(fence, struct nouveau_fence, base); +} + int nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *); int nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *); void nouveau_fence_unref(struct nouveau_fence **); From 
5f46f5c7af8c632de3742dc0cf3cb9a05bd52437 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:11 +0200 Subject: [PATCH 132/358] drm/nouveau: Add new callback for scheduler teardown There is a new callback for always tearing the scheduler down in a leak-free, deadlock-free manner. Port Nouveau as its first user by providing the scheduler with a callback that ensures the fence context gets killed in drm_sched_fini(). Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-9-phasta@kernel.org --- drivers/gpu/drm/nouveau/nouveau_fence.c | 15 +++++++++++++++ drivers/gpu/drm/nouveau/nouveau_fence.h | 1 + drivers/gpu/drm/nouveau/nouveau_sched.c | 15 ++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 869d4335c0f4..9f345a008717 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -240,6 +240,21 @@ nouveau_fence_emit(struct nouveau_fence *fence) return ret; } +void +nouveau_fence_cancel(struct nouveau_fence *fence) +{ + struct nouveau_fence_chan *fctx = nouveau_fctx(fence); + unsigned long flags; + + spin_lock_irqsave(&fctx->lock, flags); + if (!dma_fence_is_signaled_locked(&fence->base)) { + dma_fence_set_error(&fence->base, -ECANCELED); + if (nouveau_fence_signal(fence)) + nvif_event_block(&fctx->event); + } + spin_unlock_irqrestore(&fctx->lock, flags); +} + bool nouveau_fence_done(struct nouveau_fence *fence) { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 183dd43ecfff..9957a919bd38 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -29,6 +29,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); +void nouveau_fence_cancel(struct 
nouveau_fence *fence); int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 460a5fb02412..2ec62059c351 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -11,6 +11,7 @@ #include "nouveau_exec.h" #include "nouveau_abi16.h" #include "nouveau_sched.h" +#include "nouveau_chan.h" #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 @@ -393,10 +394,23 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } +static void +nouveau_sched_cancel_job(struct drm_sched_job *sched_job) +{ + struct nouveau_fence *fence; + struct nouveau_job *job; + + job = to_nouveau_job(sched_job); + fence = to_nouveau_fence(job->done_fence); + + nouveau_fence_cancel(fence); +} + static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, + .cancel_job = nouveau_sched_cancel_job, }; static int @@ -482,7 +496,6 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, return 0; } - static void nouveau_sched_fini(struct nouveau_sched *sched) { From bead8800222768dab1a421206350d530b0c45254 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 10 Jul 2025 14:54:12 +0200 Subject: [PATCH 133/358] drm/nouveau: Remove waitque for sched teardown struct nouveau_sched contains a waitque needed to prevent drm_sched_fini() from being called while there are still jobs pending. Doing so so far would have caused memory leaks. With the new memleak-free mode of operation switched on in drm_sched_fini() by providing the callback nouveau_sched_cancel_job() the waitque is not necessary anymore. Remove the waitque. 
Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://lore.kernel.org/r/20250710125412.128476-10-phasta@kernel.org --- drivers/gpu/drm/nouveau/nouveau_sched.c | 20 +++++++------------- drivers/gpu/drm/nouveau/nouveau_sched.h | 9 +++------ drivers/gpu/drm/nouveau/nouveau_uvmm.c | 8 ++++---- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 2ec62059c351..7d9c3418e76b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -122,11 +122,9 @@ nouveau_job_done(struct nouveau_job *job) { struct nouveau_sched *sched = job->sched; - spin_lock(&sched->job.list.lock); + spin_lock(&sched->job_list.lock); list_del(&job->entry); - spin_unlock(&sched->job.list.lock); - - wake_up(&sched->job.wq); + spin_unlock(&sched->job_list.lock); } void @@ -307,9 +305,9 @@ nouveau_job_submit(struct nouveau_job *job) } /* Submit was successful; add the job to the schedulers job list. 
*/ - spin_lock(&sched->job.list.lock); - list_add(&job->entry, &sched->job.list.head); - spin_unlock(&sched->job.list.lock); + spin_lock(&sched->job_list.lock); + list_add(&job->entry, &sched->job_list.head); + spin_unlock(&sched->job_list.lock); drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); @@ -460,9 +458,8 @@ nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, goto fail_sched; mutex_init(&sched->mutex); - spin_lock_init(&sched->job.list.lock); - INIT_LIST_HEAD(&sched->job.list.head); - init_waitqueue_head(&sched->job.wq); + spin_lock_init(&sched->job_list.lock); + INIT_LIST_HEAD(&sched->job_list.head); return 0; @@ -502,9 +499,6 @@ nouveau_sched_fini(struct nouveau_sched *sched) struct drm_gpu_scheduler *drm_sched = &sched->base; struct drm_sched_entity *entity = &sched->entity; - rmb(); /* for list_empty to work without lock */ - wait_event(sched->job.wq, list_empty(&sched->job.list.head)); - drm_sched_entity_fini(entity); drm_sched_fini(drm_sched); diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index 20cd1da8db73..b98c3f0bef30 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -103,12 +103,9 @@ struct nouveau_sched { struct mutex mutex; struct { - struct { - struct list_head head; - spinlock_t lock; - } list; - struct wait_queue_head wq; - } job; + struct list_head head; + spinlock_t lock; + } job_list; }; int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 48f105239f42..ddfc46bc1b3e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1019,8 +1019,8 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) u64 end = addr + range; again: - spin_lock(&sched->job.list.lock); - list_for_each_entry(__job, 
&sched->job.list.head, entry) { + spin_lock(&sched->job_list.lock); + list_for_each_entry(__job, &sched->job_list.head, entry) { struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); list_for_each_op(op, &bind_job->ops) { @@ -1030,7 +1030,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) if (!(end <= op_addr || addr >= op_end)) { nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&sched->job.list.lock); + spin_unlock(&sched->job_list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1038,7 +1038,7 @@ bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) } } } - spin_unlock(&sched->job.list.lock); + spin_unlock(&sched->job_list.lock); } static int From 11895f375939d60efe7ed5dddc1cffe2e79f976c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 7 Jul 2025 23:14:12 +0200 Subject: [PATCH 134/358] drm/i915/bios: Apply vlv_fixup_mipi_sequences() to v2 mipi-sequences too It turns out that the fixup from vlv_fixup_mipi_sequences() is necessary for some DSI panel's with version 2 mipi-sequences too. Specifically the Acer Iconia One 8 A1-840 (not to be confused with the A1-840FHD which is different) has the following sequences: BDB block 53 (1284 bytes) - MIPI sequence block: Sequence block version v2 Panel 0 * Sequence 2 - MIPI_SEQ_INIT_OTP GPIO index 9, source 0, set 0 (0x00) Delay: 50000 us GPIO index 9, source 0, set 1 (0x01) Delay: 6000 us GPIO index 9, source 0, set 0 (0x00) Delay: 6000 us GPIO index 9, source 0, set 1 (0x01) Delay: 25000 us Send DCS: Port A, VC 0, LP, Type 39, Length 5, Data ff aa 55 a5 80 Send DCS: Port A, VC 0, LP, Type 39, Length 3, Data 6f 11 00 ... 
Send DCS: Port A, VC 0, LP, Type 05, Length 1, Data 29 Delay: 120000 us Sequence 4 - MIPI_SEQ_DISPLAY_OFF Send DCS: Port A, VC 0, LP, Type 05, Length 1, Data 28 Delay: 105000 us Send DCS: Port A, VC 0, LP, Type 05, Length 2, Data 10 00 Delay: 10000 us Sequence 5 - MIPI_SEQ_ASSERT_RESET Delay: 10000 us GPIO index 9, source 0, set 0 (0x00) Notice how there is no MIPI_SEQ_DEASSERT_RESET, instead the deassert is done at the beginning of MIPI_SEQ_INIT_OTP, which is exactly what the fixup from vlv_fixup_mipi_sequences() fixes up. Extend it to also apply to v2 sequences, this fixes the panel not working on the Acer Iconia One 8 A1-840. Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14605 Signed-off-by: Hans de Goede Acked-by: Jani Nikula Link: https://lore.kernel.org/r/20250703143824.7121-1-hansg@kernel.org Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_bios.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 9ce41e689d50..9c268bed091d 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -1939,7 +1939,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, int index, len; if (drm_WARN_ON(display->drm, - !data || panel->vbt.dsi.seq_version != 1)) + !data || panel->vbt.dsi.seq_version >= 3)) return 0; /* index = 1 to skip sequence byte */ @@ -1962,7 +1962,7 @@ static int get_init_otp_deassert_fragment_len(struct intel_display *display, } /* - * Some v1 VBT MIPI sequences do the deassert in the init OTP sequence. + * Some v1/v2 VBT MIPI sequences do the deassert in the init OTP sequence. * The deassert must be done before calling intel_dsi_device_ready, so for * these devices we split the init OTP sequence into a deassert sequence and * the actual init OTP part. 
@@ -1973,9 +1973,9 @@ static void vlv_fixup_mipi_sequences(struct intel_display *display, u8 *init_otp; int len; - /* Limit this to v1 vid-mode sequences */ + /* Limit this to v1/v2 vid-mode sequences */ if (panel->vbt.dsi.config->is_cmd_mode || - panel->vbt.dsi.seq_version != 1) + panel->vbt.dsi.seq_version >= 3) return; /* Only do this if there are otp and assert seqs and no deassert seq */ From 40b6a946d21ee7b2b6d394bb2f1cdd3973aa9da5 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:09 +0800 Subject: [PATCH 135/358] drm/ttm: add new api ttm_device_prepare_hibernation() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This new api is used for hibernation to move GTT BOs to shmem after VRAM eviction. shmem will be flushed to swap disk later to reduce the system memory usage for hibernation. Signed-off-by: Samuel Zhang Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250710062313.3226149-2-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- drivers/gpu/drm/ttm/ttm_device.c | 22 ++++++++++++++++++++++ include/drm/ttm/ttm_device.h | 1 + 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 816e2cba6016..c3e2fcbdd2cc 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -125,6 +125,28 @@ static int ttm_global_init(void) return ret; } +/** + * ttm_device_prepare_hibernation - move GTT BOs to shmem for hibernation. + * + * @bdev: A pointer to a struct ttm_device to prepare hibernation for. + * + * Return: 0 on success, negative number on failure. 
+ */ +int ttm_device_prepare_hibernation(struct ttm_device *bdev) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + }; + int ret; + + do { + ret = ttm_device_swapout(bdev, &ctx, GFP_KERNEL); + } while (ret > 0); + return ret; +} +EXPORT_SYMBOL(ttm_device_prepare_hibernation); + /* * A buffer object shrink method that tries to swap out the first * buffer object on the global::swap_lru list. diff --git a/include/drm/ttm/ttm_device.h b/include/drm/ttm/ttm_device.h index 39b8636b1845..592b5f802859 100644 --- a/include/drm/ttm/ttm_device.h +++ b/include/drm/ttm/ttm_device.h @@ -272,6 +272,7 @@ struct ttm_device { int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags); int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags); +int ttm_device_prepare_hibernation(struct ttm_device *bdev); static inline struct ttm_resource_manager * ttm_manager_type(struct ttm_device *bdev, int mem_type) From 924dda024f3bea64be5f3ac067a075e466739dc9 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:10 +0800 Subject: [PATCH 136/358] drm/amdgpu: move GTT to shmem after eviction for hibernation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When hibernate with data center dGPUs, huge number of VRAM BOs evicted to GTT and takes too much system memory. This will cause hibernation fail due to insufficient memory for creating the hibernation image. Move GTT BOs to shmem in KMD, then shmem to swap disk in kernel hibernation code to make room for hibernation image. 
Signed-off-by: Samuel Zhang Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250710062313.3226149-3-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d282c0753b14..723ab95d8c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5029,8 +5029,16 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); - if (ret) + if (ret) { dev_warn(adev->dev, "evicting device resources failed\n"); + return ret; + } + + if (adev->in_s4) { + ret = ttm_device_prepare_hibernation(&adev->mman.bdev); + if (ret) + dev_err(adev->dev, "prepare hibernation failed, %d\n", ret); + } return ret; } From 2640e819474f4a9ec78aa3cdb9063e4b5cf18ae4 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:11 +0800 Subject: [PATCH 137/358] PM: hibernate: shrink shmem pages after dev_pm_ops.prepare() When hibernate with data center dGPUs, huge number of VRAM data will be moved to shmem during dev_pm_ops.prepare(). These shmem pages take a lot of system memory so that there's no enough free memory for creating the hibernation image. This will cause hibernation fail and abort. After dev_pm_ops.prepare(), call shrink_all_memory() to force move shmem pages to swap disk and reclaim the pages, so that there's enough system memory for hibernation image and less pages needed to copy to the image. This patch can only flush and free about half shmem pages. It will be better to flush and free more pages, even all of shmem pages, so that there're less pages to be copied to the hibernation image and the overall hibernation time can be reduced. Signed-off-by: Samuel Zhang Acked-by: Rafael J. 
Wysocki Link: https://lore.kernel.org/r/20250710062313.3226149-4-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- kernel/power/hibernate.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 519fb09de5e0..e526237a8c0f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -381,6 +381,23 @@ static int create_image(int platform_mode) return error; } +static void shrink_shmem_memory(void) +{ + struct sysinfo info; + unsigned long nr_shmem_pages, nr_freed_pages; + + si_meminfo(&info); + nr_shmem_pages = info.sharedram; /* current page count used for shmem */ + /* + * The intent is to reclaim all shmem pages. Though shrink_all_memory() can + * only reclaim about half of them, it's enough for creating the hibernation + * image. + */ + nr_freed_pages = shrink_all_memory(nr_shmem_pages); + pr_debug("requested to reclaim %lu shmem pages, actually freed %lu pages\n", + nr_shmem_pages, nr_freed_pages); +} + /** * hibernation_snapshot - Quiesce devices and create a hibernation image. * @platform_mode: If set, use platform driver to prepare for the transition. @@ -422,6 +439,15 @@ int hibernation_snapshot(int platform_mode) goto Thaw; } + /* + * Device drivers may move lots of data to shmem in dpm_prepare(). The shmem + * pages will use lots of system memory, causing hibernation image creation + * fail due to insufficient free memory. + * This call is to force flush the shmem pages to swap disk and reclaim + * the system memory so that image creation can succeed. + */ + shrink_shmem_memory(); + console_suspend_all(); pm_restrict_gfp_mask(); From c2aaddbd2deded9d3301f1bafed242a0f71baba8 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:12 +0800 Subject: [PATCH 138/358] PM: hibernate: add new api pm_hibernate_is_recovering() dev_pm_ops.thaw() is called in following cases: * normal case: after hibernation image has been created. 
* error case 1: creation of a hibernation image has failed. * error case 2: restoration from a hibernation image has failed. In the normal case, it is called mainly to resume the storage devices needed for saving the hibernation image. Other devices that are not involved in saving the image do not need to be resumed. But since there is no API to tell in which case thaw() is being called, device drivers can't conditionally resume the device in thaw(). The new pm_hibernate_is_recovering() is such an API: it returns true when thaw() is called to recover from an error and false in the normal case. Signed-off-by: Samuel Zhang Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/20250710062313.3226149-5-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- drivers/base/power/main.c | 14 ++++++++++++++ include/linux/suspend.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index eebe699fdf4f..63f3a48f434c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -66,6 +66,20 @@ static pm_message_t pm_transition; static DEFINE_MUTEX(async_wip_mtx); static int async_error; +/** + * pm_hibernate_is_recovering - if recovering from hibernate due to error. + * + * Used to query if dev_pm_ops.thaw() is called for normal hibernation case or + * recovering from some error. + * + * Return: true for error case, false for normal case.
+ */ +bool pm_hibernate_is_recovering(void) +{ + return pm_transition.event == PM_EVENT_RECOVER; +} +EXPORT_SYMBOL_GPL(pm_hibernate_is_recovering); + static const char *pm_verb(int event) { switch (event) { diff --git a/include/linux/suspend.h b/include/linux/suspend.h index b1c76c8f2c82..293137210fdf 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,6 +426,8 @@ int is_hibernate_resume_dev(dev_t dev); static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif +bool pm_hibernate_is_recovering(void); + /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ From 530694f54dd5e097866999bbaebc5c133e5507b6 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Thu, 10 Jul 2025 14:23:13 +0800 Subject: [PATCH 139/358] drm/amdgpu: do not resume device in thaw for normal hibernation For normal hibernation, the GPU does not need to be resumed in thaw since it is not involved in writing the hibernation image. Skipping the resume in this case reduces the hibernation time. On a VM with 8 * 192GB VRAM dGPUs, 98% VRAM usage and 1.7TB system memory, this can save 50 minutes.
Signed-off-by: Samuel Zhang Tested-by: Mario Limonciello Reviewed-by: Mario Limonciello Reviewed-by: Lijo Lazar Link: https://lore.kernel.org/r/20250710062313.3226149-6-guoqing.zhang@amd.com Signed-off-by: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b299e15bb5e5..6a39903e63b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2541,6 +2541,10 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) if (amdgpu_ras_intr_triggered()) return; + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return; + /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown. * unfortunately we can't detect certain @@ -2557,6 +2561,10 @@ static int amdgpu_pmops_prepare(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; + /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ @@ -2656,12 +2664,21 @@ static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + /* do not resume device if it's normal hibernation */ + if (!pm_hibernate_is_recovering()) + return 0; + return amdgpu_device_resume(drm_dev, true); } static int amdgpu_pmops_poweroff(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + /* device maybe not resumed here, return immediately in this case */ + if (adev->in_s4 && adev->in_suspend) + return 0; return amdgpu_device_suspend(drm_dev, true); } From 1d2e2503e506ddc499cbb7afdc8b70bcf6fe241f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: 
Thu, 10 Jul 2025 10:30:39 +0000 Subject: [PATCH 140/358] drm/xe/bmg: Don't use WA 16023588340 and 22019338487 on VF These workarounds are not applicable for use by the VFs. Signed-off-by: Michal Wajdeczko Tested-by: Jakub Kolakowski Reviewed-by: Satyanarayana K V P Signed-off-by: Jakub Kolakowski Link: https://lore.kernel.org/r/20250710103040.375610-2-jakub1.kolakowski@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa_oob.rules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 96cc33da0fb5..e7ed5d583d68 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -40,10 +40,10 @@ GRAPHICS_VERSION_RANGE(3000, 3001) GRAPHICS_VERSION(3003) 22019338487 MEDIA_VERSION(2000) - GRAPHICS_VERSION(2001) + GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf) 22019338487_display PLATFORM(LUNARLAKE) -16023588340 GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf) 14019789679 GRAPHICS_VERSION(1255) GRAPHICS_VERSION_RANGE(1270, 2004) no_media_l3 MEDIA_VERSION(3000) From 0efec0500117947f924e5ac83be40f96378af85a Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Tue, 8 Jul 2025 02:14:51 +0000 Subject: [PATCH 141/358] drm/xe/pm: Correct comment of xe_pm_set_vram_threshold() The parameter threshold is with size in MiB, not in bits. Correct it to avoid any confusion. v2: s/mb/MiB, s/vram/VRAM, fix return section. 
(Michal) Fixes: 30c399529f4c ("drm/xe: Document Xe PM component") Cc: Michal Wajdeczko Cc: Rodrigo Vivi Signed-off-by: Shuicheng Lin Link: https://lore.kernel.org/r/20250708021450.3602087-2-shuicheng.lin@intel.com Reviewed-by: Stuart Summers Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_pm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index f171a91b849c..e279b47ba03b 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -761,11 +761,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe) } /** - * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold + * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold * @xe: xe device instance - * @threshold: VRAM size in bites for the D3cold threshold + * @threshold: VRAM size in MiB for the D3cold threshold * - * Returns 0 for success, negative error code otherwise. + * Return: + * * 0 - success + * * -EINVAL - invalid argument */ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold) { From 1b822b7f564b8e06ac49509baad4468927a9f852 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 9 Jul 2025 19:40:36 +0200 Subject: [PATCH 142/358] drm/xe/guc: Rename CT state change helper In this helper we are already doing much more than just setting a new CT state and its name was little misleading. Rename it. 
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Acked-by: Matthew Brost Link: https://lore.kernel.org/r/20250709174038.1876-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 23e8c155025e..2a052dc22ff2 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -384,7 +384,7 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) return ret > 0 ? -EPROTO : ret; } -static void xe_guc_ct_set_state(struct xe_guc_ct *ct, +static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ @@ -469,7 +469,7 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) if (err) goto err_out; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED); smp_mb(); wake_up_all(&ct->wq); @@ -514,7 +514,7 @@ static void stop_g2h_handler(struct xe_guc_ct *ct) */ void xe_guc_ct_disable(struct xe_guc_ct *ct) { - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED); ct_exit_safe_mode(ct); stop_g2h_handler(ct); } @@ -530,7 +530,7 @@ void xe_guc_ct_stop(struct xe_guc_ct *ct) if (!xe_guc_ct_initialized(ct)) return; - xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); + guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } From 4ecdcf9caf519fbab1aa01b431339387fed00fb8 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 9 Jul 2025 19:40:37 +0200 Subject: [PATCH 143/358] drm/xe/guc: Move state change logger to helper In the state change helper we are already doing extra stuff, move debug state logger there to cover all state changes. 
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Acked-by: Matthew Brost Link: https://lore.kernel.org/r/20250709174038.1876-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2a052dc22ff2..17e5870baf33 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -387,6 +387,8 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable) static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { + struct xe_gt *gt = ct_to_gt(ct); + mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -398,6 +400,10 @@ static void guc_ct_change_state(struct xe_guc_ct *ct, ct->g2h_outstanding = 0; ct->state = state; + xe_gt_dbg(gt, "GuC CT communication channel %s\n", + state == XE_GUC_CT_STATE_STOPPED ? "stopped" : + str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED)); + spin_unlock_irq(&ct->fast_lock); /* @@ -473,7 +479,6 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) smp_mb(); wake_up_all(&ct->wq); - xe_gt_dbg(gt, "GuC CT communication channel enabled\n"); if (ct_needs_safe_mode(ct)) ct_enter_safe_mode(ct); From 94de94d24ea8cf567ec7254a723c3192c72c2ca6 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 9 Jul 2025 19:40:38 +0200 Subject: [PATCH 144/358] drm/xe/guc: Cancel ongoing H2G requests when stopping CT Once we have started a GT reset sequence, which includes stopping GuC CTB communication, we should also cancel all ongoing H2G send- recv requests, as either GuC is already dead, or due to imminent reset GuC will not be able to reply, or due to internal cleanup we will lose pending fences. With this we will report dedicated -ECANCELED error instead of misleading -ETIME. 
Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Reviewed-by: Jonathan Cavitt Acked-by: Matthew Brost Link: https://lore.kernel.org/r/20250709174038.1876-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_guc_ct.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 17e5870baf33..b6acccfcd351 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -85,6 +85,7 @@ struct g2h_fence { u16 error; u16 hint; u16 reason; + bool cancel; bool retry; bool fail; bool done; @@ -103,6 +104,13 @@ static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) g2h_fence->seqno = ~0x0; } +static void g2h_fence_cancel(struct g2h_fence *g2h_fence) +{ + g2h_fence->cancel = true; + g2h_fence->fail = true; + g2h_fence->done = true; +} + static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence) { return g2h_fence->seqno == ~0x0; @@ -388,6 +396,8 @@ static void guc_ct_change_state(struct xe_guc_ct *ct, enum xe_guc_ct_state state) { struct xe_gt *gt = ct_to_gt(ct); + struct g2h_fence *g2h_fence; + unsigned long idx; mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */ spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */ @@ -406,6 +416,14 @@ static void guc_ct_change_state(struct xe_guc_ct *ct, spin_unlock_irq(&ct->fast_lock); + /* cancel all in-flight send-recv requests */ + xa_for_each(&ct->fence_lookup, idx, g2h_fence) + g2h_fence_cancel(g2h_fence); + + /* make sure guc_ct_send_recv() will see g2h_fence changes */ + smp_mb(); + wake_up_all(&ct->g2h_fence_wq); + /* * Lockdep doesn't like this under the fast lock and he destroy only * needs to be serialized with the send path which ct lock provides. 
@@ -1098,6 +1116,11 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, goto retry; } if (g2h_fence.fail) { + if (g2h_fence.cancel) { + xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]); + ret = -ECANCELED; + goto unlock; + } xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n", action[0], g2h_fence.error, g2h_fence.hint); ret = -EIO; @@ -1106,6 +1129,7 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, if (ret > 0) ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data; +unlock: mutex_unlock(&ct->lock); return ret; From b59df66c0876891ad406844f238f6b322741e521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Jul 2025 16:07:22 -0300 Subject: [PATCH 145/358] drm/doc: Fix title underline for "Task information" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following warning: Documentation/gpu/drm-uapi.rst:450: WARNING: Title underline too short. Task information --------------- [docutils] Fixes: cd37124b4093 ("drm/doc: Add a section about "Task information" for the wedge API") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20250618150333.5ded99a0@canb.auug.org.au/ Reviewed-by: Raag Jadav Acked-by: Randy Dunlap Tested-by: Randy Dunlap Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250704190724.1159416-1-andrealmeid@igalia.com Signed-off-by: André Almeida --- Documentation/gpu/drm-uapi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 263e5a97c080..5bc1da4915af 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -447,7 +447,7 @@ hang is usually the most critical one which can result in consequential hangs or complete wedging. 
Task information ---------------- +---------------- The information about which application (if any) was involved in the device wedging is useful for userspace if they want to notify the user about what From e41315787dda23daf146afb03b844d2c5880b72b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Jul 2025 16:07:23 -0300 Subject: [PATCH 146/358] drm: Add missing struct drm_wedge_task_info kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following kernel doc warning: include/drm/drm_device.h:40: warning: Function parameter or struct member 'pid' not described in 'drm_wedge_task_info' include/drm/drm_device.h:40: warning: Function parameter or struct member 'comm' not described in 'drm_wedge_task_info' Fixes: 183bccafa176 ("drm: Create a task info option for wedge events") Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/lkml/20250618151307.4a1a5e17@canb.auug.org.au/ Reviewed-by: Raag Jadav Acked-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250704190724.1159416-2-andrealmeid@igalia.com Signed-off-by: André Almeida --- include/drm/drm_device.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 08b3b2467c4c..a33aedd5e9ec 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -35,7 +35,9 @@ struct pci_controller; * struct drm_wedge_task_info - information about the guilty task of a wedge dev */ struct drm_wedge_task_info { + /** @pid: pid of the task */ pid_t pid; + /** @comm: command name of the task */ char comm[TASK_COMM_LEN]; }; From cf590b239c98ff7772c61a9124dbcb5f250e9395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Jul 2025 16:07:24 -0300 Subject: [PATCH 147/358] drm/doc: Fix grammar for "Task information" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the repetitive wording at 
the end of "Task information" section. Reviewed-by: Raag Jadav Acked-by: Randy Dunlap Tested-by: Randy Dunlap Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20250704190724.1159416-3-andrealmeid@igalia.com Signed-off-by: André Almeida --- Documentation/gpu/drm-uapi.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 5bc1da4915af..843facf01b2d 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -460,8 +460,8 @@ event string. The reliability of this information is driver and hardware specific, and should be taken with a caution regarding it's precision. To have a big picture of what -really happened, the devcoredump file provides should have much more detailed -information about the device state and about the event. +really happened, the devcoredump file provides much more detailed information +about the device state and about the event. Consumer prerequisites ---------------------- From 667efb341917bde19f5d7517b65defcdaed67c9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Almeida?= Date: Fri, 4 Jul 2025 00:06:29 -0300 Subject: [PATCH 148/358] drm/amdgpu: Fix lifetime of struct amdgpu_task_info after ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a ring reset happens, amdgpu calls drm_dev_wedged_event() using struct amdgpu_task_info *ti as one of the arguments. After using *ti, a call to amdgpu_vm_put_task_info(ti) is required to correctly track its lifetime. However, it's called from a place that the ring reset path never reaches due to a goto after drm_dev_wedged_event() is called. Move amdgpu_vm_put_task_info() below the exit label to make sure that it's called regardless of the code path. amdgpu_vm_put_task_info() can only accept a valid address or NULL as argument, so initialise *ti to make sure we can call this function if *ti isn't used.
Fixes: a72002cb181f ("drm/amdgpu: Make use of drm_wedge_task_info") Reported-by: Dave Airlie Closes: https://lore.kernel.org/dri-devel/CAPM=9tz0rQP8VZWKWyuF8kUMqRScxqoa6aVdwWw9=5yYxyYQ2Q@mail.gmail.com/ Reviewed-by: Christian König Link: https://lore.kernel.org/r/20250704030629.1064397-1-andrealmeid@igalia.com Signed-off-by: André Almeida --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2b58e353cca1..87f7040d8a2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -90,7 +90,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); struct drm_wedge_task_info *info = NULL; - struct amdgpu_task_info *ti; + struct amdgpu_task_info *ti = NULL; struct amdgpu_device *adev = ring->adev; int idx, r; @@ -148,8 +148,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); - amdgpu_vm_put_task_info(ti); - if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); @@ -175,6 +173,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: + amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } From b0a2ee5567ab0d83ff1f7f7542fead46233f0d04 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:00 -0700 Subject: [PATCH 149/358] drm/xe: prepare xe_gen_wa_oob to be multi-use There is a need for additional oob rules files. Make the current gen file more robust to support more files. 
Signed-off-by: Matt Atwood Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250709221605.172516-2-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gen_wa_oob.c | 45 +++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c index ed9183599e31..6581cb0f0e59 100644 --- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c +++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c @@ -18,8 +18,8 @@ " *\n" \ " * This file was generated from rules: %s\n" \ " */\n" \ - "#ifndef _GENERATED_XE_WA_OOB_\n" \ - "#define _GENERATED_XE_WA_OOB_\n" \ + "#ifndef _GENERATED_%s_\n" \ + "#define _GENERATED_%s_\n" \ "\n" \ "enum {\n" @@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen) } #define MAX_LINE_LEN 4096 -static int parse(FILE *input, FILE *csource, FILE *cheader) +static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix) { char line[MAX_LINE_LEN + 1]; char *name, *prev_name = NULL, *rules; @@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) } if (name) { - fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx); + fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx); /* Close previous entry before starting a new one */ if (idx) @@ -118,7 +118,33 @@ static int parse(FILE *input, FILE *csource, FILE *cheader) if (idx) fprintf(csource, ") },\n"); - fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx); + fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx); + + return 0; +} + +static int fn_to_prefix(const char *fn, char *prefix, size_t size) +{ + size_t len; + + fn = basename(fn); + len = strlen(fn); + + if (len > size - 1) + return -ENAMETOOLONG; + + memcpy(prefix, fn, len + 1); + + for (char *p = prefix; *p; p++) { + switch (*p) { + case '.': + *p = '\0'; + return 0; + default: + *p = toupper(*p); + break; + } + } return 0; } @@ -141,6 +167,7 @@ int main(int argc, const char *argv[]) [ARGS_CHEADER] = { .fn = 
argv[3], .mode = "w" }, }; int ret = 1; + char prefix[128]; if (argc < 3) { fprintf(stderr, "ERROR: wrong arguments\n"); @@ -148,6 +175,9 @@ int main(int argc, const char *argv[]) return 1; } + if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0) + return 1; + for (int i = 0; i < _ARGS_COUNT; i++) { args[i].f = fopen(args[i].fn, args[i].mode); if (!args[i].f) { @@ -157,9 +187,10 @@ int main(int argc, const char *argv[]) } } - fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn); + fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix); + ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f, - args[ARGS_CHEADER].f); + args[ARGS_CHEADER].f, prefix); if (!ret) fprintf(args[ARGS_CHEADER].f, FOOTER); From f037e0b78e6da6c0f0243b57bb433929a37e6a8f Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:01 -0700 Subject: [PATCH 150/358] drm/xe: add xe_device_wa infrastructure There are some workarounds that must be applied before gt init, wa_15015404425 for example. Instead of sprinkling them conditionally throughout the driver as we did for i915, generate an oob.rules file reusing the RTP infrastructure to make these easier to track. v2: rename xe_soc_wa to xe_device_wa v5: derive prefix from argument rather than hard coding the values.
v6: split out xe_gen-wa_oob changes Signed-off-by: Matt Atwood Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250709221605.172516-3-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/Makefile | 9 ++++++++- drivers/gpu/drm/xe/xe_device_wa_oob.rules | 0 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_device_wa_oob.rules diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index d52cf5808d6f..83a36c47a2f9 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ $(src)/xe_wa_oob.rules $(call cmd,wa_oob) +generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h +quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob)) + cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob) +$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \ + $(src)/xe_device_wa_oob.rules + $(call cmd,device_wa_oob) + # Please keep these build lists sorted! # core driver code @@ -340,4 +347,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE $(call if_changed_dep,hdrtest) uses_generated_oob := $(addprefix $(obj)/, $(xe-y)) -$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h +$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules new file mode 100644 index 000000000000..e69de29bb2d1 From e7201d98ca196b72901686d47003a4bde3865364 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:02 -0700 Subject: [PATCH 151/358] drm/xe: add new type to RTP context Prepare the RTP context to be used before GT init. Add the xe device as a type, put WARN_ONs to protect existing RTP_MATCHes. 
v5: split out into separate patch, change definition order v6: catch missing cases for checking gt init Reviewed-by: Lucas De Marchi Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20250709221605.172516-4-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_rtp.c | 32 +++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_rtp.h | 3 ++- drivers/gpu/drm/xe/xe_rtp_types.h | 2 ++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 29e694bb1219..e4b3b135a4af 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -56,37 +56,61 @@ static bool rule_matches(const struct xe_device *xe, xe->info.subplatform == r->subplatform; break; case XE_RTP_MATCH_GRAPHICS_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 >= r->ver_start && xe->info.graphics_verx100 <= r->ver_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.graphics_verx100 == r->ver_start; break; case XE_RTP_MATCH_GRAPHICS_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.graphics >= r->step_start && xe->info.step.graphics < r->step_end && (!has_samedia(xe) || !xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_RANGE: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 >= r->ver_start && xe->info.media_verx100 <= r->ver_end && (!has_samedia(xe) || 
xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_STEP: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.step.media >= r->step_start && xe->info.step.media < r->step_end && (!has_samedia(xe) || xe_gt_is_media_type(gt)); break; case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = xe->info.media_verx100 == r->ver_start; break; case XE_RTP_MATCH_INTEGRATED: @@ -108,6 +132,9 @@ static bool rule_matches(const struct xe_device *xe, match = hwe->class != r->engine_class; break; case XE_RTP_MATCH_FUNC: + if (drm_WARN_ON(&xe->drm, !gt)) + return false; + match = r->match_func(gt, hwe); break; default: @@ -186,6 +213,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx, struct xe_device **xe) { switch (ctx->type) { + case XE_RTP_PROCESS_TYPE_DEVICE: + *hwe = NULL; + *gt = NULL; + *xe = ctx->xe; + break; case XE_RTP_PROCESS_TYPE_GT: *hwe = NULL; *gt = ctx->gt; diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 4fe736a11c42..ac260feaabef 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -422,7 +422,8 @@ struct xe_reg_sr; #define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \ struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \ - struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }) + struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \ + struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE }) void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, unsigned long *active_entries, diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h index 1b76b947c706..f4cf30e298cf 100644 --- a/drivers/gpu/drm/xe/xe_rtp_types.h +++ b/drivers/gpu/drm/xe/xe_rtp_types.h @@ -110,12 +110,14 @@ struct xe_rtp_entry { }; enum 
xe_rtp_process_type { + XE_RTP_PROCESS_TYPE_DEVICE, XE_RTP_PROCESS_TYPE_GT, XE_RTP_PROCESS_TYPE_ENGINE, }; struct xe_rtp_process_ctx { union { + struct xe_device *xe; struct xe_gt *gt; struct xe_hw_engine *hwe; }; From 661a6950e061e3cc976597273180e19126b32e19 Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:03 -0700 Subject: [PATCH 152/358] drm/xe: Add infrastructure for Device OOB workarounds Some workarounds need to be able to be applied ahead of any GT initialization for example 15015404425. This patch creates XE_DEVICE_WA macro, in the same vein as XE_WA. This macro can be used ahead of GT initialization, and can be tracked in sysfs. This should alleviate some of the complexities that exist in i915. v2: name change SoC to Device, address style issues v5: split into separate patch from RTP changes, put oob within a struct, move the initiation of oob workarounds into xe_device_probe_early(), clean up the comments around XE_WA. Reviewed-by: Lucas De Marchi Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20250709221605.172516-5-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_debugfs.c | 20 ++++++++++ drivers/gpu/drm/xe/xe_device.c | 3 ++ drivers/gpu/drm/xe/xe_device_types.h | 12 ++++++ drivers/gpu/drm/xe/xe_wa.c | 58 ++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_wa.h | 17 +++++++- 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index d83cd6ed3fa8..0e26658bcf7e 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -21,6 +21,7 @@ #include "xe_pxp_debugfs.h" #include "xe_sriov.h" #include "xe_step.h" +#include "xe_wa.h" #ifdef CONFIG_DRM_XE_DEBUG #include "xe_bo_evict.h" @@ -82,9 +83,28 @@ static int sriov_info(struct seq_file *m, void *data) return 0; } +static int workarounds(struct xe_device *xe, struct drm_printer *p) +{ + xe_pm_runtime_get(xe); + xe_wa_device_dump(xe, p); + 
xe_pm_runtime_put(xe); + + return 0; +} + +static int workaround_info(struct seq_file *m, void *data) +{ + struct xe_device *xe = node_to_xe(m->private); + struct drm_printer p = drm_seq_file_printer(m); + + workarounds(xe, &p); + return 0; +} + static const struct drm_info_list debugfs_list[] = { {"info", info, 0}, { .name = "sriov_info", .show = sriov_info, }, + { .name = "workarounds", .show = workaround_info, }, }; static int forcewake_open(struct inode *inode, struct file *file) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6db09cfc8eb8..28b76fb72859 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -700,6 +700,9 @@ int xe_device_probe_early(struct xe_device *xe) { int err; + xe_wa_device_init(xe); + xe_wa_process_device_oob(xe); + err = xe_mmio_probe_early(xe); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index d15b2793629e..61d3f473a7a6 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,18 @@ struct xe_device { u8 skip_pcode:1; } info; + struct { + /** @wa_active.oob: bitmap with active OOB workarounds */ + unsigned long *oob; + + /** + * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse + * of XE_DEVICE_WA() - it can only be called on initialization after + * Device OOB WAs have been processed. 
+ */ + bool oob_initialized; + } wa_active; + /** @survivability: survivability information for device */ struct xe_survivability survivability; diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index f51218a7a580..22a98600fd8f 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -10,6 +10,7 @@ #include #include +#include #include #include "regs/xe_engine_regs.h" @@ -876,8 +877,33 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = { static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT); +static __maybe_unused const struct xe_rtp_entry device_oob_was[] = { +#include + {} +}; + +static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT); + __diag_pop(); +/** + * xe_wa_process_device_oob - process OOB workaround table + * @xe: device instance to process workarounds for + * + * process OOB workaround table for this device, marking in @xe the + * workarounds that are active. + */ + +void xe_wa_process_device_oob(struct xe_device *xe) +{ + struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe); + + xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)); + + xe->wa_active.oob_initialized = true; + xe_rtp_process(&ctx, device_oob_was); +} + /** * xe_wa_process_oob - process OOB workaround table * @gt: GT instance to process workarounds for @@ -946,6 +972,28 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); } +/** + * xe_wa_device_init - initialize device with workaround oob bookkeeping + * @xe: Xe device instance to initialize + * + * Returns 0 for success, negative with error code otherwise + */ +int xe_wa_device_init(struct xe_device *xe) +{ + unsigned long *p; + + p = drmm_kzalloc(&xe->drm, + sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)), + GFP_KERNEL); + + if (!p) + return -ENOMEM; + + xe->wa_active.oob = p; + + return 0; +} + /** * xe_wa_init - initialize gt with 
workaround bookkeeping * @gt: GT instance to initialize @@ -980,6 +1028,16 @@ int xe_wa_init(struct xe_gt *gt) } ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */ +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p) +{ + size_t idx; + + drm_printf(p, "Device OOB Workarounds\n"); + for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was)) + if (device_oob_was[idx].name) + drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name); +} + void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p) { size_t idx; diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h index 52337405b5bc..c290f449716b 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -13,17 +13,19 @@ struct xe_gt; struct xe_hw_engine; struct xe_tile; +int xe_wa_device_init(struct xe_device *xe); int xe_wa_init(struct xe_gt *gt); +void xe_wa_process_device_oob(struct xe_device *xe); void xe_wa_process_oob(struct xe_gt *gt); void xe_wa_process_gt(struct xe_gt *gt); void xe_wa_process_engine(struct xe_hw_engine *hwe); void xe_wa_process_lrc(struct xe_hw_engine *hwe); void xe_wa_apply_tile_workarounds(struct xe_tile *tile); +void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p); void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); /** - * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any - * other more specific type + * XE_WA - Out-of-band workarounds, to be queried and called as needed. * @gt__: gt instance * @id__: XE_OOB_, as generated by build system in generated/xe_wa_oob.h */ @@ -32,4 +34,15 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \ }) +/** + * XE_DEVICE_WA - Out-of-band Device workarounds, to be queried and called + * as needed. 
+ * @xe__: xe_device + * @id__: XE_DEVICE_WA_OOB_, as generated by build system in generated/xe_device_wa_oob.h + */ +#define XE_DEVICE_WA(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif From ac596dee8008885664274efcebf8ca7538fc2ddc Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:04 -0700 Subject: [PATCH 153/358] drm/xe: Move Wa_15015404425 to use the new XE_DEVICE_WA macro Move Wa_15015404425 to use the new implemented OOB macro XE_DEVICE_WA() v2: rename from SoC to Device v5: move workaround call back into the flush call v6: remove redundant commenting Reviewed-by: Lucas De Marchi Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20250709221605.172516-6-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_wa_oob.rules | 1 + drivers/gpu/drm/xe/xe_mmio.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index e69de29bb2d1..b7d12ea4d65c 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -0,0 +1 @@ +15015404425 PLATFORM(LUNARLAKE) diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index 751586d6806a..e4db8d58ea2d 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -22,6 +22,9 @@ #include "xe_macros.h" #include "xe_sriov.h" #include "xe_trace.h" +#include "xe_wa.h" + +#include "generated/xe_device_wa_oob.h" static void tiles_fini(void *arg) { @@ -167,7 +170,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio) #define DUMMY_REG_OFFSET 0x130030 int i; - if (mmio->tile->xe->info.platform != XE_LUNARLAKE) + if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425)) return; /* 4 dummy writes */ @@ -180,7 +183,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = 
xe_mmio_adjusted_addr(mmio, reg.addr); u8 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readb(mmio->regs + addr); @@ -194,7 +196,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u16 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); val = readw(mmio->regs + addr); @@ -221,7 +222,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr); u32 val; - /* Wa_15015404425 */ mmio_flush_pending_writes(mmio); if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe)) From 77fa16c8f8ee2736f9fe49d5244bec5c35ea3c5b Mon Sep 17 00:00:00 2001 From: Matt Atwood Date: Wed, 9 Jul 2025 15:16:05 -0700 Subject: [PATCH 154/358] drm/xe: extend Wa_15015404425 to apply to PTL Wa_15015404425 only needs to be applied on PTL platforms with an A step compute die. There is no way to map PCI revid to the compute die stepping. The easiest way to figure out the compute die stepping on our end is to map the media IP's stepping to the compute die. For PTL, compute die has an A stepping if and only if the media IP's stepping is also A-step (This relationship is determined on a per platform basis and just happens to be this way on PTL). In addition, this workaround is a chicken-and-egg problem. Wa_15015404425 requires that all register reads be preceded by four dummy MMIO writes (including during early driver init and even pre-OS firmware). The driver needs to perform some MMIO reads during init which include the GMD_ID register that contains the media IP's stepping. To handle this in the safest manner, assume the workaround applies to all of PTL during driver probe and deactivate the workaround after.
The overall solution becomes a set of two workarounds: * 15015404425 - a Device OOB workaround that's always active for PTL * 15015404425_disable - a GT OOB workaround that applies to PTL platforms with a B0 or later stepping The first of these workarounds issues dummy MMIO writes we do when reading registers. The second guards logic that disables the first once we have the necessary information later in the probe process. v2: rename SoC to device, avoid null pointer dereference, update commit message. v3: rebase v5: move disable check into xe_device_probe to avoid linking in xe_wa into xe_pci, reword commit message v6: squash extension and b0 support into 1 patch Reviewed-by: Lucas De Marchi Signed-off-by: Matt Atwood Link: https://lore.kernel.org/r/20250709221605.172516-7-matthew.s.atwood@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 5 +++++ drivers/gpu/drm/xe/xe_device_wa_oob.rules | 1 + drivers/gpu/drm/xe/xe_wa.h | 5 +++++ drivers/gpu/drm/xe/xe_wa_oob.rules | 2 ++ 4 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 28b76fb72859..6dc84e4ed281 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -68,6 +68,7 @@ #include "xe_wait_user_fence.h" #include "xe_wa.h" +#include #include static int xe_file_open(struct drm_device *dev, struct drm_file *file) @@ -864,6 +865,10 @@ int xe_device_probe(struct xe_device *xe) return err; } + if (xe->tiles->media_gt && + XE_WA(xe->tiles->media_gt, 15015404425_disable)) + XE_DEVICE_WA_DISABLE(xe, 15015404425); + xe_nvm_init(xe); err = xe_heci_gsc_init(xe); diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules index b7d12ea4d65c..3a0c4ccc4224 100644 --- a/drivers/gpu/drm/xe/xe_device_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules @@ -1 +1,2 @@ 15015404425 PLATFORM(LUNARLAKE) + PLATFORM(PANTHERLAKE) diff --git a/drivers/gpu/drm/xe/xe_wa.h
b/drivers/gpu/drm/xe/xe_wa.h index c290f449716b..f3880c65cb8d 100644 --- a/drivers/gpu/drm/xe/xe_wa.h +++ b/drivers/gpu/drm/xe/xe_wa.h @@ -45,4 +45,9 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p); test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ }) +#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \ + xe_assert(xe__, (xe__)->wa_active.oob_initialized); \ + clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \ +}) + #endif diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index e7ed5d583d68..e990f20eccfe 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -70,3 +70,5 @@ no_media_l3 MEDIA_VERSION(3000) # SoC workaround - currently applies to all platforms with the following # primary GT GMDID 14022085890 GRAPHICS_VERSION(2001) + +15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER) From 908d9d56c8264536b9e10d682c08781a54527d7b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 10 Jul 2025 10:30:40 +0000 Subject: [PATCH 155/358] drm/xe/sriov: Mark BMG as SR-IOV capable Enable SR-IOV support for BMG platforms. Note that as other flags from the platform descriptor, it only means it may have that capability: it still depends on runtime checks for the proper support in HW and firmware. 
Signed-off-by: Michal Wajdeczko Tested-by: Jakub Kolakowski Signed-off-by: Jakub Kolakowski Reviewed-by: Satyanarayana K V P Link: https://lore.kernel.org/r/20250710103040.375610-3-jakub1.kolakowski@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index 3614fcf3f088..3c40ef426f0c 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -327,6 +327,7 @@ static const struct xe_device_desc bmg_desc = { .has_mbx_power_limits = true, .has_gsc_nvm = 1, .has_heci_cscfi = 1, + .has_sriov = true, .max_gt_per_tile = 2, .needs_scratch = true, }; From b528e896fa570844d654b5a4617a97fa770a1030 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Thu, 10 Jul 2025 10:29:45 +0530 Subject: [PATCH 156/358] drm/xe: Dont skip TLB invalidations on VF Skipping TLB invalidations on VF causes unrecoverable faults. The probable reason for skipping TLB invalidations on SRIOV is the lack of support for the MI_FLUSH_DW_STORE_INDEX instruction. Add back the TLB flush with some additional handling.
Helps in resolving, [ 704.913454] xe 0000:00:02.1: [drm:pf_queue_work_func [xe]] ASID: 0 VFID: 0 PDATA: 0x0d92 Faulted Address: 0x0000000002fa0000 FaultType: 0 AccessType: 1 FaultLevel: 0 EngineClass: 3 bcs EngineInstance: 8 [ 704.913551] xe 0000:00:02.1: [drm:pf_queue_work_func [xe]] Fault response: Unsuccessful -22 V2: - Use Xmas tree (MichalW) Suggested-by: Matthew Brost Fixes: 97515d0b3ed92 ("drm/xe/vf: Don't emit access to Global HWSP if VF") Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250710045945.1023840-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay --- drivers/gpu/drm/xe/xe_ring_ops.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index bc1689db4cd7..7b50c7c1ee21 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 *dw, int i) +static int emit_flush_invalidate(u32 addr, u32 val, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | - MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; - dw[i++] = 0; + MI_FLUSH_IMM_DW; + + dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; + dw[i++] = val; return i; } @@ -397,23 +398,20 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job, static void emit_migration_job_gen12(struct xe_sched_job *job, struct xe_lrc *lrc, u32 seqno) { + u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc); u32 dw[MAX_JOB_SIZE_DW], i = 0; i = emit_copy_timestamp(lrc, dw, i); - i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, dw, i); + i = emit_store_imm_ggtt(saddr, seqno, dw, i); dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */ i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i); - if 
(!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { - /* XXX: Do we need this? Leaving for now. */ - dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(dw, i); - dw[i++] = preparser_disable(false); - } + dw[i++] = preparser_disable(true); + i = emit_flush_invalidate(saddr, seqno, dw, i); + dw[i++] = preparser_disable(false); i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i); From 917b10d90990fd2138b5dbc2d22cfa428c070ade Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 11 Jul 2025 08:04:38 +0000 Subject: [PATCH 157/358] drm: rust: rename as_ref() to from_raw() for drm constructors The prefix as_* should not be used for a constructor. Constructors usually use the prefix from_* instead. Some prior art in the stdlib: Box::from_raw, CString::from_raw, Rc::from_raw, Arc::from_raw, Waker::from_raw, File::from_raw_fd. There is also prior art in the kernel crate: cpufreq::Policy::from_raw, fs::File::from_raw_file, Kuid::from_raw, ARef::from_raw, SeqFile::from_raw, VmaNew::from_raw, Io::from_raw. Link: https://lore.kernel.org/r/aCd8D5IA0RXZvtcv@pollux Signed-off-by: Alice Ryhl Signed-off-by: Danilo Krummrich Link: https://lore.kernel.org/r/20250711-device-as-ref-v2-2-1b16ab6402d7@google.com --- rust/kernel/drm/device.rs | 2 +- rust/kernel/drm/file.rs | 8 ++++---- rust/kernel/drm/gem/mod.rs | 16 ++++++++-------- rust/kernel/drm/ioctl.rs | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs index 624d7a4c83ea..98418f4ce625 100644 --- a/rust/kernel/drm/device.rs +++ b/rust/kernel/drm/device.rs @@ -154,7 +154,7 @@ unsafe fn from_drm_device(ptr: *const bindings::drm_device) -> *mut Self { /// Additionally, callers must ensure that the `struct device`, `ptr` is pointing to, is /// embedded in `Self`. 
#[doc(hidden)] - pub unsafe fn as_ref<'a>(ptr: *const bindings::drm_device) -> &'a Self { + pub unsafe fn from_raw<'a>(ptr: *const bindings::drm_device) -> &'a Self { // SAFETY: By the safety requirements of this function `ptr` is a valid pointer to a // `struct drm_device` embedded in `Self`. let ptr = unsafe { Self::from_drm_device(ptr) }; diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs index b9527705e551..e8789c9110d6 100644 --- a/rust/kernel/drm/file.rs +++ b/rust/kernel/drm/file.rs @@ -32,7 +32,7 @@ impl File { /// # Safety /// /// `raw_file` must be a valid pointer to an open `struct drm_file`, opened through `T::open`. - pub unsafe fn as_ref<'a>(ptr: *mut bindings::drm_file) -> &'a File { + pub unsafe fn from_raw<'a>(ptr: *mut bindings::drm_file) -> &'a File { // SAFETY: `raw_file` is valid by the safety requirements of this function. unsafe { &*ptr.cast() } } @@ -61,10 +61,10 @@ pub(crate) extern "C" fn open_callback( // SAFETY: A callback from `struct drm_driver::open` guarantees that // - `raw_dev` is valid pointer to a `struct drm_device`, // - the corresponding `struct drm_device` has been registered. - let drm = unsafe { drm::Device::as_ref(raw_dev) }; + let drm = unsafe { drm::Device::from_raw(raw_dev) }; // SAFETY: `raw_file` is a valid pointer to a `struct drm_file`. - let file = unsafe { File::::as_ref(raw_file) }; + let file = unsafe { File::::from_raw(raw_file) }; let inner = match T::open(drm) { Err(e) => { @@ -89,7 +89,7 @@ pub(crate) extern "C" fn postclose_callback( raw_file: *mut bindings::drm_file, ) { // SAFETY: This reference won't escape this function - let file = unsafe { File::::as_ref(raw_file) }; + let file = unsafe { File::::from_raw(raw_file) }; // SAFETY: `file.driver_priv` has been created in `open_callback` through `KBox::into_raw`. 
let _ = unsafe { KBox::from_raw(file.driver_priv()) }; diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs index 4cd69fa84318..a24c9a2fc201 100644 --- a/rust/kernel/drm/gem/mod.rs +++ b/rust/kernel/drm/gem/mod.rs @@ -51,7 +51,7 @@ pub trait IntoGEMObject: Sized + super::private::Sealed + AlwaysRefCounted { /// - `self_ptr` must be a valid pointer to `Self`. /// - The caller promises that holding the immutable reference returned by this function does /// not violate rust's data aliasing rules and remains valid throughout the lifetime of `'a`. - unsafe fn as_ref<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self; + unsafe fn from_raw<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self; } // SAFETY: All gem objects are refcounted. @@ -86,12 +86,12 @@ extern "C" fn open_callback, U: BaseObject>( ) -> core::ffi::c_int { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. let file = unsafe { - drm::File::<<::Driver as drm::Driver>::File>::as_ref(raw_file) + drm::File::<<::Driver as drm::Driver>::File>::from_raw(raw_file) }; // SAFETY: `open_callback` is specified in the AllocOps structure for `Object`, ensuring that // `raw_obj` is indeed contained within a `Object`. let obj = unsafe { - <<::Driver as drm::Driver>::Object as IntoGEMObject>::as_ref(raw_obj) + <<::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) }; match T::open(obj, file) { @@ -106,12 +106,12 @@ extern "C" fn close_callback, U: BaseObject>( ) { // SAFETY: `open_callback` is only ever called with a valid pointer to a `struct drm_file`. let file = unsafe { - drm::File::<<::Driver as drm::Driver>::File>::as_ref(raw_file) + drm::File::<<::Driver as drm::Driver>::File>::from_raw(raw_file) }; // SAFETY: `close_callback` is specified in the AllocOps structure for `Object`, ensuring // that `raw_obj` is indeed contained within a `Object`. 
let obj = unsafe { - <<::Driver as drm::Driver>::Object as IntoGEMObject>::as_ref(raw_obj) + <<::Driver as drm::Driver>::Object as IntoGEMObject>::from_raw(raw_obj) }; T::close(obj, file); @@ -124,7 +124,7 @@ fn as_raw(&self) -> *mut bindings::drm_gem_object { self.obj.get() } - unsafe fn as_ref<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self { + unsafe fn from_raw<'a>(self_ptr: *mut bindings::drm_gem_object) -> &'a Self { let self_ptr: *mut Opaque = self_ptr.cast(); // SAFETY: `obj` is guaranteed to be in an `Object` via the safety contract of this @@ -170,9 +170,9 @@ fn lookup_handle( // - A `drm::Driver` can only have a single `File` implementation. // - `file` uses the same `drm::Driver` as `Self`. // - Therefore, we're guaranteed that `ptr` must be a gem object embedded within `Self`. - // - And we check if the pointer is null befoe calling as_ref(), ensuring that `ptr` is a + // - And we check if the pointer is null befoe calling from_raw(), ensuring that `ptr` is a // valid pointer to an initialized `Self`. - let obj = unsafe { Self::as_ref(ptr) }; + let obj = unsafe { Self::from_raw(ptr) }; // SAFETY: // - We take ownership of the reference of `drm_gem_object_lookup()`. diff --git a/rust/kernel/drm/ioctl.rs b/rust/kernel/drm/ioctl.rs index 445639404fb7..fdec01c37168 100644 --- a/rust/kernel/drm/ioctl.rs +++ b/rust/kernel/drm/ioctl.rs @@ -134,7 +134,7 @@ macro_rules! declare_drm_ioctls { // FIXME: Currently there is nothing enforcing that the types of the // dev/file match the current driver these ioctls are being declared // for, and it's not clear how to enforce this within the type system. - let dev = $crate::drm::device::Device::as_ref(raw_dev); + let dev = $crate::drm::device::Device::from_raw(raw_dev); // SAFETY: The ioctl argument has size `_IOC_SIZE(cmd)`, which we // asserted above matches the size of this type, and all bit patterns of // UAPI structs must be valid. @@ -142,7 +142,7 @@ macro_rules! 
declare_drm_ioctls { &*(raw_data as *const $crate::types::Opaque<$crate::uapi::$struct>) }; // SAFETY: This is just the DRM file structure - let file = unsafe { $crate::drm::File::as_ref(raw_file) }; + let file = unsafe { $crate::drm::File::from_raw(raw_file) }; match $func(dev, data, file) { Err(e) => e.to_errno(), From c12fe703cab93f9d8bfe0ff32b58e7b1fd52be1f Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 10 Jul 2025 14:41:29 +0100 Subject: [PATCH 158/358] drm/xe/migrate: fix copy direction in access_memory After we do the modification on the host side, ensure we write the result back to VRAM and not the other way around, otherwise the modification will be lost if treated like a read. Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Signed-off-by: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710134128.800756-2-matthew.auld@intel.com --- drivers/gpu/drm/xe/xe_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 4e2bdf70eb70..2adf95d35c31 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1848,7 +1848,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, err = xe_migrate_access_memory(m, bo, offset & ~XE_CACHELINE_MASK, (void *)ptr, - sizeof(bounce), 0); + sizeof(bounce), write); if (err) return err; } else { From beb72acb5b38dbe670d8eb752d1ad7a32f9c4119 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 10 Jul 2025 12:12:08 -0700 Subject: [PATCH 159/358] drm/xe: Move page fault init after topology init We need the topology to determine GT page fault queue size, move page fault init after topology init. 
Cc: stable@vger.kernel.org Fixes: 3338e4f90c14 ("drm/xe: Use topology to determine page fault queue size") Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Reviewed-by: Stuart Summers Link: https://lore.kernel.org/r/20250710191208.1040215-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d397df056e4c..af03e19ef9be 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -628,10 +628,6 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; - err = xe_gt_pagefault_init(gt); - if (err) - return err; - err = xe_gt_sysfs_init(gt); if (err) return err; @@ -640,6 +636,10 @@ int xe_gt_init(struct xe_gt *gt) if (err) return err; + err = xe_gt_pagefault_init(gt); + if (err) + return err; + err = xe_gt_idle_init(>->gtidle); if (err) return err; From 4a1eaf7d110aa54c2b0e891cb450a6ab37a6c3dd Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 10 Jul 2025 13:54:13 -0700 Subject: [PATCH 160/358] drm/xe: Remove references to CONFIG_DRM_XE_DEVMEM_MIRROR The prefetch code was referencing CONFIG_DRM_XE_DEVMEM_MIRROR, which has been replaced by CONFIG_DRM_XE_PAGEMAP. As a result, prefetches were limited to SRAM. Update the code to use CONFIG_DRM_XE_PAGEMAP instead of the deprecated option. 
Fixes: f86ad0ed620c ("drm/gpusvm, drm/pagemap: Move migration functionality to drm_pagemap") Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250710205413.1105595-1-matthew.brost@intel.com --- drivers/gpu/drm/xe/xe_vm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index e875ea4658a9..2035604121e6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -2380,7 +2380,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, ctx.read_only = xe_vma_read_only(vma); ctx.devmem_possible = IS_DGFX(vm->xe) && - IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); for_each_tile(tile, vm->xe, id) tile_mask |= 0x1 << id; @@ -2887,7 +2887,7 @@ static int check_ufence(struct xe_vma *vma) static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) { - bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR); + bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va); int err = 0; From 81e139db6900503a2e68009764054fad128fbf95 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 14:34:41 -0700 Subject: [PATCH 161/358] drm/xe/migrate: Fix alignment check The check would fail if the address is unaligned, but not when accounting the offset. Instead of `buf | offset` it should have been `buf + offset`. To make it more readable and also drop the uintptr_t, just use the IS_ALIGNED() macro. 
Fixes: 270172f64b11 ("drm/xe: Update xe_ttm_access_memory to use GPU for non-visible access") Reviewed-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20250710-migrate-aligned-v1-1-44003ef3c078@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_migrate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 2adf95d35c31..ba1cff2e4cda 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -1817,8 +1817,8 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo, xe_bo_assert_held(bo); /* Use bounce buffer for small access and unaligned access */ - if (len & XE_CACHELINE_MASK || - ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) { + if (!IS_ALIGNED(len, XE_CACHELINE_BYTES) || + !IS_ALIGNED((unsigned long)buf + offset, XE_CACHELINE_BYTES)) { int buf_offset = 0; /* From 7b6db1731a642be2ac89168d6aa9be6383796844 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 26 Jun 2025 14:25:53 -0700 Subject: [PATCH 162/358] drm/xe: Normalize default param values Document xe module params with the default values following a similar strategy for all of them: 1) Define a DEFAULT_* macro with the default value. When the value can't be directly stringified, also define a *_STR variant 2) Use __stringify() or the _STR variant to make sure the default value shows up in the param description This allows us to show the correct default according to the configuration. max_vfs for example was wrongly documented for CONFIG_DRM_XE_DEBUG and svm_notifier_size didn't have its default documented. 
Reviewed-by: John Harrison Link: https://lore.kernel.org/r/20250626-guc-log-level-v3-1-c3ed8b452e91@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_module.c | 35 ++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index e332f3142435..107ffe87808c 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,31 +19,40 @@ #include "xe_sched_job.h" #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) -#define DEFAULT_GUC_LOG_LEVEL 3 +#define DEFAULT_GUC_LOG_LEVEL 3 #else -#define DEFAULT_GUC_LOG_LEVEL 1 +#define DEFAULT_GUC_LOG_LEVEL 1 #endif +#define DEFAULT_PROBE_DISPLAY true +#define DEFAULT_VRAM_BAR_SIZE 0 +#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE +#define DEFAULT_WEDGED_MODE 1 +#define DEFAULT_SVM_NOTIFIER_SIZE 512 + struct xe_modparam xe_modparam = { - .probe_display = true, - .guc_log_level = DEFAULT_GUC_LOG_LEVEL, - .force_probe = CONFIG_DRM_XE_FORCE_PROBE, - .wedged_mode = 1, - .svm_notifier_size = 512, + .probe_display = DEFAULT_PROBE_DISPLAY, + .guc_log_level = DEFAULT_GUC_LOG_LEVEL, + .force_probe = DEFAULT_FORCE_PROBE, + .wedged_mode = DEFAULT_WEDGED_MODE, + .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE, /* the rest are 0 by default */ }; module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600); -MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2"); +MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 " + "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]"); module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444); MODULE_PARM_DESC(force_execlist, "Force Execlist submission"); module_param_named(probe_display, xe_modparam.probe_display, bool, 0444); -MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)"); 
+MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched " + "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "])"); module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600); -MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size"); +MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size " + "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])"); module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600); MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels " @@ -63,7 +72,8 @@ MODULE_PARM_DESC(gsc_firmware_path, module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400); MODULE_PARM_DESC(force_probe, - "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details."); + "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details " + "[default=" DEFAULT_FORCE_PROBE "])"); #ifdef CONFIG_PCI_IOV module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400); @@ -74,7 +84,8 @@ MODULE_PARM_DESC(max_vfs, module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600); MODULE_PARM_DESC(wedged_mode, - "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang"); + "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang " + "[default=" __stringify(DEFAULT_WEDGED_MODE) "])"); static int xe_check_nomodeset(void) { From a6cfa4c8833944f8912c1fa7f95795753f6376ea Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sat, 12 Jul 2025 18:37:12 -0500 Subject: [PATCH 163/358] PM: hibernate: Add stub for pm_hibernate_is_recovering() Randy reports that amdgpu fails to compile with the following error: ERROR: modpost: "pm_hibernate_is_recovering" [drivers/gpu/drm/amd/amdgpu/amdgpu.ko] undefined! 
This happens because pm_hibernate_is_recovering() is only compiled when CONFIG_PM_SLEEP is set. Add a stub for it so that drivers don't need to depend upon CONFIG_PM. Cc: Samuel Zhang Reported-by: Randy Dunlap Closes: https://lore.kernel.org/dri-devel/CAJZ5v0h1CX+aTu7dFy6vB-9LM6t5J4rt7Su3qVnq1xx-BFAm=Q@mail.gmail.com/T/#m2b9fe212b35fde11d58fcbc4e0727bc02ebba7b0 Fixes: c2aaddbd2dede ("PM: hibernate: add new api pm_hibernate_is_recovering()") Acked-by: Rafael J. Wysocki Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://lore.kernel.org/r/20250712233715.821424-1-superm1@kernel.org Signed-off-by: Mario Limonciello --- include/linux/suspend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 293137210fdf..fcb150ee83b6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -426,8 +426,6 @@ int is_hibernate_resume_dev(dev_t dev); static inline int is_hibernate_resume_dev(dev_t dev) { return 0; } #endif -bool pm_hibernate_is_recovering(void); - /* Hibernation and suspend events */ #define PM_HIBERNATION_PREPARE 0x0001 /* Going to hibernate */ #define PM_POST_HIBERNATION 0x0002 /* Hibernation finished */ @@ -478,6 +476,7 @@ extern unsigned int lock_system_sleep(void); extern void unlock_system_sleep(unsigned int); extern bool pm_sleep_transition_in_progress(void); +bool pm_hibernate_is_recovering(void); #else /* !CONFIG_PM_SLEEP */ @@ -508,6 +507,7 @@ static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} static inline bool pm_sleep_transition_in_progress(void) { return false; } +static inline bool pm_hibernate_is_recovering(void) { return false; } #endif /* !CONFIG_PM_SLEEP */ From 8f3d1c9fb04e2ae018d06b4cacc49e146120facc Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Sun, 13 Jul 2025 16:25:31 +0100 Subject: [PATCH 164/358] drm/xe: Remove unused functions xe_bo_create_from_data() last use was removed in 2023 by commit 0e1a47fcabc8 ("drm/xe: Add a helper for DRM device-lifetime BO create") xe_rtp_match_first_gslice_fused_off() last use was removed in 2023 by commit 4e124151fcfc ("drm/xe/dg2: Drop pre-production workarounds") Remove them, and xe_dss_mask_empty whose last use was by xe_rtp_match_first_gslice_fused_off(). (Xe has a bunch of other symbols that have been added but not used, given how new it is, I've left those, as opposed to these that had the code that used them removed). Reviewed-by: Lucas De Marchi Signed-off-by: Dr. David Alan Gilbert Link: https://lore.kernel.org/r/20250713152531.219326-1-linux@treblig.org Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bo.c | 15 --------------- drivers/gpu/drm/xe/xe_bo.h | 3 --- drivers/gpu/drm/xe/xe_gt_topology.c | 5 ----- drivers/gpu/drm/xe/xe_gt_topology.h | 2 -- drivers/gpu/drm/xe/xe_rtp.c | 15 --------------- drivers/gpu/drm/xe/xe_rtp.h | 11 ----------- 6 files changed, 51 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 4e0355d0f406..18f27da47a36 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -2174,21 +2174,6 @@ struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags); } -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags) -{ - struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL, - ALIGN(size, PAGE_SIZE), - type, flags); - if (IS_ERR(bo)) - return bo; - - xe_map_memcpy_to(xe, &bo->vmap, 0, data, size); - - return bo; -} - static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index
8559901e4088..02e8cde4c6b2 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -118,9 +118,6 @@ struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe, size_t size, u64 offset, enum ttm_bo_type type, u32 flags, u64 alignment); -struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, - const void *data, size_t size, - enum ttm_bo_type type, u32 flags); struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile, size_t size, u32 flags); struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 305939c69747..8c63e3263643 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -290,11 +290,6 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum) return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize); } -bool xe_dss_mask_empty(const xe_dss_mask_t mask) -{ - return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS); -} - /** * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant * @gt: GT to check diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h index a72d26ba0653..c8140704ad4c 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.h +++ b/drivers/gpu/drm/xe/xe_gt_topology.h @@ -41,8 +41,6 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask) unsigned int xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum); -bool xe_dss_mask_empty(const xe_dss_mask_t mask); - bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad); diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index e4b3b135a4af..95571b87aa73 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -358,21 +358,6 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, hwe->engine_id == 
__ffs(render_compute_mask); } -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe) -{ - unsigned int dss_per_gslice = 4; - unsigned int dss; - - if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask), - "Checking gslice for platform without geometry pipeline\n")) - return false; - - dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0); - - return dss >= dss_per_gslice; -} - bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt, const struct xe_hw_engine *hwe) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index ac260feaabef..5ed6c14b9ae3 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -466,17 +466,6 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt, bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt, const struct xe_hw_engine *hwe); -/* - * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off - * - * @gt: GT structure - * @hwe: Engine instance - * - * Returns: true if first gslice is fused off, false otherwise.
- */ -bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt, - const struct xe_hw_engine *hwe); - /* * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device * From e08c0fa02e4ea02b4494b7bcb2e9cac99439f725 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 11 Jul 2025 14:49:12 -0700 Subject: [PATCH 165/358] drm/xe: Fix missing kernel-doc Fix warning: Warning: drivers/gpu/drm/xe/xe_device_types.h:658 struct member 'wa_active' not described in 'xe_device' Fixes: 661a6950e061 ("drm/xe: Add infrastructure for Device OOB workarounds") Cc: Matt Atwood Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250711214911.2009714-2-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 61d3f473a7a6..1e6ecf1e6979 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -363,6 +363,7 @@ struct xe_device { u8 skip_pcode:1; } info; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.oob: bitmap with active OOB workarounds */ unsigned long *oob; From fa7c2a2460198983e16734fea8251b705775ac11 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:46 +0100 Subject: [PATCH 166/358] drm/xe: Generalize wa bb emission code Generalize the wa bb emission by splitting it into three phases - setup, emit and finish, and extract setup and finish steps into helpers. This will enable using the same infrastructure for emitting the indirect context workarounds. 
Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-2-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 74 +++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index d2ad8fe737eb..2385e22437b3 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -972,32 +972,36 @@ static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine * return cmd - batch; } -struct wa_bb_setup { +struct bo_setup { ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *batch, size_t max_size); }; -static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +static u32 *setup_bo(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + const size_t max_size, + unsigned int offset, + const struct bo_setup *funcs, + unsigned int num_funcs, + u32 **free) { - const size_t max_size = LRC_WA_BB_SIZE; - static const struct wa_bb_setup funcs[] = { - { .setup = wa_bb_setup_utilization }, - }; - ssize_t remain; u32 *cmd, *buf = NULL; + ssize_t remain; if (lrc->bo->vmap.is_iomem) { buf = kmalloc(max_size, GFP_KERNEL); if (!buf) - return -ENOMEM; + return ERR_PTR(-ENOMEM); cmd = buf; + *free = buf; } else { - cmd = lrc->bo->vmap.vaddr + __xe_lrc_wa_bb_offset(lrc); + cmd = lrc->bo->vmap.vaddr + offset; + *free = NULL; } remain = max_size / sizeof(*cmd); - for (size_t i = 0; i < ARRAY_SIZE(funcs); i++) { + for (size_t i = 0; i < num_funcs; i++) { ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); remain -= len; @@ -1012,23 +1016,45 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) cmd += len; } - *cmd++ = MI_BATCH_BUFFER_END; - - if (buf) { - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, - __xe_lrc_wa_bb_offset(lrc), buf, - (cmd - buf) * sizeof(*cmd)); - kfree(buf); - } - - xe_lrc_write_ctx_reg(lrc, 
CTX_BB_PER_CTX_PTR, xe_bo_ggtt_addr(lrc->bo) + - __xe_lrc_wa_bb_offset(lrc) + 1); - - return 0; + return cmd; fail: kfree(buf); - return -ENOSPC; + return ERR_PTR(-ENOSPC); +} + +static void finish_bo(struct xe_lrc *lrc, unsigned int offset, u32 *cmd, + u32 *free) +{ + if (!free) + return; + + xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, offset, free, + (cmd - free) * sizeof(*cmd)); + kfree(free); +} + +static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static const struct bo_setup funcs[] = { + { .setup = wa_bb_setup_utilization }, + }; + unsigned int offset = __xe_lrc_wa_bb_offset(lrc); + u32 *cmd, *buf = NULL; + + cmd = setup_bo(lrc, hwe, LRC_WA_BB_SIZE, offset, funcs, + ARRAY_SIZE(funcs), &buf); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd++ = MI_BATCH_BUFFER_END; + + finish_bo(lrc, offset, cmd, buf); + + xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, + xe_bo_ggtt_addr(lrc->bo) + offset + 1); + + return 0; } #define PVC_CTX_ASID (0x2e + 1) From 81b79670a37e6d796cbab3d18e77b7540f405067 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:47 +0100 Subject: [PATCH 167/358] drm/xe: Pass wa bb setup arguments in a struct Group the function arguments in a struct for more readable code and easier extending. 
Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-3-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 91 +++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 2385e22437b3..6f8bef0802d4 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -977,32 +977,39 @@ struct bo_setup { u32 *batch, size_t max_size); }; -static u32 *setup_bo(struct xe_lrc *lrc, - struct xe_hw_engine *hwe, - const size_t max_size, - unsigned int offset, - const struct bo_setup *funcs, - unsigned int num_funcs, - u32 **free) +struct bo_setup_state { + /* Input: */ + struct xe_lrc *lrc; + struct xe_hw_engine *hwe; + size_t max_size; + unsigned int offset; + const struct bo_setup *funcs; + unsigned int num_funcs; + + /* State: */ + u32 *buffer; + u32 *ptr; +}; + +static int setup_bo(struct bo_setup_state *state) { - u32 *cmd, *buf = NULL; ssize_t remain; - if (lrc->bo->vmap.is_iomem) { - buf = kmalloc(max_size, GFP_KERNEL); - if (!buf) - return ERR_PTR(-ENOMEM); - cmd = buf; - *free = buf; + if (state->lrc->bo->vmap.is_iomem) { + state->buffer = kmalloc(state->max_size, GFP_KERNEL); + if (!state->buffer) + return -ENOMEM; + state->ptr = state->buffer; } else { - cmd = lrc->bo->vmap.vaddr + offset; - *free = NULL; + state->ptr = state->lrc->bo->vmap.vaddr + state->offset; + state->buffer = NULL; } - remain = max_size / sizeof(*cmd); + remain = state->max_size / sizeof(u32); - for (size_t i = 0; i < num_funcs; i++) { - ssize_t len = funcs[i].setup(lrc, hwe, cmd, remain); + for (size_t i = 0; i < state->num_funcs; i++) { + ssize_t len = state->funcs[i].setup(state->lrc, state->hwe, + state->ptr, remain); remain -= len; @@ -1010,28 +1017,28 @@ static u32 *setup_bo(struct xe_lrc *lrc, * There should always be at least 1 additional dword for * the 
end marker */ - if (len < 0 || xe_gt_WARN_ON(lrc->gt, remain < 1)) + if (len < 0 || xe_gt_WARN_ON(state->lrc->gt, remain < 1)) goto fail; - cmd += len; + state->ptr += len; } - return cmd; + return 0; fail: - kfree(buf); - return ERR_PTR(-ENOSPC); + kfree(state->buffer); + return -ENOSPC; } -static void finish_bo(struct xe_lrc *lrc, unsigned int offset, u32 *cmd, - u32 *free) +static void finish_bo(struct bo_setup_state *state) { - if (!free) + if (!state->buffer) return; - xe_map_memcpy_to(gt_to_xe(lrc->gt), &lrc->bo->vmap, offset, free, - (cmd - free) * sizeof(*cmd)); - kfree(free); + xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, + state->offset, state->buffer, + (state->ptr - state->buffer) * sizeof(u32)); + kfree(state->buffer); } static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) @@ -1039,20 +1046,26 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) static const struct bo_setup funcs[] = { { .setup = wa_bb_setup_utilization }, }; - unsigned int offset = __xe_lrc_wa_bb_offset(lrc); - u32 *cmd, *buf = NULL; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = LRC_WA_BB_SIZE, + .offset = __xe_lrc_wa_bb_offset(lrc), + .funcs = funcs, + .num_funcs = ARRAY_SIZE(funcs), + }; + int ret; - cmd = setup_bo(lrc, hwe, LRC_WA_BB_SIZE, offset, funcs, - ARRAY_SIZE(funcs), &buf); - if (IS_ERR(cmd)) - return PTR_ERR(cmd); + ret = setup_bo(&state); + if (ret) + return ret; - *cmd++ = MI_BATCH_BUFFER_END; + *state.ptr++ = MI_BATCH_BUFFER_END; - finish_bo(lrc, offset, cmd, buf); + finish_bo(&state); xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR, - xe_bo_ggtt_addr(lrc->bo) + offset + 1); + xe_bo_ggtt_addr(lrc->bo) + state.offset + 1); return 0; } From 1ec31d355c2d225f50dfb70dcaab07bf3afee0ed Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:48 +0100 Subject: [PATCH 168/358] drm/xe: Rename utilization workaround emission function Lucas suggested to consolidate to a slightly 
different naming scheme which will align with the upcoming additions better. Signed-off-by: Tvrtko Ursulin Suggested-by: Lucas De Marchi Cc: Matt Roper Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-4-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 6f8bef0802d4..bd5dde28fa4b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -944,8 +944,10 @@ static void xe_lrc_finish(struct xe_lrc *lrc) * store it in the PPHSWP. */ #define CONTEXT_ACTIVE 1ULL -static ssize_t wa_bb_setup_utilization(struct xe_lrc *lrc, struct xe_hw_engine *hwe, - u32 *batch, size_t max_len) +static ssize_t setup_utilization_wa(struct xe_lrc *lrc, + struct xe_hw_engine *hwe, + u32 *batch, + size_t max_len) { u32 *cmd = batch; @@ -1044,7 +1046,7 @@ static void finish_bo(struct bo_setup_state *state) static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) { static const struct bo_setup funcs[] = { - { .setup = wa_bb_setup_utilization }, + { .setup = setup_utilization_wa }, }; struct bo_setup_state state = { .lrc = lrc, From 5ce511ad2b1e2c449e26dba11ac5027c1a142e19 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:49 +0100 Subject: [PATCH 169/358] drm/xe: Track number of written dwords from workaround batch buffer emission Indirect context setup will need to get to the number of written dwords. Let's add it as an output parameter so it can be accessed from the finish helper regardless of whether code is writing directly or via a shadow buffer.
Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-5-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index bd5dde28fa4b..16921605fa6d 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -991,6 +991,7 @@ struct bo_setup_state { /* State: */ u32 *buffer; u32 *ptr; + unsigned int written; }; static int setup_bo(struct bo_setup_state *state) @@ -1023,6 +1024,7 @@ static int setup_bo(struct bo_setup_state *state) goto fail; state->ptr += len; + state->written += len; } return 0; @@ -1039,7 +1041,7 @@ static void finish_bo(struct bo_setup_state *state) xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap, state->offset, state->buffer, - (state->ptr - state->buffer) * sizeof(u32)); + state->written * sizeof(u32)); kfree(state->buffer); } @@ -1063,6 +1065,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return ret; *state.ptr++ = MI_BATCH_BUFFER_END; + state.written++; finish_bo(&state); From a3397b24ae0045113189424442bcaa14cbc696e2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:50 +0100 Subject: [PATCH 170/358] drm/xe: Allow specifying number of extra dwords at the end of wa bb emission Indirect context setup will need more than one. 
Signed-off-by: Tvrtko Ursulin Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-6-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 16921605fa6d..f9b8e6e94828 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -984,6 +984,7 @@ struct bo_setup_state { struct xe_lrc *lrc; struct xe_hw_engine *hwe; size_t max_size; + size_t reserve_dw; unsigned int offset; const struct bo_setup *funcs; unsigned int num_funcs; @@ -1017,10 +1018,10 @@ static int setup_bo(struct bo_setup_state *state) remain -= len; /* - * There should always be at least 1 additional dword for - * the end marker + * Caller has asked for at least reserve_dw to remain unused. */ - if (len < 0 || xe_gt_WARN_ON(state->lrc->gt, remain < 1)) + if (len < 0 || + xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw)) goto fail; state->ptr += len; @@ -1054,6 +1055,7 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) .lrc = lrc, .hwe = hwe, .max_size = LRC_WA_BB_SIZE, + .reserve_dw = 1, .offset = __xe_lrc_wa_bb_offset(lrc), .funcs = funcs, .num_funcs = ARRAY_SIZE(funcs), From fba12307633933917a799fa2cda5bfc324b3f114 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 11 Jul 2025 17:01:51 +0100 Subject: [PATCH 171/358] drm/xe: Add plumbing for indirect context workarounds Some upcoming workarounds need to be emitted from the indirect workaround context so lets add some plumbing where they will be able to easily slot in. No functional changes for now since everything is still deactivated. 
Signed-off-by: Tvrtko Ursulin Bspec: 45954 Cc: Lucas De Marchi Cc: Matt Roper Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711160153.49833-7-tvrtko.ursulin@igalia.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 5 ++ drivers/gpu/drm/xe/xe_lrc.c | 84 ++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_lrc_types.h | 3 +- 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 994af591a2e8..11417c2d07dd 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -12,6 +12,8 @@ #define CTX_RING_START (0x08 + 1) #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_PER_CTX_PTR (0x12 + 1) +#define CTX_CS_INDIRECT_CTX (0x14 + 1) +#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1) #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) @@ -36,4 +38,7 @@ #define INDIRECT_CTX_RING_START_UDW (0x08 + 1) #define INDIRECT_CTX_RING_CTL (0x0a + 1) +#define CTX_INDIRECT_CTX_OFFSET_MASK REG_GENMASK(15, 6) +#define CTX_INDIRECT_CTX_OFFSET_DEFAULT REG_FIELD_PREP(CTX_INDIRECT_CTX_OFFSET_MASK, 0xd) + #endif diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f9b8e6e94828..61b1935d1c46 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -39,6 +39,7 @@ #define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48) #define LRC_PPHWSP_SIZE SZ_4K +#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K #define LRC_WA_BB_SIZE SZ_4K @@ -48,6 +49,12 @@ lrc_to_xe(struct xe_lrc *lrc) return gt_to_xe(lrc->fence_ctx.gt); } +static bool +gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class) +{ + return false; +} + size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class) { struct xe_device *xe = gt_to_xe(gt); @@ -717,7 +724,18 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct 
xe_lrc *lrc) static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc) { - return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_RING_STATE_SIZE; + u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - + LRC_INDIRECT_RING_STATE_SIZE; + + if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX) + offset -= LRC_INDIRECT_CTX_BO_SIZE; + + return offset; +} + +static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc) +{ + return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE; } static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc) @@ -1077,6 +1095,58 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } +static int +setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) +{ + static struct bo_setup rcs_funcs[] = { + }; + struct bo_setup_state state = { + .lrc = lrc, + .hwe = hwe, + .max_size = (63 * 64) /* max 63 cachelines */, + .offset = __xe_lrc_indirect_ctx_offset(lrc), + }; + int ret; + + if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)) + return 0; + + if (hwe->class == XE_ENGINE_CLASS_RENDER || + hwe->class == XE_ENGINE_CLASS_COMPUTE) { + state.funcs = rcs_funcs; + state.num_funcs = ARRAY_SIZE(rcs_funcs); + } + + if (xe_gt_WARN_ON(lrc->gt, !state.funcs)) + return 0; + + ret = setup_bo(&state); + if (ret) + return ret; + + /* + * Align to 64B cacheline so there's no garbage at the end for CS to + * execute: size for indirect ctx must be a multiple of 64. + */ + while (state.written & 0xf) { + *state.ptr++ = MI_NOOP; + state.written++; + } + + finish_bo(&state); + + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX, + (xe_bo_ggtt_addr(lrc->bo) + state.offset) | + /* Size in CLs. 
*/ + (state.written * sizeof(u32) / 64)); + xe_lrc_write_ctx_reg(lrc, + CTX_CS_INDIRECT_CTX_OFFSET, + CTX_INDIRECT_CTX_OFFSET_DEFAULT); + + return 0; +} + #define PVC_CTX_ASID (0x2e + 1) #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) @@ -1086,7 +1156,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, { struct xe_gt *gt = hwe->gt; const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class); - const u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; + u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE; struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; @@ -1101,6 +1171,12 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, lrc->flags = 0; lrc->ring.size = ring_size; lrc->ring.tail = 0; + + if (gt_engine_needs_indirect_ctx(gt, hwe->class)) { + lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX; + bo_size += LRC_INDIRECT_CTX_BO_SIZE; + } + if (xe_gt_has_indirect_ring_state(gt)) lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE; @@ -1225,6 +1301,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (err) goto err_lrc_finish; + err = setup_indirect_ctx(lrc, hwe); + if (err) + goto err_lrc_finish; + return 0; err_lrc_finish: diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h index 2c7c81079801..e9883706e004 100644 --- a/drivers/gpu/drm/xe/xe_lrc_types.h +++ b/drivers/gpu/drm/xe/xe_lrc_types.h @@ -29,7 +29,8 @@ struct xe_lrc { struct xe_gt *gt; /** @flags: LRC flags */ -#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1 +#define XE_LRC_FLAG_INDIRECT_CTX 0x1 +#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2 u32 flags; /** @refcount: ref count of this lrc */ From 02bb63d1a59341032b8e7e4021e18d044bdb1786 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 3 Jul 2025 20:49:52 +0800 Subject: [PATCH 172/358] drm/bridge: Make dp/hdmi_audio_* callback keep the same parameter order with get_modes Make the dp/hdmi_audio_* callback maintain the same parameter order as get_modes
and edid_read: first the bridge, then the connector. Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250703125027.311109-2-andyshrk@163.com [DB: added the chunk to the cdn-dp driver] Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/adv7511/adv7511.h | 16 ++++----- .../gpu/drm/bridge/adv7511/adv7511_audio.c | 12 +++---- drivers/gpu/drm/bridge/adv7511/adv7511_cec.c | 4 +-- drivers/gpu/drm/bridge/lontium-lt9611.c | 12 +++---- drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c | 12 +++---- .../gpu/drm/display/drm_bridge_connector.c | 18 +++++----- drivers/gpu/drm/msm/dp/dp_audio.c | 8 ++--- drivers/gpu/drm/msm/dp/dp_audio.h | 8 ++--- drivers/gpu/drm/msm/hdmi/hdmi.h | 8 ++--- drivers/gpu/drm/msm/hdmi/hdmi_audio.c | 8 ++--- drivers/gpu/drm/rockchip/cdn-dp-core.c | 12 +++---- include/drm/drm_bridge.h | 36 +++++++++---------- 12 files changed, 77 insertions(+), 77 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 71bb64e5f481..85ebead9809c 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -399,8 +399,8 @@ static inline struct adv7511 *bridge_to_adv7511(struct drm_bridge *bridge) } #ifdef CONFIG_DRM_I2C_ADV7511_CEC -int adv7511_cec_init(struct drm_connector *connector, - struct drm_bridge *bridge); +int adv7511_cec_init(struct drm_bridge *bridge, + struct drm_connector *connector); int adv7511_cec_enable(struct drm_bridge *bridge, bool enable); int adv7511_cec_log_addr(struct drm_bridge *bridge, u8 addr); int adv7511_cec_transmit(struct drm_bridge *bridge, u8 attempts, @@ -424,12 +424,12 @@ int adv7533_attach_dsi(struct adv7511 *adv); int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv); #ifdef CONFIG_DRM_I2C_ADV7511_AUDIO -int adv7511_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge); -void adv7511_hdmi_audio_shutdown(struct drm_connector *connector, - struct 
drm_bridge *bridge); -int adv7511_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector); +void adv7511_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); +int adv7511_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); #else /*CONFIG_DRM_I2C_ADV7511_AUDIO */ diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c index 915c3b967216..766b1c96bc88 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_audio.c @@ -55,8 +55,8 @@ static int adv7511_update_cts_n(struct adv7511 *adv7511) return 0; } -int adv7511_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int adv7511_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -168,8 +168,8 @@ int adv7511_hdmi_audio_prepare(struct drm_connector *connector, return 0; } -int adv7511_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge) +int adv7511_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); @@ -206,8 +206,8 @@ int adv7511_hdmi_audio_startup(struct drm_connector *connector, return 0; } -void adv7511_hdmi_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void adv7511_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index 822265426f58..8ecbc25dc647 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ 
b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -346,8 +346,8 @@ static int adv7511_cec_parse_dt(struct device *dev, struct adv7511 *adv7511) return 0; } -int adv7511_cec_init(struct drm_connector *connector, - struct drm_bridge *bridge) +int adv7511_cec_init(struct drm_bridge *bridge, + struct drm_connector *connector) { struct adv7511 *adv7511 = bridge_to_adv7511(bridge); struct device *dev = &adv7511->i2c_main->dev; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index d6ee79c1e427..ff85ac8130b4 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -936,8 +936,8 @@ lt9611_hdmi_tmds_char_rate_valid(const struct drm_bridge *bridge, return MODE_OK; } -static int lt9611_hdmi_audio_startup(struct drm_connector *connector, - struct drm_bridge *bridge) +static int lt9611_hdmi_audio_startup(struct drm_bridge *bridge, + struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); @@ -952,8 +952,8 @@ static int lt9611_hdmi_audio_startup(struct drm_connector *connector, return 0; } -static int lt9611_hdmi_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +static int lt9611_hdmi_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -974,8 +974,8 @@ static int lt9611_hdmi_audio_prepare(struct drm_connector *connector, &hparms->cea); } -static void lt9611_hdmi_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +static void lt9611_hdmi_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index 7ade80f02a94..f9438e39b94a 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -440,8 
+440,8 @@ static void dw_hdmi_qp_set_sample_rate(struct dw_hdmi_qp *hdmi, unsigned long lo dw_hdmi_qp_set_cts_n(hdmi, cts, n); } -static int dw_hdmi_qp_audio_enable(struct drm_connector *connector, - struct drm_bridge *bridge) +static int dw_hdmi_qp_audio_enable(struct drm_bridge *bridge, + struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = dw_hdmi_qp_from_bridge(bridge); @@ -451,8 +451,8 @@ static int dw_hdmi_qp_audio_enable(struct drm_connector *connector, return 0; } -static int dw_hdmi_qp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +static int dw_hdmi_qp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms) { @@ -497,8 +497,8 @@ static void dw_hdmi_qp_audio_disable_regs(struct dw_hdmi_qp *hdmi) AVP_DATAPATH_PACKET_AUDIO_SWDISABLE, GLOBAL_SWDISABLE); } -static void dw_hdmi_qp_audio_disable(struct drm_connector *connector, - struct drm_bridge *bridge) +static void dw_hdmi_qp_audio_disable(struct drm_bridge *bridge, + struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = dw_hdmi_qp_from_bridge(bridge); diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 6cdb432dbc30..717d96530c38 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -463,7 +463,7 @@ static int drm_bridge_connector_audio_startup(struct drm_connector *connector) if (!bridge->funcs->hdmi_audio_startup) return 0; - return bridge->funcs->hdmi_audio_startup(connector, bridge); + return bridge->funcs->hdmi_audio_startup(bridge, connector); } if (bridge_connector->bridge_dp_audio) { @@ -472,7 +472,7 @@ static int drm_bridge_connector_audio_startup(struct drm_connector *connector) if (!bridge->funcs->dp_audio_startup) return 0; - return bridge->funcs->dp_audio_startup(connector, bridge); + return bridge->funcs->dp_audio_startup(bridge, connector); } 
return -EINVAL; @@ -489,13 +489,13 @@ static int drm_bridge_connector_audio_prepare(struct drm_connector *connector, if (bridge_connector->bridge_hdmi_audio) { bridge = bridge_connector->bridge_hdmi_audio; - return bridge->funcs->hdmi_audio_prepare(connector, bridge, fmt, hparms); + return bridge->funcs->hdmi_audio_prepare(bridge, connector, fmt, hparms); } if (bridge_connector->bridge_dp_audio) { bridge = bridge_connector->bridge_dp_audio; - return bridge->funcs->dp_audio_prepare(connector, bridge, fmt, hparms); + return bridge->funcs->dp_audio_prepare(bridge, connector, fmt, hparms); } return -EINVAL; @@ -509,12 +509,12 @@ static void drm_bridge_connector_audio_shutdown(struct drm_connector *connector) if (bridge_connector->bridge_hdmi_audio) { bridge = bridge_connector->bridge_hdmi_audio; - bridge->funcs->hdmi_audio_shutdown(connector, bridge); + bridge->funcs->hdmi_audio_shutdown(bridge, connector); } if (bridge_connector->bridge_dp_audio) { bridge = bridge_connector->bridge_dp_audio; - bridge->funcs->dp_audio_shutdown(connector, bridge); + bridge->funcs->dp_audio_shutdown(bridge, connector); } } @@ -531,7 +531,7 @@ static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connecto if (!bridge->funcs->hdmi_audio_mute_stream) return -ENOTSUPP; - return bridge->funcs->hdmi_audio_mute_stream(connector, bridge, + return bridge->funcs->hdmi_audio_mute_stream(bridge, connector, enable, direction); } @@ -541,7 +541,7 @@ static int drm_bridge_connector_audio_mute_stream(struct drm_connector *connecto if (!bridge->funcs->dp_audio_mute_stream) return -ENOTSUPP; - return bridge->funcs->dp_audio_mute_stream(connector, bridge, + return bridge->funcs->dp_audio_mute_stream(bridge, connector, enable, direction); } @@ -604,7 +604,7 @@ static int drm_bridge_connector_hdmi_cec_init(struct drm_connector *connector) if (!bridge->funcs->hdmi_cec_init) return 0; - return bridge->funcs->hdmi_cec_init(connector, bridge); + return bridge->funcs->hdmi_cec_init(bridge, 
connector); } static const struct drm_connector_hdmi_cec_funcs drm_bridge_connector_hdmi_cec_funcs = { diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 959cf53be4b8..41018e82efa1 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -265,8 +265,8 @@ static struct msm_dp_audio_private *msm_dp_audio_get_data(struct msm_dp *msm_dp_ return container_of(msm_dp_audio, struct msm_dp_audio_private, msm_dp_audio); } -int msm_dp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { @@ -308,8 +308,8 @@ int msm_dp_audio_prepare(struct drm_connector *connector, return rc; } -void msm_dp_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void msm_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connecter) { struct msm_dp_audio_private *audio; struct msm_dp *msm_dp_display; diff --git a/drivers/gpu/drm/msm/dp/dp_audio.h b/drivers/gpu/drm/msm/dp/dp_audio.h index 842278516c99..ce2342856adb 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.h +++ b/drivers/gpu/drm/msm/dp/dp_audio.h @@ -45,12 +45,12 @@ struct msm_dp_audio *msm_dp_audio_get(struct platform_device *pdev, */ void msm_dp_audio_put(struct msm_dp_audio *msm_dp_audio); -int msm_dp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params); -void msm_dp_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge); +void msm_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); #endif /* _DP_AUDIO_H_ */ diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index d5e572d10d6a..1d02d4e1ed5b 100644 
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@ -200,12 +200,12 @@ struct hdmi_codec_daifmt; struct hdmi_codec_params; int msm_hdmi_audio_update(struct hdmi *hdmi); -int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_hdmi_bridge_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params); -void msm_hdmi_bridge_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge); +void msm_hdmi_bridge_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector); /* * hdmi bridge: diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c index b9ec14ef2c20..d9a8dc9dae8f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c @@ -122,8 +122,8 @@ int msm_hdmi_audio_update(struct hdmi *hdmi) return 0; } -int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +int msm_hdmi_bridge_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { @@ -163,8 +163,8 @@ int msm_hdmi_bridge_audio_prepare(struct drm_connector *connector, return msm_hdmi_audio_update(hdmi); } -void msm_hdmi_bridge_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +void msm_hdmi_bridge_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); struct hdmi *hdmi = hdmi_bridge->hdmi; diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index 24f6b3879f4b..c7e199ba123e 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -743,8 +743,8 @@ static int cdn_dp_parse_dt(struct cdn_dp_device *dp) return 0; } -static int 
cdn_dp_audio_prepare(struct drm_connector *connector, - struct drm_bridge *bridge, +static int cdn_dp_audio_prepare(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *daifmt, struct hdmi_codec_params *params) { @@ -784,8 +784,8 @@ static int cdn_dp_audio_prepare(struct drm_connector *connector, return ret; } -static void cdn_dp_audio_shutdown(struct drm_connector *connector, - struct drm_bridge *bridge) +static void cdn_dp_audio_shutdown(struct drm_bridge *bridge, + struct drm_connector *connector) { struct cdn_dp_device *dp = bridge_to_dp(bridge); int ret; @@ -801,8 +801,8 @@ static void cdn_dp_audio_shutdown(struct drm_connector *connector, mutex_unlock(&dp->lock); } -static int cdn_dp_audio_mute_stream(struct drm_connector *connector, - struct drm_bridge *bridge, +static int cdn_dp_audio_mute_stream(struct drm_bridge *bridge, + struct drm_connector *connector, bool enable, int direction) { struct cdn_dp_device *dp = bridge_to_dp(bridge); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index d2454ba83db3..ccead3edf59a 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -817,8 +817,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_startup)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*hdmi_audio_startup)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @hdmi_audio_prepare: @@ -831,8 +831,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_prepare)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*hdmi_audio_prepare)(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); @@ -847,8 +847,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - void (*hdmi_audio_shutdown)(struct drm_connector 
*connector, - struct drm_bridge *bridge); + void (*hdmi_audio_shutdown)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @hdmi_audio_mute_stream: @@ -861,12 +861,12 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*hdmi_audio_mute_stream)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*hdmi_audio_mute_stream)(struct drm_bridge *bridge, + struct drm_connector *connector, bool enable, int direction); - int (*hdmi_cec_init)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*hdmi_cec_init)(struct drm_bridge *bridge, + struct drm_connector *connector); int (*hdmi_cec_enable)(struct drm_bridge *bridge, bool enable); @@ -886,8 +886,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_startup)(struct drm_connector *connector, - struct drm_bridge *bridge); + int (*dp_audio_startup)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @dp_audio_prepare: @@ -900,8 +900,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_prepare)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*dp_audio_prepare)(struct drm_bridge *bridge, + struct drm_connector *connector, struct hdmi_codec_daifmt *fmt, struct hdmi_codec_params *hparms); @@ -916,8 +916,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - void (*dp_audio_shutdown)(struct drm_connector *connector, - struct drm_bridge *bridge); + void (*dp_audio_shutdown)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @dp_audio_mute_stream: @@ -930,8 +930,8 @@ struct drm_bridge_funcs { * Returns: * 0 on success, a negative error code otherwise */ - int (*dp_audio_mute_stream)(struct drm_connector *connector, - struct drm_bridge *bridge, + int (*dp_audio_mute_stream)(struct drm_bridge *bridge, + struct drm_connector 
*connector, bool enable, int direction); /** From 5d156a9c3d5ea3dbec192121259dee2c2f938fa1 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Thu, 3 Jul 2025 20:49:53 +0800 Subject: [PATCH 173/358] drm/bridge: Pass down connector to drm bridge detect hook In some application scenarios, we hope to get the corresponding connector when the bridge's detect hook is invoked. In most cases, we can get the connector by drm_atomic_get_connector_for_encoder if the encoder attached to the bridge is enabled; however, there will still be some scenarios where the detect hook of the bridge is called but the corresponding encoder has not been enabled yet. For instance, this occurs when the device is hot-plugged for the first time. Since the call to the bridge's detect hook is initiated by the connector, passing down the corresponding connector directly will make things simpler. Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250703125027.311109-3-andyshrk@163.com [DB: added the chunk to the cdn-dp driver] Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 3 ++- drivers/gpu/drm/bridge/analogix/anx7625.c | 2 +- drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c | 3 ++- drivers/gpu/drm/bridge/chrontel-ch7033.c | 2 +- drivers/gpu/drm/bridge/display-connector.c | 11 ++++++++--- drivers/gpu/drm/bridge/ite-it6263.c | 3 ++- drivers/gpu/drm/bridge/ite-it6505.c | 2 +- drivers/gpu/drm/bridge/ite-it66121.c | 3 ++- drivers/gpu/drm/bridge/lontium-lt8912b.c | 6 +++--- drivers/gpu/drm/bridge/lontium-lt9611.c | 3 ++- drivers/gpu/drm/bridge/lontium-lt9611uxc.c | 3 ++- .../gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 3 ++- drivers/gpu/drm/bridge/sii902x.c | 3 ++- drivers/gpu/drm/bridge/simple-bridge.c | 2 +- drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c | 2 +- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 3 ++- drivers/gpu/drm/bridge/tc358767.c | 5 +++-- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 3 ++- drivers/gpu/drm/bridge/ti-tfp410.c | 2
+- drivers/gpu/drm/bridge/ti-tpd12s015.c | 8 +++++++- drivers/gpu/drm/display/drm_bridge_connector.c | 2 +- drivers/gpu/drm/drm_bridge.c | 5 +++-- drivers/gpu/drm/mediatek/mtk_dp.c | 3 ++- drivers/gpu/drm/mediatek/mtk_hdmi.c | 3 ++- drivers/gpu/drm/msm/dp/dp_drm.c | 3 ++- drivers/gpu/drm/msm/hdmi/hdmi.h | 2 +- drivers/gpu/drm/msm/hdmi/hdmi_bridge.c | 2 +- drivers/gpu/drm/msm/hdmi/hdmi_hpd.c | 4 ++-- drivers/gpu/drm/rockchip/cdn-dp-core.c | 2 +- drivers/gpu/drm/rockchip/rk3066_hdmi.c | 2 +- drivers/gpu/drm/xlnx/zynqmp_dp.c | 3 ++- include/drm/drm_bridge.h | 6 ++++-- 32 files changed, 69 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index f59d19b4b81a..00d6417c177b 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -864,7 +864,8 @@ static int adv7511_bridge_attach(struct drm_bridge *bridge, return ret; } -static enum drm_connector_status adv7511_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +adv7511_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct adv7511 *adv = bridge_to_adv7511(bridge); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 0ac4a82c5a6e..c0ad8f59e483 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2448,7 +2448,7 @@ anx7625_audio_update_connector_status(struct anx7625_data *ctx, enum drm_connector_status status); static enum drm_connector_status -anx7625_bridge_detect(struct drm_bridge *bridge) +anx7625_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct anx7625_data *ctx = bridge_to_anx7625(bridge); struct device *dev = ctx->dev; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c index cb5f5a8c539a..a614d1384f71 100644 --- 
a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-core.c @@ -2143,7 +2143,8 @@ static int cdns_mhdp_atomic_check(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status cdns_mhdp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +cdns_mhdp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct cdns_mhdp_device *mhdp = bridge_to_mhdp(bridge); diff --git a/drivers/gpu/drm/bridge/chrontel-ch7033.c b/drivers/gpu/drm/bridge/chrontel-ch7033.c index ab9274793356..54d49d4882c8 100644 --- a/drivers/gpu/drm/bridge/chrontel-ch7033.c +++ b/drivers/gpu/drm/bridge/chrontel-ch7033.c @@ -215,7 +215,7 @@ static enum drm_connector_status ch7033_connector_detect( { struct ch7033_priv *priv = conn_to_ch7033_priv(connector); - return drm_bridge_detect(priv->next_bridge); + return drm_bridge_detect(priv->next_bridge, connector); } static const struct drm_connector_funcs ch7033_connector_funcs = { diff --git a/drivers/gpu/drm/bridge/display-connector.c b/drivers/gpu/drm/bridge/display-connector.c index badd2c7f91a1..52b7b5889e6f 100644 --- a/drivers/gpu/drm/bridge/display-connector.c +++ b/drivers/gpu/drm/bridge/display-connector.c @@ -40,8 +40,7 @@ static int display_connector_attach(struct drm_bridge *bridge, return flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR ? 
0 : -EINVAL; } -static enum drm_connector_status -display_connector_detect(struct drm_bridge *bridge) +static enum drm_connector_status display_connector_detect(struct drm_bridge *bridge) { struct display_connector *conn = to_display_connector(bridge); @@ -82,6 +81,12 @@ display_connector_detect(struct drm_bridge *bridge) } } +static enum drm_connector_status +display_connector_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) +{ + return display_connector_detect(bridge); +} + static const struct drm_edid *display_connector_edid_read(struct drm_bridge *bridge, struct drm_connector *connector) { @@ -172,7 +177,7 @@ static u32 *display_connector_get_input_bus_fmts(struct drm_bridge *bridge, static const struct drm_bridge_funcs display_connector_bridge_funcs = { .attach = display_connector_attach, - .detect = display_connector_detect, + .detect = display_connector_bridge_detect, .edid_read = display_connector_edid_read, .atomic_get_output_bus_fmts = display_connector_get_output_bus_fmts, .atomic_get_input_bus_fmts = display_connector_get_input_bus_fmts, diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index c4eedf643f39..cf813672b4ff 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -693,7 +693,8 @@ static int it6263_bridge_attach(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status it6263_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +it6263_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it6263 *it = bridge_to_it6263(bridge); diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index b0dc9280d870..89649c17ffad 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -3238,7 +3238,7 @@ static void it6505_bridge_atomic_post_disable(struct drm_bridge *bridge, } static enum drm_connector_status 
-it6505_bridge_detect(struct drm_bridge *bridge) +it6505_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it6505 *it6505 = bridge_to_it6505(bridge); diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 6494f0842793..aa7b1dcc5d70 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -843,7 +843,8 @@ static enum drm_mode_status it66121_bridge_mode_valid(struct drm_bridge *bridge, return MODE_OK; } -static enum drm_connector_status it66121_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +it66121_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct it66121_ctx *ctx = container_of(bridge, struct it66121_ctx, bridge); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index bd83228b0f0e..342374cb8fc6 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -408,7 +408,7 @@ lt8912_connector_detect(struct drm_connector *connector, bool force) struct lt8912 *lt = connector_to_lt8912(connector); if (lt->hdmi_port->ops & DRM_BRIDGE_OP_DETECT) - return drm_bridge_detect(lt->hdmi_port); + return drm_bridge_detect(lt->hdmi_port, connector); return lt8912_check_cable_status(lt); } @@ -607,12 +607,12 @@ lt8912_bridge_mode_valid(struct drm_bridge *bridge, } static enum drm_connector_status -lt8912_bridge_detect(struct drm_bridge *bridge) +lt8912_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt8912 *lt = bridge_to_lt8912(bridge); if (lt->hdmi_port->ops & DRM_BRIDGE_OP_DETECT) - return drm_bridge_detect(lt->hdmi_port); + return drm_bridge_detect(lt->hdmi_port, connector); return lt8912_check_cable_status(lt); } diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index ff85ac8130b4..a2d032ee4744 100644 --- 
a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -543,7 +543,8 @@ static int lt9611_regulator_enable(struct lt9611 *lt9611) return 0; } -static enum drm_connector_status lt9611_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +lt9611_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); unsigned int reg_val = 0; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 766da2cb45a7..38fb8776c0f4 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -353,7 +353,8 @@ static void lt9611uxc_bridge_mode_set(struct drm_bridge *bridge, lt9611uxc_unlock(lt9611uxc); } -static enum drm_connector_status lt9611uxc_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +lt9611uxc_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct lt9611uxc *lt9611uxc = bridge_to_lt9611uxc(bridge); unsigned int reg_val = 0; diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 81dde9ed7bcf..de57f8a9e98c 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -120,7 +120,8 @@ drm_connector_helper_funcs ge_b850v3_lvds_connector_helper_funcs = { .get_modes = ge_b850v3_lvds_get_modes, }; -static enum drm_connector_status ge_b850v3_lvds_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +ge_b850v3_lvds_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 882973e90062..d537b1d036fb 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ 
b/drivers/gpu/drm/bridge/sii902x.c @@ -458,7 +458,8 @@ static int sii902x_bridge_attach(struct drm_bridge *bridge, return 0; } -static enum drm_connector_status sii902x_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +sii902x_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct sii902x *sii902x = bridge_to_sii902x(bridge); diff --git a/drivers/gpu/drm/bridge/simple-bridge.c b/drivers/gpu/drm/bridge/simple-bridge.c index c66bd913e33a..3d15ddd39470 100644 --- a/drivers/gpu/drm/bridge/simple-bridge.c +++ b/drivers/gpu/drm/bridge/simple-bridge.c @@ -90,7 +90,7 @@ simple_bridge_connector_detect(struct drm_connector *connector, bool force) { struct simple_bridge *sbridge = drm_connector_to_simple_bridge(connector); - return drm_bridge_detect(sbridge->next_bridge); + return drm_bridge_detect(sbridge->next_bridge, connector); } static const struct drm_connector_funcs simple_bridge_con_funcs = { diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index f9438e39b94a..39332c57f2c5 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -876,7 +876,7 @@ static void dw_hdmi_qp_bridge_atomic_disable(struct drm_bridge *bridge, } static enum drm_connector_status -dw_hdmi_qp_bridge_detect(struct drm_bridge *bridge) +dw_hdmi_qp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct dw_hdmi_qp *hdmi = bridge->driver_private; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 76c6570e2a85..206b099a35e9 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -2978,7 +2978,8 @@ static void dw_hdmi_bridge_atomic_enable(struct drm_bridge *bridge, mutex_unlock(&hdmi->mutex); } -static enum drm_connector_status dw_hdmi_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status 
+dw_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct dw_hdmi *hdmi = bridge->driver_private; diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 562fea47b3ec..4097fef4b86b 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1760,7 +1760,8 @@ static const struct drm_connector_helper_funcs tc_connector_helper_funcs = { .get_modes = tc_connector_get_modes, }; -static enum drm_connector_status tc_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +tc_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct tc_data *tc = bridge_to_tc(bridge); bool conn; @@ -1785,7 +1786,7 @@ tc_connector_detect(struct drm_connector *connector, bool force) struct tc_data *tc = connector_to_tc(connector); if (tc->hpd_pin >= 0) - return tc_bridge_detect(&tc->bridge); + return tc_bridge_detect(&tc->bridge, connector); if (tc->panel_bridge) return connector_status_connected; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 3d0b4bc5129d..575dc2667592 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1155,7 +1155,8 @@ static void ti_sn_bridge_atomic_post_disable(struct drm_bridge *bridge, pm_runtime_put_sync(pdata->dev); } -static enum drm_connector_status ti_sn_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +ti_sn_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct ti_sn65dsi86 *pdata = bridge_to_ti_sn65dsi86(bridge); int val = 0; diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 549e8e8edeb4..b80ee089f880 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -89,7 +89,7 @@ tfp410_connector_detect(struct drm_connector *connector, bool force) { struct tfp410 *dvi = drm_connector_to_tfp410(connector); 
- return drm_bridge_detect(dvi->next_bridge); + return drm_bridge_detect(dvi->next_bridge, connector); } static const struct drm_connector_funcs tfp410_con_funcs = { diff --git a/drivers/gpu/drm/bridge/ti-tpd12s015.c b/drivers/gpu/drm/bridge/ti-tpd12s015.c index 0919364e80d1..dcf686c4e73d 100644 --- a/drivers/gpu/drm/bridge/ti-tpd12s015.c +++ b/drivers/gpu/drm/bridge/ti-tpd12s015.c @@ -77,6 +77,12 @@ static enum drm_connector_status tpd12s015_detect(struct drm_bridge *bridge) return connector_status_disconnected; } +static enum drm_connector_status +tpd12s015_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) +{ + return tpd12s015_detect(bridge); +} + static void tpd12s015_hpd_enable(struct drm_bridge *bridge) { struct tpd12s015_device *tpd = to_tpd12s015(bridge); @@ -94,7 +100,7 @@ static void tpd12s015_hpd_disable(struct drm_bridge *bridge) static const struct drm_bridge_funcs tpd12s015_bridge_funcs = { .attach = tpd12s015_attach, .detach = tpd12s015_detach, - .detect = tpd12s015_detect, + .detect = tpd12s015_bridge_detect, .hpd_enable = tpd12s015_hpd_enable, .hpd_disable = tpd12s015_hpd_disable, }; diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 717d96530c38..3e4d561a3a32 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -210,7 +210,7 @@ drm_bridge_connector_detect(struct drm_connector *connector, bool force) enum drm_connector_status status; if (detect) { - status = detect->funcs->detect(detect); + status = detect->funcs->detect(detect, connector); if (hdmi) drm_atomic_helper_connector_hdmi_hotplug(connector, status); diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 0b450b334afd..dd45d9b504d8 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -1237,12 +1237,13 @@ EXPORT_SYMBOL(drm_atomic_bridge_chain_check); * The detection status on success, or 
connector_status_unknown if the bridge * doesn't support output detection. */ -enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge) +enum drm_connector_status +drm_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { if (!(bridge->ops & DRM_BRIDGE_OP_DETECT)) return connector_status_unknown; - return bridge->funcs->detect(bridge); + return bridge->funcs->detect(bridge, connector); } EXPORT_SYMBOL_GPL(drm_bridge_detect); diff --git a/drivers/gpu/drm/mediatek/mtk_dp.c b/drivers/gpu/drm/mediatek/mtk_dp.c index a5b10b2545dc..bef6eeb30d3e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dp.c +++ b/drivers/gpu/drm/mediatek/mtk_dp.c @@ -2118,7 +2118,8 @@ static void mtk_dp_update_plugged_status(struct mtk_dp *mtk_dp) mutex_unlock(&mtk_dp->update_plugged_status_lock); } -static enum drm_connector_status mtk_dp_bdg_detect(struct drm_bridge *bridge) +static enum drm_connector_status +mtk_dp_bdg_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct mtk_dp *mtk_dp = mtk_dp_from_bridge(bridge); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c index 6943cdc77dec..845fd8aa43c3 100644 --- a/drivers/gpu/drm/mediatek/mtk_hdmi.c +++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c @@ -1174,7 +1174,8 @@ static void mtk_hdmi_hpd_event(bool hpd, struct device *dev) * Bridge callbacks */ -static enum drm_connector_status mtk_hdmi_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +mtk_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge); diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index f222d7ccaa88..9a461ab2f32f 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -20,7 +20,8 @@ * @bridge: Pointer to drm bridge structure * Returns: Bridge's 'is connected' status */ -static 
enum drm_connector_status msm_dp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +msm_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct msm_dp *dp; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h index 1d02d4e1ed5b..02cfd46df594 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.h +++ b/drivers/gpu/drm/msm/hdmi/hdmi.h @@ -215,7 +215,7 @@ int msm_hdmi_bridge_init(struct hdmi *hdmi); void msm_hdmi_hpd_irq(struct drm_bridge *bridge); enum drm_connector_status msm_hdmi_bridge_detect( - struct drm_bridge *bridge); + struct drm_bridge *bridge, struct drm_connector *connector); void msm_hdmi_hpd_enable(struct drm_bridge *bridge); void msm_hdmi_hpd_disable(struct drm_bridge *bridge); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c index 53a7ce8cc7bc..46fd58646d32 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c @@ -475,7 +475,7 @@ msm_hdmi_hotplug_work(struct work_struct *work) container_of(work, struct hdmi_bridge, hpd_work); struct drm_bridge *bridge = &hdmi_bridge->base; - drm_bridge_hpd_notify(bridge, drm_bridge_detect(bridge)); + drm_bridge_hpd_notify(bridge, drm_bridge_detect(bridge, hdmi_bridge->hdmi->connector)); } /* initialize bridge */ diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c index 407e6c449ee0..114b0d507700 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi_hpd.c @@ -177,8 +177,8 @@ static enum drm_connector_status detect_gpio(struct hdmi *hdmi) connector_status_disconnected; } -enum drm_connector_status msm_hdmi_bridge_detect( - struct drm_bridge *bridge) +enum drm_connector_status +msm_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge); struct hdmi *hdmi = hdmi_bridge->hdmi; diff --git 
a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index c7e199ba123e..b7e3f5dcf8d5 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -233,7 +233,7 @@ static bool cdn_dp_check_sink_connection(struct cdn_dp_device *dp) } static enum drm_connector_status -cdn_dp_bridge_detect(struct drm_bridge *bridge) +cdn_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct cdn_dp_device *dp = bridge_to_dp(bridge); enum drm_connector_status status = connector_status_disconnected; diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index e7875b52f298..ae4a5ac2299a 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -450,7 +450,7 @@ struct drm_encoder_helper_funcs rk3066_hdmi_encoder_helper_funcs = { }; static enum drm_connector_status -rk3066_hdmi_bridge_detect(struct drm_bridge *bridge) +rk3066_hdmi_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct rk3066_hdmi *hdmi = bridge_to_rk3066_hdmi(bridge); diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 02e1feaa6115..588dd5610fa5 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1720,7 +1720,8 @@ static enum drm_connector_status __zynqmp_dp_bridge_detect(struct zynqmp_dp *dp) return connector_status_disconnected; } -static enum drm_connector_status zynqmp_dp_bridge_detect(struct drm_bridge *bridge) +static enum drm_connector_status +zynqmp_dp_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { struct zynqmp_dp *dp = bridge_to_dp(bridge); diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index ccead3edf59a..8ed80cad77ec 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -660,7 +660,8 @@ struct drm_bridge_funcs { * * drm_connector_status indicating the bridge output status. 
*/ - enum drm_connector_status (*detect)(struct drm_bridge *bridge); + enum drm_connector_status (*detect)(struct drm_bridge *bridge, + struct drm_connector *connector); /** * @get_modes: @@ -1382,7 +1383,8 @@ drm_atomic_helper_bridge_propagate_bus_fmt(struct drm_bridge *bridge, u32 output_fmt, unsigned int *num_input_fmts); -enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge); +enum drm_connector_status +drm_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector); int drm_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *connector); const struct drm_edid *drm_bridge_edid_read(struct drm_bridge *bridge, From 8d4aec43f6e7c52691ffa13c71ca03c6853a0f65 Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Fri, 11 Jul 2025 06:09:24 +0000 Subject: [PATCH 174/358] drm/xe: Update register definitions in LRC layout header Update the register definitions in xe_lrc_layout.h to align with the official hardware specification (Bspec) terminology. Specifically: - rename PVC_CTX_ACC_CTR_THOLD to CTX_ACC_CTR_THOLD - rename PVC_CTX_ASID to CTX_ASID Signed-off-by: Xin Wang Reviewed-by: Stuart Summers Reviewed-by: Lucas De Marchi Link: https://lore.kernel.org/r/20250711060924.7373-1-x.wang@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_lrc_layout.h | 2 ++ drivers/gpu/drm/xe/xe_lrc.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h index 11417c2d07dd..1b101edb838b 100644 --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h @@ -17,6 +17,8 @@ #define CTX_TIMESTAMP (0x22 + 1) #define CTX_TIMESTAMP_UDW (0x24 + 1) #define CTX_INDIRECT_RING_STATE (0x26 + 1) +#define CTX_ACC_CTR_THOLD (0x2a + 1) +#define CTX_ASID (0x2e + 1) #define CTX_PDP0_UDW (0x30 + 1) #define CTX_PDP0_LDW (0x32 + 1) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 
61b1935d1c46..0769d759b558 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1147,9 +1147,6 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe) return 0; } -#define PVC_CTX_ASID (0x2e + 1) -#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1) - static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_vm *vm, u32 ring_size, u16 msix_vec, u32 init_flags) @@ -1271,7 +1268,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0); if (xe->info.has_asid && vm) - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid); + xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid); lrc->desc = LRC_VALID; lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT); From 7dcae5288a0967493ba1b15e8194cb6bfb1a23ca Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:19 +0200 Subject: [PATCH 175/358] drm/xe: Combine PF and VF device data into union MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to keep PF and VF data fields fully separate since we can be in only one mode at a time. Move them into an anonymous union to save a few bytes. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250713103625.1964-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 1e6ecf1e6979..b41dd3e76b91 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -422,10 +422,12 @@ struct xe_device { /** @sriov.__mode: SR-IOV mode (Don't access directly!) 
*/ enum xe_sriov_mode __mode; - /** @sriov.pf: PF specific data */ - struct xe_device_pf pf; - /** @sriov.vf: VF specific data */ - struct xe_device_vf vf; + union { + /** @sriov.pf: PF specific data */ + struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; + }; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; From 159df89564c5d1fcd794e54ad48e1a05c64c7bc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Larumbe?= Date: Fri, 11 Jul 2025 16:45:53 +0100 Subject: [PATCH 176/358] drm/panthor: Remove dead VM flushing code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ec62d37d2c0d("drm/panthor: Fix the fast-reset logic") did away with the only reference to panthor_vm_flush_all(), so let's get rid of the orphaned definition. Signed-off-by: Adrián Larumbe Reviewed-by: Liviu Dudau Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20250711154557.739326-1-adrian.larumbe@collabora.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 11 ----------- drivers/gpu/drm/panthor/panthor_mmu.h | 1 - 2 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index b39ea6acc6a9..ed3712f8d6a9 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -885,17 +885,6 @@ static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) return ret; } -/** - * panthor_vm_flush_all() - Flush L2 caches for the entirety of a VM's AS - * @vm: VM whose cache to flush - * - * Return: 0 on success, a negative error code if flush failed. 
- */ -int panthor_vm_flush_all(struct panthor_vm *vm) -{ - return panthor_vm_flush_range(vm, vm->base.mm_start, vm->base.mm_range); -} - static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) { struct panthor_device *ptdev = vm->ptdev; diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h index fc274637114e..0e268fdfdb2f 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.h +++ b/drivers/gpu/drm/panthor/panthor_mmu.h @@ -33,7 +33,6 @@ int panthor_vm_active(struct panthor_vm *vm); void panthor_vm_idle(struct panthor_vm *vm); u32 panthor_vm_page_size(struct panthor_vm *vm); int panthor_vm_as(struct panthor_vm *vm); -int panthor_vm_flush_all(struct panthor_vm *vm); struct panthor_heap_pool * panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); From 73c0e8054fcf36883c1a20d5e2e91fb8ed24d3ea Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:20 +0200 Subject: [PATCH 177/358] drm/xe: Move PF and VF device types to separate headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We plan to add more PF and VF types and mixing them in a single file is not desired. Move them out to new dedicated files. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250713103625.1964-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device_types.h | 2 ++ drivers/gpu/drm/xe/xe_sriov_pf_types.h | 29 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_types.h | 36 -------------------------- drivers/gpu/drm/xe/xe_sriov_vf_types.h | 27 +++++++++++++++++++ 4 files changed, 58 insertions(+), 36 deletions(-) create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_types.h create mode 100644 drivers/gpu/drm/xe/xe_sriov_vf_types.h diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b41dd3e76b91..d4d2c6854790 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -21,7 +21,9 @@ #include "xe_platform_types.h" #include "xe_pmu_types.h" #include "xe_pt_types.h" +#include "xe_sriov_pf_types.h" #include "xe_sriov_types.h" +#include "xe_sriov_vf_types.h" #include "xe_step_types.h" #include "xe_survivability_mode_types.h" #include "xe_ttm_vram_mgr_types.h" diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h new file mode 100644 index 000000000000..918dc089eb1d --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_TYPES_H_ +#define _XE_SRIOV_PF_TYPES_H_ + +#include +#include + +/** + * struct xe_device_pf - Xe PF related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_PF mode. + */ +struct xe_device_pf { + /** @device_total_vfs: Maximum number of VFs supported by the device. */ + u16 device_total_vfs; + + /** @driver_max_vfs: Maximum number of VFs supported by the driver. 
*/ + u16 driver_max_vfs; + + /** @master_lock: protects all VFs configurations across GTs */ + struct mutex master_lock; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index ca94382a721e..1a138108d139 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -7,9 +7,6 @@ #define _XE_SRIOV_TYPES_H_ #include -#include -#include -#include /** * VFID - Virtual Function Identifier @@ -40,37 +37,4 @@ enum xe_sriov_mode { }; static_assert(XE_SRIOV_MODE_NONE); -/** - * struct xe_device_pf - Xe PF related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_PF mode. - */ -struct xe_device_pf { - /** @device_total_vfs: Maximum number of VFs supported by the device. */ - u16 device_total_vfs; - - /** @driver_max_vfs: Maximum number of VFs supported by the driver. */ - u16 driver_max_vfs; - - /** @master_lock: protects all VFs configurations across GTs */ - struct mutex master_lock; -}; - -/** - * struct xe_device_vf - Xe Virtual Function related data - * - * The data in this structure is valid only if driver is running in the - * @XE_SRIOV_MODE_VF mode. 
- */ -struct xe_device_vf { - /** @migration: VF Migration state data */ - struct { - /** @migration.worker: VF migration recovery worker */ - struct work_struct worker; - /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ - unsigned long gt_flags; - } migration; -}; - #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h new file mode 100644 index 000000000000..55c2421d4b2e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_TYPES_H_ +#define _XE_SRIOV_VF_TYPES_H_ + +#include + +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. + */ +struct xe_device_vf { + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + +#endif From 76293a83a9db7fb52e48f5ee320c3c6708f05a8e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:21 +0200 Subject: [PATCH 178/358] drm/xe: Introduce xe_tile_is_root helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of looking at the tile->id member provide a small helper to explicitly express our intentions. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250713103625.1964-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gsc_proxy.c | 3 ++- drivers/gpu/drm/xe/xe_irq.c | 3 ++- drivers/gpu/drm/xe/xe_tile.h | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index d0519cd6704a..464282a89eef 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -23,6 +23,7 @@ #include "xe_map.h" #include "xe_mmio.h" #include "xe_pm.h" +#include "xe_tile.h" /* * GSC proxy: @@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc) } /* no multi-tile devices with this feature yet */ - if (tile->id > 0) { + if (!xe_tile_is_root(tile)) { xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id); return -EINVAL; } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index c43e62dc692e..cbb1850d0cab 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -23,6 +23,7 @@ #include "xe_mmio.h" #include "xe_pxp.h" #include "xe_sriov.h" +#include "xe_tile.h" /* * Interrupt registers for a unit are always consecutive and ordered @@ -552,7 +553,7 @@ static void xelp_irq_reset(struct xe_tile *tile) static void dg1_irq_reset(struct xe_tile *tile) { - if (tile->id == 0) + if (xe_tile_is_root(tile)) dg1_intr_disable(tile_to_xe(tile)); gt_irq_reset(tile); diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h index 066a3d0cea79..cc33e8733983 100644 --- a/drivers/gpu/drm/xe/xe_tile.h +++ b/drivers/gpu/drm/xe/xe_tile.h @@ -27,4 +27,10 @@ static inline struct drm_pagemap *xe_tile_local_pagemap(struct xe_tile *tile) return NULL; } #endif + +static inline bool xe_tile_is_root(struct xe_tile *tile) +{ + return tile->id == 0; +} + #endif From ffab82b062a8e75f8877de363c9e203be7a241a7 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:22 +0200 
Subject: [PATCH 179/358] drm/xe: Introduce xe_gt_is_main_type helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of checking for not being a media type GT provide a small helper to explicitly express our intentions. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250713103625.1964-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_bb.c | 2 +- drivers/gpu/drm/xe/xe_force_wake.c | 2 +- drivers/gpu/drm/xe/xe_gt.c | 12 ++++----- drivers/gpu/drm/xe/xe_gt.h | 5 ++++ drivers/gpu/drm/xe/xe_gt_idle.c | 2 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 30 ++++++++++----------- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 4 +-- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 6 ++--- drivers/gpu/drm/xe/xe_irq.c | 4 +-- drivers/gpu/drm/xe/xe_oa.c | 6 ++--- 10 files changed, 39 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c index 9570672fce33..5ce0e26822f2 100644 --- a/drivers/gpu/drm/xe/xe_bb.c +++ b/drivers/gpu/drm/xe/xe_bb.c @@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); - if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) + if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt)) /* * RCS and CCS require 1K, although other engines would be * okay with 512. 
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c index 8a5cba22b586..c59a9b330697 100644 --- a/drivers/gpu/drm/xe/xe_force_wake.c +++ b/drivers/gpu/drm/xe/xe_force_wake.c @@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw) { int i, j; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) init_domain(fw, XE_FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER, FORCEWAKE_ACK_RENDER); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index af03e19ef9be..9dad4f79328e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -112,7 +112,7 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) if (!fw_ref) return; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); @@ -470,7 +470,7 @@ static int gt_init_with_gt_forcewake(struct xe_gt *gt) xe_gt_mcr_init(gt); xe_gt_enable_host_l2_vram(gt); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt); if (err) goto err_force_wake; @@ -547,7 +547,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) if (err) goto err_force_wake; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* * USM has its only SA pool to non-block behind user operations */ @@ -563,7 +563,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) } } - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { struct xe_tile *tile = gt_to_tile(gt); tile->migrate = xe_migrate_init(tile); @@ -583,7 +583,7 @@ static int gt_init_with_all_forcewake(struct xe_gt *gt) xe_gt_apply_ccs_mode(gt); } - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) { @@ -780,7 +780,7 @@ static 
int do_gt_restart(struct xe_gt *gt) if (err) return err; - if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) + if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt)) xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt); if (IS_SRIOV_PF(gt_to_xe(gt))) diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 8128ddfdd788..e9ccab8aedbe 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -106,6 +106,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt) xe_device_uc_enabled(gt_to_xe(gt)); } +static inline bool xe_gt_is_main_type(struct xe_gt *gt) +{ + return gt->info.type == XE_GT_TYPE_MAIN; +} + static inline bool xe_gt_is_media_type(struct xe_gt *gt) { return gt->info.type == XE_GT_TYPE_MEDIA; diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c index c11206410a4d..ffb210216aa9 100644 --- a/drivers/gpu/drm/xe/xe_gt_idle.c +++ b/drivers/gpu/drm/xe/xe_gt_idle.c @@ -121,7 +121,7 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt) if (vcs_mask || vecs_mask) gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE; - if (!xe_gt_is_media_type(gt)) + if (xe_gt_is_main_type(gt)) gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE; if (xe->info.platform != XE_DG1) { diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index d0cf1d80be07..9ba9dc7c4ee9 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -376,7 +376,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) { u64 spare; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -388,7 +388,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt) static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size) { - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, 
IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); @@ -443,7 +443,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size) int err; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); size = round_up(size, alignment); @@ -492,7 +492,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_ggtt_node *node = config->ggtt_region; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return xe_ggtt_node_allocated(node) ? node->base.size : 0; } @@ -560,7 +560,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size { int err; - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); if (vfid) @@ -622,7 +622,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -693,7 +693,7 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); fair = pf_estimate_fair_ggtt(gt, num_vfs); @@ -1406,7 +1406,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid) static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config) { xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); if (config->lmem_obj) { @@ -1425,7 +1425,7 @@ static int 
pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) xe_gt_assert(gt, vfid); xe_gt_assert(gt, IS_DGFX(xe)); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); size = round_up(size, pf_get_lmem_alignment(gt)); @@ -1552,7 +1552,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, int err = 0; xe_gt_assert(gt, vfid); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!num_vfs) return 0; @@ -1629,7 +1629,7 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); if (!xe_device_has_lmtt(gt_to_xe(gt))) return 0; @@ -1663,7 +1663,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid, xe_gt_assert(gt, vfid); xe_gt_assert(gt, num_vfs); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs); result = result ?: err; err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs); @@ -1991,7 +1991,7 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid) struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); struct xe_device *xe = gt_to_xe(gt); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_release_vf_config_ggtt(gt, config); if (IS_DGFX(xe)) { pf_release_vf_config_lmem(gt, config); @@ -2082,7 +2082,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout) * Only GGTT and LMEM requires to be cleared by the PF. * GuC doorbell IDs and context IDs do not need any clearing. 
*/ - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { pf_sanitize_ggtt(config->ggtt_region, vfid); if (IS_DGFX(xe)) err = pf_sanitize_lmem(tile, config->lmem_obj, timeout); @@ -2149,7 +2149,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); - bool is_primary = !xe_gt_is_media_type(gt); + bool is_primary = xe_gt_is_main_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 13970d5a2867..9b7772928d62 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -305,7 +305,7 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne xe_gt_assert(gt, gt == extract_gt(parent)); xe_gt_assert(gt, vfid == extract_vfid(parent)); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { debugfs_create_file_unsafe(vfid ? 
"ggtt_quota" : "ggtt_spare", 0644, parent, parent, &ggtt_fops); if (xe_device_has_lmtt(gt_to_xe(gt))) @@ -554,7 +554,7 @@ void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) pfdentry->d_inode->i_private = gt; drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor); - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { drm_debugfs_create_files(pf_ggtt_info, ARRAY_SIZE(pf_ggtt_info), pfdentry, minor); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 93cd26dca070..d65eafd91b17 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -552,7 +552,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt) if (unlikely(err)) return err; - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { err = vf_get_lmem_info(gt); if (unlikely(err)) return err; @@ -649,7 +649,7 @@ s64 xe_gt_sriov_vf_ggtt_shift(struct xe_gt *gt) struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - xe_gt_assert(gt, !xe_gt_is_media_type(gt)); + xe_gt_assert(gt, xe_gt_is_main_type(gt)); return config->ggtt_shift; } @@ -1036,7 +1036,7 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "GGTT shift on last restore:\t%lld\n", config->ggtt_shift); - if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) { + if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) { string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf)); drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf); } diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index cbb1850d0cab..5df5b8c2a3e4 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -162,7 +162,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) dmask = irqs << 16 | irqs; smask = irqs << 16; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { /* Enable 
interrupts for each engine class */ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask); if (ccs_mask) @@ -262,7 +262,7 @@ gt_engine_identity(struct xe_device *xe, static void gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir) { - if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt)) + if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt)) return xe_guc_irq_handler(&gt->uc.guc, iir); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a3379d39f76d..d991fbd90f20 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -2493,7 +2493,7 @@ int xe_oa_register(struct xe_device *xe) static u32 num_oa_units_per_gt(struct xe_gt *gt) { - if (!xe_gt_is_media_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) + if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) return 1; else if (!IS_DGFX(gt_to_xe(gt))) return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ @@ -2506,7 +2506,7 @@ static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) return XE_OA_UNIT_INVALID; - xe_gt_WARN_ON(hwe->gt, !xe_gt_is_media_type(hwe->gt)); + xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) return 0; @@ -2589,7 +2589,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) for (i = 0; i < num_units; i++) { struct xe_oa_unit *u = &gt->oa.oa_unit[i]; - if (!xe_gt_is_media_type(gt)) { + if (xe_gt_is_main_type(gt)) { u->regs = __oag_regs(); u->type = DRM_XE_OA_UNIT_TYPE_OAG; } else { From d962178a882a1db2f56953e0f956685a12eeb83f Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:23 +0200 Subject: [PATCH 180/358] drm/xe/pf: Expose basic info about VFs in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already have a function to print a summary about VFs, 
but we forgot to add a debugfs attribute to make it visible. Do it now. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250713103625.1964-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_debugfs.c | 4 +++ drivers/gpu/drm/xe/xe_sriov_pf.c | 43 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf.h | 6 +++++ 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 0e26658bcf7e..26e9d146ccbf 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -20,6 +20,7 @@ #include "xe_pm.h" #include "xe_pxp_debugfs.h" #include "xe_sriov.h" +#include "xe_sriov_pf.h" #include "xe_step.h" #include "xe_wa.h" @@ -293,4 +294,7 @@ void xe_debugfs_register(struct xe_device *xe) xe_pxp_debugfs_register(xe->pxp); fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); + + if (IS_SRIOV_PF(xe)) + xe_sriov_pf_debugfs_register(xe, root); } diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 0f721ae17b26..331755843e10 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -3,6 +3,8 @@ * Copyright © 2023-2024 Intel Corporation */ +#include +#include #include #include "xe_assert.h" @@ -102,3 +104,44 @@ void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p) drm_printf(p, "supported: %u\n", xe->sriov.pf.driver_max_vfs); drm_printf(p, "enabled: %u\n", pci_num_vf(pdev)); } + +static int simple_show(struct seq_file *m, void *data) +{ + struct drm_printer p = drm_seq_file_printer(m); + struct drm_info_node *node = m->private; + struct dentry *parent = node->dent->d_parent; + struct xe_device *xe = parent->d_inode->i_private; + void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data; + + print(xe, &p); + return 0; +} + +static const struct drm_info_list debugfs_list[] = { + { .name = "vfs", .show = simple_show, .data = 
xe_sriov_pf_print_vfs_summary }, +}; + +/** + * xe_sriov_pf_debugfs_register - Register PF debugfs attributes. + * @xe: the &xe_device + * @root: the root &dentry + * + * Prepare debugfs attributes exposed by the PF. + */ +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ + struct drm_minor *minor = xe->drm.primary; + struct dentry *parent; + + /* + * /sys/kernel/debug/dri/0/ + * ├── pf + * │   ├── ... + */ + parent = debugfs_create_dir("pf", root); + if (IS_ERR(parent)) + return; + parent->d_inode->i_private = xe; + + drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), parent, minor); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h index d1220e70e1c0..c392c3fcf085 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf.h @@ -8,12 +8,14 @@ #include +struct dentry; struct drm_printer; struct xe_device; #ifdef CONFIG_PCI_IOV bool xe_sriov_pf_readiness(struct xe_device *xe); int xe_sriov_pf_init_early(struct xe_device *xe); +void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root); void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p); #else static inline bool xe_sriov_pf_readiness(struct xe_device *xe) @@ -25,6 +27,10 @@ static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; } + +static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) +{ +} #endif #endif From a6c384b24f13bc3f315c226287601727b1e74969 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:24 +0200 Subject: [PATCH 181/358] drm/xe/pf: Stop requiring VF/PF version negotiation on every GT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While some VF/PF relay actions must be handled on the GT level, like query for runtime registers, it was clarified by the arch team that initial version negotiation can be done by the VF just once, by using 
any available GuC/GT. Move handling of the VF/PF ABI version negotiation on the PF side from the GT level functions to the device level functions. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Acked-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250713103625.1964-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/Makefile | 3 +- .../xe/tests/xe_gt_sriov_pf_service_test.c | 232 ------------------ .../drm/xe/tests/xe_sriov_pf_service_kunit.c | 227 +++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c | 7 +- drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c | 5 - drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c | 166 +------------ drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h | 2 - drivers/gpu/drm/xe/xe_sriov_pf.c | 18 +- drivers/gpu/drm/xe/xe_sriov_pf_service.c | 216 ++++++++++++++++ drivers/gpu/drm/xe/xe_sriov_pf_service.h | 23 ++ .../gpu/drm/xe/xe_sriov_pf_service_types.h | 36 +++ drivers/gpu/drm/xe/xe_sriov_pf_types.h | 16 ++ 12 files changed, 548 insertions(+), 403 deletions(-) delete mode 100644 drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c create mode 100644 drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_service.c create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_service.h create mode 100644 drivers/gpu/drm/xe/xe_sriov_pf_service_types.h diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 83a36c47a2f9..74231ca3fb9a 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -163,7 +163,8 @@ xe-$(CONFIG_PCI_IOV) += \ xe_lmtt_2l.o \ xe_lmtt_ml.o \ xe_pci_sriov.o \ - xe_sriov_pf.o + xe_sriov_pf.o \ + xe_sriov_pf_service.o # include helpers for tests even when XE is built-in ifdef CONFIG_DRM_XE_KUNIT_TEST diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c deleted file mode 100644 index b683585db852..000000000000 --- 
a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 AND MIT -/* - * Copyright © 2024 Intel Corporation - */ - -#include - -#include "xe_device.h" -#include "xe_kunit_helpers.h" -#include "xe_pci_test.h" - -static int pf_service_test_init(struct kunit *test) -{ - struct xe_pci_fake_data fake = { - .sriov_mode = XE_SRIOV_MODE_PF, - .platform = XE_TIGERLAKE, /* some random platform */ - .subplatform = XE_SUBPLATFORM_NONE, - }; - struct xe_device *xe; - struct xe_gt *gt; - - test->priv = &fake; - xe_kunit_helper_xe_device_test_init(test); - - xe = test->priv; - KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); - - gt = xe_device_get_gt(xe, 0); - pf_init_versions(gt); - - /* - * sanity check: - * - all supported platforms VF/PF ABI versions must be defined - * - base version can't be newer than latest - */ - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.latest.major); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor, - gt->sriov.pf.service.version.latest.minor); - - test->priv = gt; - return 0; -} - -static void pf_negotiate_any(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY, - VF2PF_HANDSHAKE_MINOR_ANY, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_base_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - 
gt->sriov.pf.service.version.base.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor); -} - -static void pf_negotiate_base_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor); - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); - KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major); - if (major == gt->sriov.pf.service.version.latest.major) - KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor); - else - KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); -} - -static void pf_negotiate_base_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.base.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major, - gt->sriov.pf.service.version.base.minor - 1, - &major, &minor)); -} - -static void pf_negotiate_base_prev(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - 
KUNIT_ASSERT_NE(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.base.major - 1, 1, - &major, &minor)); -} - -static void pf_negotiate_latest_match(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_newer(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_next(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major + 1, 0, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor); -} - -static void pf_negotiate_latest_older(struct kunit *test) -{ - struct xe_gt *gt = test->priv; - u32 major, minor; - - if (!gt->sriov.pf.service.version.latest.minor) - kunit_skip(test, "no older minor\n"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major, - gt->sriov.pf.service.version.latest.minor - 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major); - KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1); -} - -static void pf_negotiate_latest_prev(struct kunit *test) -{ - 
struct xe_gt *gt = test->priv; - u32 major, minor; - - if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major) - kunit_skip(test, "no prev major"); - - KUNIT_ASSERT_EQ(test, 0, - pf_negotiate_version(gt, - gt->sriov.pf.service.version.latest.major - 1, - gt->sriov.pf.service.version.base.minor + 1, - &major, &minor)); - KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1); - KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major); -} - -static struct kunit_case pf_service_test_cases[] = { - KUNIT_CASE(pf_negotiate_any), - KUNIT_CASE(pf_negotiate_base_match), - KUNIT_CASE(pf_negotiate_base_newer), - KUNIT_CASE(pf_negotiate_base_next), - KUNIT_CASE(pf_negotiate_base_older), - KUNIT_CASE(pf_negotiate_base_prev), - KUNIT_CASE(pf_negotiate_latest_match), - KUNIT_CASE(pf_negotiate_latest_newer), - KUNIT_CASE(pf_negotiate_latest_next), - KUNIT_CASE(pf_negotiate_latest_older), - KUNIT_CASE(pf_negotiate_latest_prev), - {} -}; - -static struct kunit_suite pf_service_suite = { - .name = "pf_service", - .test_cases = pf_service_test_cases, - .init = pf_service_test_init, -}; - -kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c new file mode 100644 index 000000000000..ba95e29b597d --- /dev/null +++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 AND MIT +/* + * Copyright © 2024-2025 Intel Corporation + */ + +#include + +#include "xe_device.h" +#include "xe_kunit_helpers.h" +#include "xe_pci_test.h" + +static int pf_service_test_init(struct kunit *test) +{ + struct xe_pci_fake_data fake = { + .sriov_mode = XE_SRIOV_MODE_PF, + .platform = XE_TIGERLAKE, /* some random platform */ + .subplatform = XE_SUBPLATFORM_NONE, + }; + struct xe_device *xe; + + test->priv = &fake; + xe_kunit_helper_xe_device_test_init(test); + + xe = test->priv; + 
KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0); + + xe_sriov_pf_service_init(xe); + /* + * sanity check: + * - all supported platforms VF/PF ABI versions must be defined + * - base version can't be newer than latest + */ + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.latest.major); + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor, + xe->sriov.pf.service.version.latest.minor); + return 0; +} + +static void pf_negotiate_any(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY, + VF2PF_HANDSHAKE_MINOR_ANY, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_base_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor); +} + +static void pf_negotiate_base_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor); + if (xe->sriov.pf.service.version.base.major == 
xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); + KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major); + if (major == xe->sriov.pf.service.version.latest.major) + KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor); + else + KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n"); +} + +static void pf_negotiate_base_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.base.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major, + xe->sriov.pf.service.version.base.minor - 1, + &major, &minor)); +} + +static void pf_negotiate_base_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_NE(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.base.major - 1, 1, + &major, &minor)); +} + +static void pf_negotiate_latest_match(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_newer(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + 
+ KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_next(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major + 1, 0, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor); +} + +static void pf_negotiate_latest_older(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (!xe->sriov.pf.service.version.latest.minor) + kunit_skip(test, "no older minor\n"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major, + xe->sriov.pf.service.version.latest.minor - 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major); + KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1); +} + +static void pf_negotiate_latest_prev(struct kunit *test) +{ + struct xe_device *xe = test->priv; + u32 major, minor; + + if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major) + kunit_skip(test, "no prev major"); + + KUNIT_ASSERT_EQ(test, 0, + pf_negotiate_version(xe, + xe->sriov.pf.service.version.latest.major - 1, + xe->sriov.pf.service.version.base.minor + 1, + &major, &minor)); + KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1); + KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major); +} + +static struct kunit_case pf_service_test_cases[] = { + KUNIT_CASE(pf_negotiate_any), + KUNIT_CASE(pf_negotiate_base_match), + 
KUNIT_CASE(pf_negotiate_base_newer), + KUNIT_CASE(pf_negotiate_base_next), + KUNIT_CASE(pf_negotiate_base_older), + KUNIT_CASE(pf_negotiate_base_prev), + KUNIT_CASE(pf_negotiate_latest_match), + KUNIT_CASE(pf_negotiate_latest_newer), + KUNIT_CASE(pf_negotiate_latest_next), + KUNIT_CASE(pf_negotiate_latest_older), + KUNIT_CASE(pf_negotiate_latest_prev), + {} +}; + +static struct kunit_suite pf_service_suite = { + .name = "pf_service", + .test_cases = pf_service_test_cases, + .init = pf_service_test_init, +}; + +kunit_test_suite(pf_service_suite); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c index 1f50aec3a059..4f7fff892bc0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c @@ -15,10 +15,11 @@ #include "xe_gt_sriov_pf_helpers.h" #include "xe_gt_sriov_pf_migration.h" #include "xe_gt_sriov_pf_monitor.h" -#include "xe_gt_sriov_pf_service.h" #include "xe_gt_sriov_printk.h" #include "xe_guc_ct.h" #include "xe_sriov.h" +#include "xe_sriov_pf_service.h" +#include "xe_tile.h" static const char *control_cmd_to_string(u32 cmd) { @@ -1064,7 +1065,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid) if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA)) return false; - xe_gt_sriov_pf_service_reset(gt, vfid); + if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt)) + xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid); + xe_gt_sriov_pf_monitor_flr(gt, vfid); pf_enter_vf_flr_reset_mmio(gt, vfid); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 9b7772928d62..bf679b21f485 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -77,11 +77,6 @@ static const struct drm_info_list pf_info[] = { .show = xe_gt_debugfs_simple_show, .data = xe_gt_sriov_pf_service_print_runtime, }, - { - "negotiated_versions", - .show = 
xe_gt_debugfs_simple_show, - .data = xe_gt_sriov_pf_service_print_version, - }, { "adverse_events", .show = xe_gt_debugfs_simple_show, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c index fa74b3e1a964..76dd9233ef9f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c @@ -19,91 +19,7 @@ #include "xe_gt_sriov_pf_service_types.h" #include "xe_guc_ct.h" #include "xe_guc_hxg_helpers.h" - -static void pf_init_versions(struct xe_gt *gt) -{ - BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); - BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); - - /* base versions may differ between platforms */ - gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; - gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; - - /* latest version is same for all platforms */ - gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; - gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; -} - -/* Return: 0 on success or a negative error code on failure. 
*/ -static int pf_negotiate_version(struct xe_gt *gt, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base; - struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest; - - xe_gt_assert(gt, base.major); - xe_gt_assert(gt, base.major <= latest.major); - xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor)); - - /* VF doesn't care - return our latest */ - if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && - wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants newer than our - return our latest */ - if (wanted_major > latest.major) { - *major = latest.major; - *minor = latest.minor; - return 0; - } - - /* VF wants older than min required - reject */ - if (wanted_major < base.major || - (wanted_major == base.major && wanted_minor < base.minor)) { - return -EPERM; - } - - /* previous major - return wanted, as we should still support it */ - if (wanted_major < latest.major) { - /* XXX: we are not prepared for multi-versions yet */ - xe_gt_assert(gt, base.major == latest.major); - return -ENOPKG; - } - - /* same major - return common minor */ - *major = wanted_major; - *minor = min_t(u32, latest.minor, wanted_minor); - return 0; -} - -static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - xe_gt_assert(gt, major || minor); - - gt->sriov.pf.vfs[vfid].version.major = major; - gt->sriov.pf.vfs[vfid].version.minor = minor; -} - -static void pf_disconnect(struct xe_gt *gt, u32 vfid) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - gt->sriov.pf.vfs[vfid].version.major = 0; - gt->sriov.pf.vfs[vfid].version.minor = 0; -} - -static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor) -{ - xe_gt_sriov_pf_assert_vfid(gt, vfid); - - return major == gt->sriov.pf.vfs[vfid].version.major && - 
minor <= gt->sriov.pf.vfs[vfid].version.minor; -} +#include "xe_sriov_pf_service.h" static const struct xe_reg tgl_runtime_regs[] = { RPM_CONFIG0, /* _MMIO(0x0d00) */ @@ -285,8 +201,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt) { int err; - pf_init_versions(gt); - err = pf_alloc_runtime_info(gt); if (unlikely(err)) goto failed; @@ -311,47 +225,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt) pf_prepare_runtime_info(gt); } -/** - * xe_gt_sriov_pf_service_reset - Reset a connection with the VF. - * @gt: the &xe_gt - * @vfid: the VF identifier - * - * Reset a VF driver negotiated VF/PF ABI version. - * After that point, the VF driver will have to perform new version handshake - * to continue use of the PF services again. - * - * This function can only be called on PF. - */ -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid) -{ - pf_disconnect(gt, vfid); -} - -/* Return: 0 on success or a negative error code on failure. */ -static int pf_process_handshake(struct xe_gt *gt, u32 vfid, - u32 wanted_major, u32 wanted_minor, - u32 *major, u32 *minor) -{ - int err; - - xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n", - vfid, wanted_major, wanted_minor); - - err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor); - - if (err < 0) { - xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n", - vfid, wanted_major, wanted_minor, ERR_PTR(err)); - pf_disconnect(gt, vfid); - } else { - xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n", - vfid, *major, *minor); - pf_connect(gt, vfid, *major, *minor); - } - - return 0; -} - /* Return: length of the response message or a negative error code on failure. 
*/ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, const u32 *request, u32 len, u32 *response, u32 size) @@ -371,7 +244,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin, wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]); wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]); - err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor); + err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor, + &major, &minor); if (err < 0) return err; @@ -430,8 +304,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin, u32 remaining = 0; int ret; - if (!pf_is_negotiated(gt, origin, 1, 0)) + /* this action is available from ABI 1.0 */ + if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0)) return -EACCES; + if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) return -EMSGSIZE; if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN)) @@ -528,33 +404,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p return 0; } - -/** - * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs. - * @gt: the &xe_gt - * @p: the &drm_printer - * - * This function is for PF use only. 
- */ -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p) -{ - struct xe_device *xe = gt_to_xe(gt); - unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); - struct xe_gt_sriov_pf_service_version *version; - - xe_gt_assert(gt, IS_SRIOV_PF(xe)); - - for (n = 1; n <= total_vfs; n++) { - version = &gt->sriov.pf.vfs[n].version; - if (!version->major && !version->minor) - continue; - - drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor); - } - - return 0; -} - -#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) -#include "tests/xe_gt_sriov_pf_service_test.c" -#endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h index 56aaadf0360d..10b02c9b651c 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h @@ -14,9 +14,7 @@ struct xe_gt; int xe_gt_sriov_pf_service_init(struct xe_gt *gt); void xe_gt_sriov_pf_service_update(struct xe_gt *gt); -void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid); -int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p); int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p); #ifdef CONFIG_PCI_IOV diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c index 331755843e10..afbdd894bd6e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_sriov_pf.c @@ -12,6 +12,8 @@ #include "xe_module.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_pf_service.h" #include "xe_sriov_printk.h" static unsigned int wanted_max_vfs(struct xe_device *xe) @@ -82,9 +84,22 @@ bool xe_sriov_pf_readiness(struct xe_device *xe) */ int xe_sriov_pf_init_early(struct xe_device *xe) { + int err; + xe_assert(xe, IS_SRIOV_PF(xe)); - return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe), + 
sizeof(*xe->sriov.pf.vfs), GFP_KERNEL); + if (!xe->sriov.pf.vfs) + return -ENOMEM; + + err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock); + if (err) + return err; + + xe_sriov_pf_service_init(xe); + + return 0; } /** @@ -119,6 +134,7 @@ static int simple_show(struct seq_file *m, void *data) static const struct drm_info_list debugfs_list[] = { { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary }, + { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions }, }; /** diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c new file mode 100644 index 000000000000..eee3b2a1ba41 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#include "abi/guc_relay_actions_abi.h" + +#include "xe_device_types.h" +#include "xe_sriov.h" +#include "xe_sriov_pf_helpers.h" +#include "xe_sriov_printk.h" + +#include "xe_sriov_pf_service.h" +#include "xe_sriov_pf_service_types.h" + +/** + * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service. + * @xe: the &xe_device to initialize + * + * Performs early initialization of the SR-IOV PF service. + * + * This function can only be called on PF. 
+ */ +void xe_sriov_pf_service_init(struct xe_device *xe) +{ + BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR); + BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR); + + xe_assert(xe, IS_SRIOV_PF(xe)); + + /* base versions may differ between platforms */ + xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR; + xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR; + + /* latest version is same for all platforms */ + xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR; + xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR; +} + +/* Return: 0 on success or a negative error code on failure. */ +static int pf_negotiate_version(struct xe_device *xe, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base; + struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest; + + xe_assert(xe, IS_SRIOV_PF(xe)); + xe_assert(xe, base.major); + xe_assert(xe, base.major <= latest.major); + xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor)); + + /* VF doesn't care - return our latest */ + if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY && + wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants newer than our - return our latest */ + if (wanted_major > latest.major) { + *major = latest.major; + *minor = latest.minor; + return 0; + } + + /* VF wants older than min required - reject */ + if (wanted_major < base.major || + (wanted_major == base.major && wanted_minor < base.minor)) { + return -EPERM; + } + + /* previous major - return wanted, as we should still support it */ + if (wanted_major < latest.major) { + /* XXX: we are not prepared for multi-versions yet */ + xe_assert(xe, base.major == latest.major); + return -ENOPKG; + } + + /* same major - 
return common minor */ + *major = wanted_major; + *minor = min_t(u32, latest.minor, wanted_minor); + return 0; +} + +static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + xe_assert(xe, major || minor); + + xe->sriov.pf.vfs[vfid].version.major = major; + xe->sriov.pf.vfs[vfid].version.minor = minor; +} + +static void pf_disconnect(struct xe_device *xe, u32 vfid) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + xe->sriov.pf.vfs[vfid].version.major = 0; + xe->sriov.pf.vfs[vfid].version.minor = 0; +} + +/** + * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version. + * @xe: the &xe_device + * @vfid: the VF identifier + * @major: the major version to check + * @minor: the minor version to check + * + * Checks that the VF negotiated the same major and at least the given minor version. + * + * This function can only be called on PF. + * + * Returns: true if VF can use given ABI version functionality. + */ +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor) +{ + xe_sriov_pf_assert_vfid(xe, vfid); + + return major == xe->sriov.pf.vfs[vfid].version.major && + minor <= xe->sriov.pf.vfs[vfid].version.minor; +} + +/** + * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * @wanted_major: the major service version expected by the VF + * @wanted_minor: the minor service version expected by the VF + * @major: the major service version to be used by the VF + * @minor: the minor service version to be used by the VF + * + * Negotiate a VF/PF ABI version to allow VF use the PF services. + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. 
+ */ +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor) +{ + int err; + + xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n", + vfid, wanted_major, wanted_minor); + + err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor); + + if (err < 0) { + xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n", + vfid, wanted_major, wanted_minor, ERR_PTR(err)); + pf_disconnect(xe, vfid); + } else { + xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n", + vfid, *major, *minor); + pf_connect(xe, vfid, *major, *minor); + } + + return err; +} + +/** + * xe_sriov_pf_service_reset_vf - Reset a connection with the VF. + * @xe: the &xe_device + * @vfid: the VF identifier + * + * Reset a VF driver negotiated VF/PF ABI version. + * + * After that point, the VF driver will have to perform new version handshake + * to continue use of the PF services again. + * + * This function can only be called on PF. + */ +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid) +{ + pf_disconnect(xe, vfid); +} + +static void print_pf_version(struct drm_printer *p, const char *name, + const struct xe_sriov_pf_service_version *version) +{ + drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor); +} + +/** + * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs. + * @xe: the &xe_device + * @p: the &drm_printer + * + * This function is for PF use only. 
+ */ +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p) +{ + unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe); + struct xe_sriov_pf_service_version *version; + char name[8]; + + xe_assert(xe, IS_SRIOV_PF(xe)); + + print_pf_version(p, "base", &xe->sriov.pf.service.version.base); + print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest); + + for (n = 1; n <= total_vfs; n++) { + version = &xe->sriov.pf.vfs[n].version; + if (!version->major && !version->minor) + continue; + + print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version); + } +} + +#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST) +#include "tests/xe_sriov_pf_service_kunit.c" +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h new file mode 100644 index 000000000000..d38c18f5ed10 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_H_ +#define _XE_SRIOV_PF_SERVICE_H_ + +#include + +struct drm_printer; +struct xe_device; + +void xe_sriov_pf_service_init(struct xe_device *xe); +void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p); + +int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid, + u32 wanted_major, u32 wanted_minor, + u32 *major, u32 *minor); +bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor); +void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid); + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h new file mode 100644 index 000000000000..0835dde358c1 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2025 Intel Corporation + */ + +#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_ +#define 
_XE_SRIOV_PF_SERVICE_TYPES_H_ + +#include + +/** + * struct xe_sriov_pf_service_version - VF/PF ABI Version. + * @major: the major version of the VF/PF ABI + * @minor: the minor version of the VF/PF ABI + * + * See `GuC Relay Communication`_. + */ +struct xe_sriov_pf_service_version { + u16 major; + u16 minor; +}; + +/** + * struct xe_sriov_pf_service - Data used by the PF service. + * @version: information about VF/PF ABI versions for current platform. + * @version.base: lowest VF/PF ABI version that could be negotiated with VF. + * @version.latest: latest VF/PF ABI version supported by the PF driver. + */ +struct xe_sriov_pf_service { + struct { + struct xe_sriov_pf_service_version base; + struct xe_sriov_pf_service_version latest; + } version; +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h index 918dc089eb1d..956a88f9f213 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h @@ -9,6 +9,16 @@ #include #include +#include "xe_sriov_pf_service_types.h" + +/** + * struct xe_sriov_metadata - per-VF device level metadata + */ +struct xe_sriov_metadata { + /** @version: negotiated VF/PF ABI version */ + struct xe_sriov_pf_service_version version; +}; + /** * struct xe_device_pf - Xe PF related data * @@ -24,6 +34,12 @@ struct xe_device_pf { /** @master_lock: protects all VFs configurations across GTs */ struct mutex master_lock; + + /** @service: device level service data. */ + struct xe_sriov_pf_service service; + + /** @vfs: metadata for all VFs. */ + struct xe_sriov_metadata *vfs; }; #endif From b533b8e5a1f90aa15bb6e021cbf84cba2ea23e00 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Sun, 13 Jul 2025 12:36:25 +0200 Subject: [PATCH 182/358] drm/xe/vf: Store negotiated VF/PF ABI version at device level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no need to maintain PF ABI version on per-GT level. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250713103625.1964-8-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_vf.c | 28 +++++++++++++---------- drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h | 12 ---------- drivers/gpu/drm/xe/xe_sriov_vf_types.h | 14 ++++++++++++ 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index d65eafd91b17..b282838d59e6 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -686,21 +686,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor) return 0; } -static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor) +static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + xe_assert(xe, IS_SRIOV_VF(xe)); - gt->sriov.vf.pf_version.major = major; - gt->sriov.vf.pf_version.minor = minor; + xe->sriov.vf.pf_version.major = major; + xe->sriov.vf.pf_version.minor = minor; } -static void vf_disconnect_pf(struct xe_gt *gt) +static void vf_disconnect_pf(struct xe_device *xe) { - vf_connect_pf(gt, 0, 0); + vf_connect_pf(xe, 0, 0); } static int vf_handshake_with_pf(struct xe_gt *gt) { + struct xe_device *xe = gt_to_xe(gt); u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR; u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR; u32 major = major_wanted, minor = minor_wanted; @@ -716,13 +717,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt) } xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor); - vf_connect_pf(gt, major, minor); + vf_connect_pf(xe, major, minor); return 0; failed: xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n", major, minor, ERR_PTR(err)); - vf_disconnect_pf(gt); + vf_disconnect_pf(xe); return err; } @@ -775,10 +776,12 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) static bool vf_is_negotiated(struct xe_gt 
*gt, u16 major, u16 minor) { - xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + struct xe_device *xe = gt_to_xe(gt); - return major == gt->sriov.vf.pf_version.major && - minor <= gt->sriov.vf.pf_version.minor; + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + return major == xe->sriov.vf.pf_version.major && + minor <= xe->sriov.vf.pf_version.minor; } static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs) @@ -1072,9 +1075,10 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p) */ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p) { + struct xe_device *xe = gt_to_xe(gt); struct xe_uc_fw_version *guc_version = >->sriov.vf.guc_version; struct xe_uc_fw_version *wanted = >->sriov.vf.wanted_guc_version; - struct xe_gt_sriov_vf_relay_version *pf_version = >->sriov.vf.pf_version; + struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version; struct xe_uc_fw_version ver; xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h index ef041679e9d4..298dedf4b009 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h @@ -9,16 +9,6 @@ #include #include "xe_uc_fw_types.h" -/** - * struct xe_gt_sriov_vf_relay_version - PF ABI version details. - */ -struct xe_gt_sriov_vf_relay_version { - /** @major: major version. */ - u16 major; - /** @minor: minor version. */ - u16 minor; -}; - /** * struct xe_gt_sriov_vf_selfconfig - VF configuration data. */ @@ -66,8 +56,6 @@ struct xe_gt_sriov_vf { struct xe_uc_fw_version guc_version; /** @self_config: resource configurations. */ struct xe_gt_sriov_vf_selfconfig self_config; - /** @pf_version: negotiated VF/PF ABI version. */ - struct xe_gt_sriov_vf_relay_version pf_version; /** @runtime: runtime data retrieved from the PF. 
*/ struct xe_gt_sriov_vf_runtime runtime; }; diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h index 55c2421d4b2e..8300416a6226 100644 --- a/drivers/gpu/drm/xe/xe_sriov_vf_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h @@ -6,8 +6,19 @@ #ifndef _XE_SRIOV_VF_TYPES_H_ #define _XE_SRIOV_VF_TYPES_H_ +#include #include +/** + * struct xe_sriov_vf_relay_version - PF ABI version details. + */ +struct xe_sriov_vf_relay_version { + /** @major: major version. */ + u16 major; + /** @minor: minor version. */ + u16 minor; +}; + /** * struct xe_device_vf - Xe Virtual Function related data * @@ -15,6 +26,9 @@ * @XE_SRIOV_MODE_VF mode. */ struct xe_device_vf { + /** @pf_version: negotiated VF/PF ABI version. */ + struct xe_sriov_vf_relay_version pf_version; + /** @migration: VF Migration state data */ struct { /** @migration.worker: VF migration recovery worker */ From 1a304a2f8f7dbe25f555721f502227f9197145ed Mon Sep 17 00:00:00 2001 From: Val Packett Date: Sun, 6 Jul 2025 17:50:27 -0300 Subject: [PATCH 183/358] drm/panel-edp: Add BOE NE14QDM panel for Dell Latitude 7455 Cannot confirm which variant exactly it is, as the EDID alphanumeric data contains '0RGNR' <0x80> 'NE14QDM' and ends there; but it's 60 Hz and with touch. I do not have access to datasheets for these panels, so the timing is a guess that was tested to work fine on this laptop. 
Raw EDID dump: 00 ff ff ff ff ff ff 00 09 e5 1e 0b 00 00 00 00 10 20 01 04 a5 1e 13 78 07 fd 85 a7 53 4c 9b 25 0f 50 54 00 00 00 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 01 a7 6d 00 a0 a0 40 78 60 30 20 36 00 2e bc 10 00 00 1a b9 57 00 a0 a0 40 78 60 30 20 36 00 2e bc 10 00 00 1a 00 00 00 fe 00 30 52 47 4e 52 80 4e 45 31 34 51 44 4d 00 00 00 00 00 02 41 31 a8 00 01 00 00 1a 41 0a 20 20 00 8f Signed-off-by: Val Packett Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250706205723.9790-7-val@packett.cool --- drivers/gpu/drm/panel/panel-edp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 3796c41629cc..9a56e208cbdd 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1939,6 +1939,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ac5, &delay_200_500_e50, "NV116WHM-N4C"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0ae8, &delay_200_500_e50_p2e80, "NV140WUM-N41"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b09, &delay_200_500_e50_po2e200, "NV140FHM-NZ"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b1e, &delay_200_500_e80, "NE140QDM-N6A"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b34, &delay_200_500_e80, "NV122WUM-N41"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b43, &delay_200_500_e200, "NV140FHM-T09"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"), From 76650bcf2ae49106a9164406c90feba4c3135763 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:46 -0700 Subject: [PATCH 184/358] drm/xe/lrc: Reduce scope of empty lrc data The only case in which new lrc data is created from scratch is when it's called prior to recording the default lrc. There's no need to check for NULL init_data since in that case the function already failed: just move the allocation where it's needed. 
Reviewed-by: Tvrtko Ursulin Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-1-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 0769d759b558..682c5b69fee8 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1157,7 +1157,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, struct xe_tile *tile = gt_to_tile(gt); struct xe_device *xe = gt_to_xe(gt); struct iosys_map map; - void *init_data = NULL; u32 arb_enable; u32 bo_flags; int err; @@ -1195,25 +1194,26 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt, hwe->fence_irq, hwe->name); - if (!gt->default_lrc[hwe->class]) { - init_data = empty_lrc_data(hwe); - if (!init_data) { - err = -ENOMEM; - goto err_lrc_finish; - } - } - /* * Init Per-Process of HW status Page, LRC / context state to known - * values + * values. If there's already a primed default_lrc, just copy it, otherwise + * it's the early submission to record the lrc: build a new empty one from + * scratch. 
*/ map = __xe_lrc_pphwsp_map(lrc); - if (!init_data) { + if (gt->default_lrc[hwe->class]) { xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */ xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE, gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE, lrc_size - LRC_PPHWSP_SIZE); } else { + void *init_data = empty_lrc_data(hwe); + + if (!init_data) { + err = -ENOMEM; + goto err_lrc_finish; + } + xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size); kfree(init_data); } From e4cb5823ba3e2668ef5c164898e2aa2c0ad73742 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:47 -0700 Subject: [PATCH 185/358] drm/xe: Count dwords before allocating The bb allocation in emit_wa_job() is wrong in 2 ways: first it's allocating enough space for the 3DSTATE or hardcoding 4k depending on the engine. In the first case it doesn't account for the WAs and in the former it may not be sufficient. Secondly it's using the size instead of number of dwords, causing the buffer to be 4x bigger than needed: xe_bb_new() receives number of dwords as parameter and its declaration was also not following its implementation. Lastly, reword the debug message since it's not only about the LRC WAs anymore as it also include the 3DSTATE for render. While it's unlikely this is causing any real issue, let's calculate the needed space and allocate just enough. 
Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-2-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_bb.h | 2 +- drivers/gpu/drm/xe/xe_gt.c | 38 ++++++++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h index fafacd73dcc3..b5cc65506696 100644 --- a/drivers/gpu/drm/xe/xe_bb.h +++ b/drivers/gpu/drm/xe/xe_bb.h @@ -14,7 +14,7 @@ struct xe_gt; struct xe_exec_queue; struct xe_sched_job; -struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm); +struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm); struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb); struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 9dad4f79328e..134d430cce73 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -189,16 +189,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) long timeout; int count_rmw = 0; int count = 0; - - if (q->hwe->class == XE_ENGINE_CLASS_RENDER) - /* Big enough to emit all of the context's 3DSTATE */ - bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false); - else - /* Just pick a large BB size */ - bb = xe_bb_new(gt, SZ_4K, false); - - if (IS_ERR(bb)) - return PTR_ERR(bb); + size_t bb_len = 0; /* count RMW registers as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -208,11 +199,30 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) ++count_rmw; } - if (count || count_rmw) - xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name); + if (count) + bb_len += count * 2 + 1; + + if (count_rmw) + bb_len += count_rmw * 20 + 7; + + if (q->hwe->class == XE_ENGINE_CLASS_RENDER) + /* + * Big enough to emit all of the context's 3DSTATE via + * xe_lrc_emit_hwe_state_instructions() + */ + bb_len 
+= xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32); + + xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len); + + bb = xe_bb_new(gt, bb_len, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); if (count) { - /* emit single LRI with all non RMW regs */ + /* + * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per + * reg + 1 + */ bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); @@ -236,7 +246,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) } if (count_rmw) { - /* emit MI_MATH for each RMW reg */ + /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */ xa_for_each(&sr->xa, idx, entry) { if (entry->reg.masked || entry->clr_bits == ~0) From fab2cc0c09fd4e6ebfa645af0914fd5917478e7f Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:48 -0700 Subject: [PATCH 186/358] drm/xe/gt: Extract emit_job_sync() Both the nop and wa jobs are going through the same boiler plate calls to emit the job with a timeout and handling error for both bb and job. Extract emit_job_sync() so those functions create the bb, handling possible errors and delegate the part about really emitting the job and waiting for its completion. 
Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-3-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 54 ++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 134d430cce73..08be659b9334 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -146,30 +146,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) static void gt_reset_worker(struct work_struct *w); -static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb, + long timeout_jiffies) { struct xe_sched_job *job; - struct xe_bb *bb; struct dma_fence *fence; long timeout; - bb = xe_bb_new(gt, 4, false); - if (IS_ERR(bb)) - return PTR_ERR(bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); + if (IS_ERR(job)) return PTR_ERR(job); - } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); + timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies); dma_fence_put(fence); - xe_bb_free(bb, NULL); if (timeout < 0) return timeout; else if (!timeout) @@ -178,17 +171,28 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) return 0; } +static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q) +{ + struct xe_bb *bb; + int ret; + + bb = xe_bb_new(gt, 4, false); + if (IS_ERR(bb)) + return PTR_ERR(bb); + + ret = emit_job_sync(q, bb, HZ); + xe_bb_free(bb, NULL); + + return ret; +} + static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) { struct xe_reg_sr *sr = &q->hwe->reg_lrc; struct xe_reg_sr_entry *entry; + int count_rmw = 0, count = 0, ret; unsigned long idx; - struct xe_sched_job *job; struct xe_bb *bb; - struct dma_fence *fence; - long timeout; - int count_rmw = 0; - 
int count = 0; size_t bb_len = 0; /* count RMW registers as those will be handled separately */ @@ -293,25 +297,11 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) xe_lrc_emit_hwe_state_instructions(q, bb); - job = xe_bb_create_job(q, bb); - if (IS_ERR(job)) { - xe_bb_free(bb, NULL); - return PTR_ERR(job); - } + ret = emit_job_sync(q, bb, HZ); - xe_sched_job_arm(job); - fence = dma_fence_get(&job->drm.s_fence->finished); - xe_sched_job_push(job); - - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); xe_bb_free(bb, NULL); - if (timeout < 0) - return timeout; - else if (!timeout) - return -ETIME; - return 0; + return ret; } int xe_gt_record_default_lrcs(struct xe_gt *gt) From 6d891d22c627adaf9c759bceab0fc44f5ed6b8ae Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:50 -0700 Subject: [PATCH 187/358] drm/xe/lrc: Remove leftover TODO/FIXME There isn't anything to set for CTX_TIMESTAMP handling in the empty LRC: that is set on every LRC init since it should always start from 0 rather than the value saved in the image after first submission. The FIXME about perma-pinning also doesn't make much sense as we will always going to pin the lrc and the GGTT mapping has nothing to do with VM bind. Nuke these leftover comments. 
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-5-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 682c5b69fee8..8bb2ca3af7d1 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -589,8 +589,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe) if (xe_gt_has_indirect_ring_state(hwe->gt)) regs[CTX_CONTEXT_CONTROL] |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE); - - /* TODO: Timestamp */ } static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe) @@ -1181,10 +1179,6 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe, if (vm && vm->xef) /* userspace */ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE; - /* - * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address - * via VM bind calls. - */ lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, ttm_bo_type_kernel, bo_flags); From f4b538245f6a52fbe196b44a95c02aae566e3fb5 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:51 -0700 Subject: [PATCH 188/358] drm/xe/gt: Drop third submission for default context There's no need to submit the nop job again on the first queue. Any state needed is already saved when the first LRC is switched out. The comment is a little misleading regarding indirect W/A: first of all there's still no indirect W/A enabled and secondly, even after they are, there's no need to submit this job again for having their state propagated: the indirect W/A will actually run on every LRC switch. 
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-6-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 08be659b9334..1cd1b551eb1f 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -363,14 +363,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt) goto put_nop_q; } - /* Reload golden LRC to record the effect of any indirect W/A */ - err = emit_nop_job(gt, q); - if (err) { - xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n", - hwe->name, ERR_PTR(err), q->guc->id); - goto put_nop_q; - } - xe_map_memcpy_from(xe, default_lrc, &q->lrc[0]->bo->vmap, xe_lrc_pphwsp_offset(q->lrc[0]), From aded26ccaaa87ec9d7665eb7be25be4c70672b28 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 10 Jul 2025 13:33:52 -0700 Subject: [PATCH 189/358] drm/xe: Waste fewer instructions in emit_wa_job() I was debugging some unrelated issue and noticed the current code was very verbose. We can improve it easily by using the more common batch buffer building pattern. Before: bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; c4d: 41 8b 56 10 mov 0x10(%r14),%edx c51: 49 8b 4e 08 mov 0x8(%r14),%rcx c55: 8d 72 01 lea 0x1(%rdx),%esi c58: 41 89 76 10 mov %esi,0x10(%r14) c5c: c7 04 91 01 00 08 15 movl $0x15080001,(%rcx,%rdx,4) bb->cs[bb->len++] = entry->reg.addr; c63: 8b 08 mov (%rax),%ecx c65: 41 8b 56 10 mov 0x10(%r14),%edx c69: 49 8b 76 08 mov 0x8(%r14),%rsi c6d: 81 e1 ff ff 3f 00 and $0x3fffff,%ecx c73: 8d 7a 01 lea 0x1(%rdx),%edi c76: 41 89 7e 10 mov %edi,0x10(%r14) c7a: 89 0c 96 mov %ecx,(%rsi,%rdx,4) ..etc.. After: *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; c52: 41 c7 04 24 01 00 08 movl $0x15080001,(%r12) c59: 15 *cs++ = entry->reg.addr; c5a: 8b 10 mov (%rax),%edx ..etc.. 
Resulting in the following binary change: add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-348 (-348) Function old new delta xe_gt_record_default_lrcs.cold 304 296 -8 xe_gt_record_default_lrcs 2200 1860 -340 Total: Before=13554, After=13206, chg -2.57% Signed-off-by: Tvrtko Ursulin Reviewed-by: Lucas De Marchi Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-7-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 70 ++++++++++++++++++++----------------- drivers/gpu/drm/xe/xe_lrc.c | 12 ++++--- drivers/gpu/drm/xe/xe_lrc.h | 2 +- 3 files changed, 46 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 1cd1b551eb1f..e6c11e642901 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -194,6 +194,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) unsigned long idx; struct xe_bb *bb; size_t bb_len = 0; + u32 *cs; /* count RMW registers as those will be handled separately */ xa_for_each(&sr->xa, idx, entry) { @@ -222,13 +223,15 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (IS_ERR(bb)) return PTR_ERR(bb); + cs = bb->cs; + if (count) { /* * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per * reg + 1 */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count); xa_for_each(&sr->xa, idx, entry) { struct xe_reg reg = entry->reg; @@ -243,8 +246,8 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) val |= entry->set_bits; - bb->cs[bb->len++] = reg.addr; - bb->cs[bb->len++] = val; + *cs++ = reg.addr; + *cs++ = val; xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val); } } @@ -256,46 +259,49 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q) if (entry->reg.masked || entry->clr_bits == ~0) continue; - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; - bb->cs[bb->len++] = 
entry->reg.addr; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO; + *cs++ = entry->reg.addr; + *cs++ = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | - MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = entry->clr_bits; - bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = entry->set_bits; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = entry->clr_bits; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = entry->set_bits; - bb->cs[bb->len++] = MI_MATH(8); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1); - bb->cs[bb->len++] = CS_ALU_INSTR_AND; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0); - bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2); - bb->cs[bb->len++] = CS_ALU_INSTR_OR; - bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = MI_MATH(8); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1); + *cs++ = CS_ALU_INSTR_AND; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); + *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0); + *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2); + *cs++ = CS_ALU_INSTR_OR; + *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU); - bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = entry->reg.addr; + *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = entry->reg.addr; xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n", entry->reg.addr, entry->clr_bits, entry->set_bits); } /* reset used GPR */ - bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO; - bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr; - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr; - bb->cs[bb->len++] = 0; - 
bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr; - bb->cs[bb->len++] = 0; + *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | + MI_LRI_LRM_CS_MMIO; + *cs++ = CS_GPR_REG(0, 0).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 1).addr; + *cs++ = 0; + *cs++ = CS_GPR_REG(0, 2).addr; + *cs++ = 0; } - xe_lrc_emit_hwe_state_instructions(q, bb); + cs = xe_lrc_emit_hwe_state_instructions(q, cs); + + bb->len = cs - bb->cs; ret = emit_job_sync(q, bb, HZ); diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 8bb2ca3af7d1..3a169011a2ad 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1888,7 +1888,7 @@ static const struct instr_state xe_hpg_svg_state[] = { { .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 }, }; -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb) +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs) { struct xe_gt *gt = q->hwe->gt; struct xe_device *xe = gt_to_xe(gt); @@ -1923,7 +1923,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b if (!state_table) { xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n", GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100); - return; + return cs; } for (int i = 0; i < state_table_size; i++) { @@ -1946,12 +1946,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b instr == CMD_3DSTATE_DRAWING_RECTANGLE) instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST; - bb->cs[bb->len] = instr; + *cs = instr; if (!is_single_dw) - bb->cs[bb->len] |= (num_dw - 2); + *cs |= (num_dw - 2); - bb->len += num_dw; + cs += num_dw; } + + return cs; } struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index eb6e8de8c939..b6c8053c581b 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -112,7 +112,7 @@ void xe_lrc_dump_default(struct drm_printer *p, struct xe_gt *gt, enum 
xe_engine_class); -void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb); +u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs); struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc); void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot); From f4d51b6ce51dfa9c0a238416f47dfe4726be4e70 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 10 Jul 2025 13:33:49 -0700 Subject: [PATCH 190/358] drm/xe/lrc: Add table with LRC layout Add a table to document the LRC's BO layout to make it easier to visualize how each region stacks on top of each other. Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250710-lrc-refactors-v2-4-a5e2ca03f6bd@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_lrc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 3a169011a2ad..6d38411bdeba 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -43,6 +43,30 @@ #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K #define LRC_WA_BB_SIZE SZ_4K +/* + * Layout of the LRC and associated data allocated as + * lrc->bo: + * + * Region Size + * +============================+=================================+ <- __xe_lrc_ring_offset() + * | Ring | ring_size, see | + * | | xe_lrc_init() | + * +============================+=================================+ <- __xe_lrc_pphwsp_offset() + * | PPHWSP (includes SW state) | 4K | + * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset() + * | Engine Context Image | n * 4K, see | + * | | xe_gt_lrc_size() | + * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset() + * | Indirect Ring State Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_RING_STATE | + * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset() + * | Indirect 
Context Page | 0 or 4k, see | + * | | XE_LRC_FLAG_INDIRECT_CTX | + * +============================+=================================+ <- __xe_lrc_wa_bb_offset() + * | WA BB Per Ctx | 4k | + * +============================+=================================+ <- xe_bo_size(lrc->bo) + */ + static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { From fd5fde69fd413b03e25fb3b8e2ce375c02e74b0d Mon Sep 17 00:00:00 2001 From: Diederik de Haas Date: Wed, 9 Jul 2025 15:15:16 +0200 Subject: [PATCH 191/358] dt-bindings: display: rockchip,dw-mipi-dsi: Drop address/size cells The "rockchip,dw-mipi-dsi" binding has allOf "snps,dw-mipi-dsi.yaml" which has allOf "dsi-controller.yaml", which already has #address-cells and #size-cells defined as '1' and '0' respectively. So drop this re-definition. Signed-off-by: Diederik de Haas Reviewed-by: "Rob Herring (Arm)" Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20250709132323.128757-4-didi.debian@cknow.org --- .../bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml index ccd71c5324af..0881e82deb11 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-mipi-dsi.yaml @@ -58,12 +58,6 @@ properties: power-domains: maxItems: 1 - "#address-cells": - const: 1 - - "#size-cells": - const: 0 - required: - compatible - clocks From 9f50b729dd61dfb9f4d7c66900d22a7c7353a8c0 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:11 +0200 Subject: [PATCH 192/358] drm/xe/pf: Prepare to stop SR-IOV support prior GT reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of the resume or GT reset, the PF driver schedules work which is then used to complete 
restarting of the SR-IOV support, including resending to the GuC configurations of provisioned VFs. However, in case of short delay between those two actions, which could be seen by triggering a GT reset on the suspended device: $ echo 1 > /sys/kernel/debug/dri/0000:00:02.0/gt0/force_reset this PF worker might be still busy, which leads to errors due to just stopped or disabled GuC CTB communication: [ ] xe 0000:00:02.0: [drm:xe_gt_resume [xe]] GT0: resumed [ ] xe 0000:00:02.0: [drm] GT0: trying reset from force_reset_show [xe] [ ] xe 0000:00:02.0: [drm] GT0: reset queued [ ] xe 0000:00:02.0: [drm] GT0: reset started [ ] xe 0000:00:02.0: [drm:guc_ct_change_state [xe]] GT0: GuC CT communication channel stopped [ ] xe 0000:00:02.0: [drm:guc_ct_send_recv [xe]] GT0: H2G request 0x5503 canceled! [ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF1 12 config KLVs (-ECANCELED) [ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF1 configuration (-ECANCELED) [ ] xe 0000:00:02.0: [drm:guc_ct_change_state [xe]] GT0: GuC CT communication channel disabled [ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF2 12 config KLVs (-ENODEV) [ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push VF2 configuration (-ENODEV) [ ] xe 0000:00:02.0: [drm] GT0: PF: Failed to push 2 of 2 VFs configurations [ ] xe 0000:00:02.0: [drm:pf_worker_restart_func [xe]] GT0: PF: restart completed While this VFs reprovisioning will be successful during next spin of the worker, to avoid those errors, make sure to cancel restart worker if we are about to trigger next reset.
Fixes: 411220808cee ("drm/xe/pf: Restart VFs provisioning after GT reset") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250711193316.1920-2-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt.c | 3 +++ drivers/gpu/drm/xe/xe_gt_sriov_pf.c | 19 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_sriov_pf.h | 5 +++++ 3 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index e6c11e642901..c8eda36546d3 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -833,6 +833,9 @@ static int gt_reset(struct xe_gt *gt) goto err_out; } + if (IS_SRIOV_PF(gt_to_xe(gt))) + xe_gt_sriov_pf_stop_prepare(gt); + xe_uc_gucrc_disable(&gt->uc); xe_uc_stop_prepare(&gt->uc); xe_gt_pagefault_reset(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index c08efca6420e..35489fa81825 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -172,6 +172,25 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid) pf_clear_vf_scratch_regs(gt, vfid); } +static void pf_cancel_restart(struct xe_gt *gt) +{ + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + + if (cancel_work_sync(&gt->sriov.pf.workers.restart)) + xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n"); +} + +/** + * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support. + * @gt: the &xe_gt + * + * This function can only be called on the PF.
+ */ +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ + pf_cancel_restart(gt); +} + static void pf_restart(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index f474509411c0..e2b2ff8132dc 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -13,6 +13,7 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt); int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); +void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt); void xe_gt_sriov_pf_restart(struct xe_gt *gt); #else static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) @@ -29,6 +30,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } +static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt) +{ +} + static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt) { } From 1c38dd6afa4a8ecce28e94da794fd1d205c30f51 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:12 +0200 Subject: [PATCH 193/358] drm/xe/pf: Resend PF provisioning after GT reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we reload the GuC due to suspend/resume or GT reset then we have to resend not only any VFs provisioning data, but also PF configuration, like scheduling parameters (EQ, PT), as otherwise GuC will continue to use default values. 
Fixes: 411220808cee ("drm/xe/pf: Restart VFs provisioning after GT reset") Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250711193316.1920-3-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 9ba9dc7c4ee9..79b364fbe06d 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -2366,6 +2366,21 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static int pf_push_self_config(struct xe_gt *gt) +{ + int err; + + err = pf_push_full_vf_config(gt, PFID); + if (err) { + xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n", + ERR_PTR(err)); + return err; + } + + xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n"); + return 0; +} + static void fini_config(void *arg) { struct xe_gt *gt = arg; @@ -2389,9 +2404,17 @@ static void fini_config(void *arg) int xe_gt_sriov_pf_config_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); + int err; xe_gt_assert(gt, IS_SRIOV_PF(xe)); + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + if (err) + return err; + return devm_add_action_or_reset(xe->drm.dev, fini_config, gt); } @@ -2409,6 +2432,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt) unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt)); unsigned int fail = 0, skip = 0; + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_push_self_config(gt); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + for (n = 1; n <= total_vfs; n++) { if (xe_gt_sriov_pf_config_is_empty(gt, n)) skip++; From 92ba2032a18dd61ca65ac832134217493fbc8bcf Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:13 +0200 Subject: [PATCH 
194/358] drm/xe/pf: Move GGTT config KLVs encoding to helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In upcoming patch we will want to encode GGTT config KLVs based on raw numbers, without relying on the allocated GGTT node. Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250711193316.1920-4-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 31 ++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 79b364fbe06d..e7aea89e7215 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -238,25 +238,34 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i } /* Return: number of configuration dwords written */ -static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details) { u32 n = 0; - if (xe_ggtt_node_allocated(config->ggtt_region)) { - if (details) { - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); - cfg[n++] = lower_32_bits(config->ggtt_region->base.start); - cfg[n++] = upper_32_bits(config->ggtt_region->base.start); - } - - cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); - cfg[n++] = lower_32_bits(config->ggtt_region->base.size); - cfg[n++] = upper_32_bits(config->ggtt_region->base.size); + if (details) { + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START); + cfg[n++] = lower_32_bits(start); + cfg[n++] = upper_32_bits(start); } + cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE); + cfg[n++] = lower_32_bits(size); + cfg[n++] = upper_32_bits(size); + return n; } +/* Return: number of configuration dwords written */ +static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) +{ + struct xe_ggtt_node *node = 
config->ggtt_region; + + if (!xe_ggtt_node_allocated(node)) + return 0; + + return encode_ggtt(cfg, node->base.start, node->base.size, details); +} + /* Return: number of configuration dwords written */ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details) { From 68ae022278a1a756e1bd9bdd56ba8702eece4558 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:14 +0200 Subject: [PATCH 195/358] drm/xe/pf: Force GuC virtualization mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default the GuC starts in the 'native' mode and enables the VGT mode (aka 'virtualization' mode) only after it receives at least one set of VF configuration data. While this happens naturally while PF begins VFs provisioning, we might need this sooner as some actions, like TLB_INVALIDATION_ALL(0x7002), is supported by the GuC only in the VGT mode. And this becomes a real problem if we would want to use above action to invalidate the LMTT early during VFs auto-provisioning, before VFs are enabled, as such H2G would be rejected: [ ] xe 0000:4d:00.0: [drm] *ERROR* GT0: FAST_REQ H2G fence 0x804e failed! 
e=0x30, h=0 [ ] xe 0000:4d:00.0: [drm] *ERROR* GT0: Fence 0x804e was used by action 0x7002 sent at: h2g_write+0x33e/0x870 [xe] __guc_ct_send_locked+0x1e1/0x1110 [xe] guc_ct_send_locked+0x9f/0x740 [xe] xe_guc_ct_send_locked+0x19/0x60 [xe] send_tlb_invalidation+0xc2/0x470 [xe] xe_gt_tlb_invalidation_all_async+0x45/0xa0 [xe] xe_gt_tlb_invalidation_all+0x4b/0xa0 [xe] lmtt_invalidate_hw+0x64/0x1a0 [xe] xe_lmtt_invalidate_hw+0x5c/0x340 [xe] pf_update_vf_lmtt+0x398/0xae0 [xe] pf_provision_vf_lmem+0x350/0xa60 [xe] xe_gt_sriov_pf_config_bulk_set_lmem+0xe2/0x410 [xe] xe_gt_sriov_pf_config_set_fair_lmem+0x1c6/0x620 [xe] xe_gt_sriov_pf_config_set_fair+0xd5/0x3f0 [xe] xe_pci_sriov_configure+0x360/0x1200 [xe] sriov_numvfs_store+0xbc/0x1d0 dev_attr_store+0x17/0x40 sysfs_kf_write+0x4a/0x80 kernfs_fop_write_iter+0x166/0x220 vfs_write+0x2ba/0x580 ksys_write+0x77/0x100 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0x7a0 entry_SYSCALL_64_after_hwframe+0x76/0x7e [ ] xe 0000:4d:00.0: [drm] *ERROR* GT0: CT dequeue failed: -71 [ ] xe 0000:4d:00.0: [drm] GT0: trying reset from receive_g2h [xe] This could be mitigated by pushing earlier a PF self-configuration with some hard-coded values that cover unlimited access to the GGTT, use of all GuC contexts and doorbells. This step is sufficient for the GuC to switch into the VGT mode. 
Signed-off-by: Michal Wajdeczko Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250711193316.1920-5-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index e7aea89e7215..f2500b9f2726 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -341,6 +341,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid) } xe_gt_assert(gt, num_dwords <= max_cfg_dwords); + if (vfid == PFID) { + u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt)); + u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start; + + /* plain PF config data will never include a real GGTT region */ + xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true)); + + /* fake PF GGTT config covers full GGTT range except reserved WOPCM */ + num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true); + } + num_klvs = xe_guc_klv_count(cfg, num_dwords); err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords); @@ -2375,6 +2386,20 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid, return err; } +static void pf_prepare_self_config(struct xe_gt *gt) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID); + + /* + * We want PF to be allowed to use all of context ID, doorbells IDs + * and whole usable GGTT area. While we can store ctxs/dbs numbers + * directly in the config structure, can't do the same with the GGTT + * configuration, so let it be prepared on demand while pushing KLVs. 
+ */ + config->num_ctxs = GUC_ID_MAX; + config->num_dbs = GUC_NUM_DOORBELLS; +} + static int pf_push_self_config(struct xe_gt *gt) { int err; @@ -2418,6 +2443,7 @@ int xe_gt_sriov_pf_config_init(struct xe_gt *gt) xe_gt_assert(gt, IS_SRIOV_PF(xe)); mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + pf_prepare_self_config(gt); err = pf_push_self_config(gt); mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); From e497957fee3025ef72090b2ab29dd1070602bf6e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:15 +0200 Subject: [PATCH 196/358] drm/xe/pf: Invalidate LMTT during LMEM unprovisioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Invalidate LMTT immediately after removing VF's LMTT page tables and clearing root PTE in the LMTT PD to avoid any invalid access by the hardware (and VF) due to stale data. Signed-off-by: Michal Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Cc: Matthew Brost Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250711193316.1920-6-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_device.h | 4 ++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 34 +++++++++++++ drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 1 + drivers/gpu/drm/xe/xe_lmtt.c | 54 +++++++++++++++++++++ drivers/gpu/drm/xe/xe_lmtt.h | 1 + 5 files changed, 94 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index f0eb8150f185..bc802e066a7d 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -131,6 +131,10 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe) for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \ for_each_if((gt__) = xe_device_get_gt((xe__), (id__))) +#define for_each_gt_on_tile(gt__, tile__, id__) \ + for_each_gt((gt__), (tile__)->xe, (id__)) \ + for_each_if((gt__)->tile == (tile__)) + static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt) { return &gt->pm.fw;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 6088df8e159c..086c12ee3d9d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -330,6 +330,40 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) return 0; } +static int send_tlb_invalidation_all(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) +{ + u32 action[] = { + XE_GUC_ACTION_TLB_INVALIDATION_ALL, + 0, /* seqno, replaced in send_tlb_invalidation */ + MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL), + }; + + return send_tlb_invalidation(&gt->uc.guc, fence, action, ARRAY_SIZE(action)); +} + +/** + * xe_gt_tlb_invalidation_all - Invalidate all TLBs across PF and all VFs. + * @gt: the &xe_gt structure + * @fence: the &xe_gt_tlb_invalidation_fence to be signaled on completion + * + * Send a request to invalidate all TLBs across PF and all VFs. + * + * Return: 0 on success, negative error code on error + */ +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence) +{ + int err; + + xe_gt_assert(gt, gt == fence->gt); + + err = send_tlb_invalidation_all(gt, fence); + if (err) + xe_gt_err(gt, "TLB invalidation request failed (%pe)", ERR_PTR(err)); + + return err; +} + /* * Ensure that roundup_pow_of_two(length) doesn't overflow.
* Note that roundup_pow_of_two() operates on unsigned long, diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index 31072dbcad8e..f7f0f2eaf4b5 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -20,6 +20,7 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt); void xe_gt_tlb_invalidation_reset(struct xe_gt *gt); int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt); void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm); +int xe_gt_tlb_invalidation_all(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence); int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c index 381f576036d0..a2000307d5bf 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.c +++ b/drivers/gpu/drm/xe/xe_lmtt.c @@ -11,6 +11,7 @@ #include "xe_assert.h" #include "xe_bo.h" +#include "xe_gt_tlb_invalidation.h" #include "xe_lmtt.h" #include "xe_map.h" #include "xe_mmio.h" @@ -222,6 +223,58 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt) lmtt_setup_dir_ptr(lmtt); } +static int lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_gt_tlb_invalidation_fence fences[XE_MAX_GT_PER_TILE]; + struct xe_gt_tlb_invalidation_fence *fence = fences; + struct xe_tile *tile = lmtt_to_tile(lmtt); + struct xe_gt *gt; + int result = 0; + int err; + u8 id; + + for_each_gt_on_tile(gt, tile, id) { + xe_gt_tlb_invalidation_fence_init(gt, fence, true); + err = xe_gt_tlb_invalidation_all(gt, fence); + result = result ?: err; + fence++; + } + + lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result); + + /* + * It is fine to wait for all fences, even for those which covers the + * invalidation request that failed, as such fence should be already + * marked as signaled. 
+ */ + fence = fences; + for_each_gt_on_tile(gt, tile, id) + xe_gt_tlb_invalidation_fence_wait(fence++); + + return result; +} + +/** + * xe_lmtt_invalidate_hw - Invalidate LMTT hardware. + * @lmtt: the &xe_lmtt to invalidate + * + * Send requests to all GuCs on this tile to invalidate all TLBs. + * + * This function should be called only when running as a PF driver. + */ +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt) +{ + struct xe_device *xe = lmtt_to_xe(lmtt); + int err; + + lmtt_assert(lmtt, IS_SRIOV_PF(xe)); + + err = lmtt_invalidate_hw(lmtt); + if (err) + xe_sriov_warn(xe, "LMTT%u invalidation failed (%pe)", + lmtt_to_tile(lmtt)->id, ERR_PTR(err)); +} + static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt, u64 pte, unsigned int idx) { @@ -276,6 +329,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid) return; lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid); + lmtt_invalidate_hw(lmtt); lmtt_assert(lmtt, pd->level > 0); lmtt_assert(lmtt, pt->level == pd->level - 1); diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h index cb10ef994db6..75a234fbf367 100644 --- a/drivers/gpu/drm/xe/xe_lmtt.h +++ b/drivers/gpu/drm/xe/xe_lmtt.h @@ -15,6 +15,7 @@ struct xe_lmtt_ops; #ifdef CONFIG_PCI_IOV int xe_lmtt_init(struct xe_lmtt *lmtt); void xe_lmtt_init_hw(struct xe_lmtt *lmtt); +void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt); int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range); int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset); void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid); From a81648768178f6adf171d98db486b4b2613f645a Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 11 Jul 2025 21:33:16 +0200 Subject: [PATCH 197/358] drm/xe/pf: Invalidate LMTT after completing changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we finish populating all leaf pages in the 
VF's LMTT we should make sure that hardware will not access any stale data. Explicitly force LMTT invalidation (as it was already planned in the past). Signed-off-by: Michal Wajdeczko Cc: Michał Winiarski Cc: Piotr Piórkowski Reviewed-by: Piotr Piórkowski Link: https://lore.kernel.org/r/20250711193316.1920-7-michal.wajdeczko@intel.com --- drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index f2500b9f2726..494909f74eb2 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1347,7 +1347,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si static void pf_force_lmtt_invalidate(struct xe_device *xe) { - /* TODO */ + struct xe_lmtt *lmtt; + struct xe_tile *tile; + unsigned int tid; + + xe_assert(xe, xe_device_has_lmtt(xe)); + xe_assert(xe, IS_SRIOV_PF(xe)); + + for_each_tile(tile, xe, tid) { + lmtt = &tile->sriov.pf.lmtt; + xe_lmtt_invalidate_hw(lmtt); + } } static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid) From 0a5dc1b67ef5c7e851b57764a2aab8cc4341a7b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:02 -0300 Subject: [PATCH 198/358] drm/sched: Rename DRM_GPU_SCHED_STAT_NOMINAL to DRM_GPU_SCHED_STAT_RESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Among the scheduler's statuses, the only one that indicates an error is DRM_GPU_SCHED_STAT_ENODEV. Any status other than DRM_GPU_SCHED_STAT_ENODEV signifies that the operation succeeded and the GPU is in a nominal state. However, to provide more information about the GPU's status, it is needed to convey more information than just "OK". Therefore, rename DRM_GPU_SCHED_STAT_NOMINAL to DRM_GPU_SCHED_STAT_RESET, which better communicates the meaning of this status. 
The status DRM_GPU_SCHED_STAT_RESET indicates that the GPU has hung, but it has been successfully reset and is now in a nominal state again. Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-1-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/accel/amdxdna/aie2_ctx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 4 ++-- drivers/gpu/drm/imagination/pvr_queue.c | 4 ++-- drivers/gpu/drm/lima/lima_sched.c | 6 +++--- drivers/gpu/drm/nouveau/nouveau_exec.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 6 +++--- drivers/gpu/drm/panthor/panthor_mmu.c | 2 +- drivers/gpu/drm/panthor/panthor_sched.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 2 +- drivers/gpu/drm/scheduler/tests/mock_scheduler.c | 2 +- drivers/gpu/drm/v3d/v3d_sched.c | 6 +++--- drivers/gpu/drm/xe/xe_guc_submit.c | 6 +++--- include/drm/gpu_scheduler.h | 4 ++-- 15 files changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c index f20999f2d668..2cff5419bd2f 100644 --- a/drivers/accel/amdxdna/aie2_ctx.c +++ b/drivers/accel/amdxdna/aie2_ctx.c @@ -361,7 +361,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job) aie2_hwctx_restart(xdna, hwctx); mutex_unlock(&xdna->dev_lock); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops sched_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 87f7040d8a2b..ec73cf3a9024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -175,7 +175,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) exit: amdgpu_vm_put_task_info(ti); drm_dev_exit(idx); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } int amdgpu_job_alloc(struct 
amdgpu_device *adev, struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 76a3a3e517d8..7146069a9849 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -86,11 +86,11 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job drm_sched_resubmit_jobs(&gpu->sched); drm_sched_start(&gpu->sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; out_no_timeout: list_add(&sched_job->list, &sched_job->sched->pending_list); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 5a41ee79fed6..fc415dd0d7a7 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -803,7 +803,7 @@ static void pvr_queue_start(struct pvr_queue *queue) * the scheduler, and re-assign parent fences in the middle. * * Return: - * * DRM_GPU_SCHED_STAT_NOMINAL. + * * DRM_GPU_SCHED_STAT_RESET. 
*/ static enum drm_gpu_sched_stat pvr_queue_timedout_job(struct drm_sched_job *s_job) @@ -854,7 +854,7 @@ pvr_queue_timedout_job(struct drm_sched_job *s_job) drm_sched_start(sched, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /** diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 954f4325b859..739e8c6c6d90 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -412,7 +412,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job */ if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s spurious timeout\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -429,7 +429,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job if (dma_fence_is_signaled(task->fence)) { DRM_WARN("%s unexpectedly high interrupt latency\n", lima_ip_name(ip)); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* @@ -467,7 +467,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job drm_sched_resubmit_jobs(&pipe->base); drm_sched_start(&pipe->base, 0); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void lima_sched_free_job(struct drm_sched_job *job) diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 41b7c608c905..edbbda78bac9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -189,7 +189,7 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct nouveau_job_ops nouveau_exec_job_ops = { diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7d9c3418e76b..0cc0bc9f9952 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -370,7 +370,7 @@ nouveau_sched_timedout_job(struct drm_sched_job *sched_job) { struct drm_gpu_scheduler *sched = sched_job->sched; struct nouveau_job *job = to_nouveau_job(sched_job); - enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat stat = DRM_GPU_SCHED_STAT_RESET; drm_sched_stop(sched, sched_job); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 5657106c2f7d..afcffe7f8fe9 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -755,7 +755,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job * spurious. Bail out. */ if (dma_fence_is_signaled(job->done_fence)) - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; /* * Panfrost IRQ handler may take a long time to process an interrupt @@ -770,7 +770,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job if (dma_fence_is_signaled(job->done_fence)) { dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", @@ -786,7 +786,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job atomic_set(&pfdev->reset.pending, 1); panfrost_reset(pfdev, sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void panfrost_reset_work(struct work_struct *work) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index ed3712f8d6a9..4140f697ba5a 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -2259,7 +2259,7 @@ static enum drm_gpu_sched_stat panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) { WARN(1, "VM_BIND 
ops are synchronous for now, there should be no timeout!"); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static const struct drm_sched_backend_ops panthor_vm_bind_ops = { diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index a2248f692a03..8f17394cc82a 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -3241,7 +3241,7 @@ queue_timedout_job(struct drm_sched_job *sched_job) queue_start(queue); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void queue_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index d94270cf8bf5..4f2593f0c7d9 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -527,7 +527,7 @@ static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; - enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_NOMINAL; + enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index 0d1d57213e05..ad96e8db13c4 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -231,7 +231,7 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job) drm_sched_job_cleanup(sched_job); /* Mock job itself is freed by the kunit framework. 
*/ - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void mock_sched_free_job(struct drm_sched_job *sched_job) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 35f131a46d07..e2b7f24d528e 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -741,7 +741,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) mutex_unlock(&v3d->reset_lock); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void @@ -773,7 +773,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, *timedout_ctra = ctra; v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } return v3d_gpu_reset_for_timeout(v3d, sched_job); @@ -819,7 +819,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) job->timedout_batches = batches; v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } return v3d_gpu_reset_for_timeout(v3d, sched_job); diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index df7a5a4eec74..ca2344b538c3 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1096,7 +1096,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } /* Kill the run_job entry point */ @@ -1265,7 +1265,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* Start fence signaling */ xe_hw_fence_irq_start(q->fence_irq); - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; sched_enable: enable_scheduling(q); @@ -1278,7 +1278,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - return 
DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_RESET; } static void __guc_exec_queue_fini_async(struct work_struct *w) diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 190844370f48..ed300920996a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -391,12 +391,12 @@ struct drm_sched_job { * enum drm_gpu_sched_stat - the scheduler's status * * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. - * @DRM_GPU_SCHED_STAT_NOMINAL: Operation succeeded. + * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset. * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. */ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, - DRM_GPU_SCHED_STAT_NOMINAL, + DRM_GPU_SCHED_STAT_RESET, DRM_GPU_SCHED_STAT_ENODEV, }; From 0b1217bfdfddf664c15954d1d51ee18ed88a2ccf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:03 -0300 Subject: [PATCH 199/358] drm/sched: Allow drivers to skip the reset and keep on running MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the DRM scheduler times out, it's possible that the GPU isn't hung; instead, a job just took unusually long (longer than the timeout) but is still running, and there is, thus, no reason to reset the hardware. This can occur in two scenarios: 1. The job is taking longer than the timeout, but the driver determined through a GPU-specific mechanism that the hardware is still making progress. Hence, the driver would like the scheduler to skip the timeout and treat the job as still pending from then onward. This happens in v3d, Etnaviv, and Xe. 2. Timeout has fired before the free-job worker. Consequently, the scheduler calls `sched->ops->timedout_job()` for a job that isn't timed out. 
These two scenarios are problematic because the job was removed from the `sched->pending_list` before calling `sched->ops->timedout_job()`, which means that when the job finishes, it won't be freed by the scheduler through `sched->ops->free_job()` - leading to a memory leak. To solve these problems, create a new `drm_gpu_sched_stat`, called DRM_GPU_SCHED_STAT_NO_HANG, which allows a driver to skip the reset. The new status will indicate that the job must be reinserted into `sched->pending_list`, and the hardware / driver will still complete that job. Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-2-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/scheduler/sched_main.c | 46 ++++++++++++++++++++++++-- include/drm/gpu_scheduler.h | 3 ++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4f2593f0c7d9..e2cda28a1af4 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -366,11 +366,16 @@ static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) { struct drm_sched_job *job; - spin_lock(&sched->job_list_lock); job = list_first_entry_or_null(&sched->pending_list, struct drm_sched_job, list); if (job && dma_fence_is_signaled(&job->s_fence->finished)) __drm_sched_run_free_queue(sched); +} + +static void drm_sched_run_free_queue_unlocked(struct drm_gpu_scheduler *sched) +{ + spin_lock(&sched->job_list_lock); + drm_sched_run_free_queue(sched); spin_unlock(&sched->job_list_lock); } @@ -523,6 +528,32 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) spin_unlock(&sched->job_list_lock); } +/** + * drm_sched_job_reinsert_on_false_timeout - reinsert the job on a false timeout + * @sched: scheduler instance + * @job: job to be reinserted on the pending list + * + * In the case of a "false timeout" - when a timeout occurs but the GPU isn't + * hung and is
making progress, the scheduler must reinsert the job back into + * @sched->pending_list. Otherwise, the job and its resources won't be freed + * through the &struct drm_sched_backend_ops.free_job callback. + * + * This function must be used in "false timeout" cases only. + */ +static void drm_sched_job_reinsert_on_false_timeout(struct drm_gpu_scheduler *sched, + struct drm_sched_job *job) +{ + spin_lock(&sched->job_list_lock); + list_add(&job->list, &sched->pending_list); + + /* After reinserting the job, the scheduler enqueues the free-job work + * again if ready. Otherwise, a signaled job could be added to the + * pending list, but never freed. + */ + drm_sched_run_free_queue(sched); + spin_unlock(&sched->job_list_lock); +} + static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; @@ -556,6 +587,9 @@ static void drm_sched_job_timedout(struct work_struct *work) job->sched->ops->free_job(job); sched->free_guilty = false; } + + if (status == DRM_GPU_SCHED_STAT_NO_HANG) + drm_sched_job_reinsert_on_false_timeout(sched, job); } else { spin_unlock(&sched->job_list_lock); } @@ -578,6 +612,10 @@ static void drm_sched_job_timedout(struct work_struct *work) * This function is typically used for reset recovery (see the docu of * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler teardown, i.e., before calling drm_sched_fini(). + * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) { @@ -663,6 +701,10 @@ EXPORT_SYMBOL(drm_sched_stop); * drm_sched_backend_ops.timedout_job() for details). Do not call it for * scheduler startup. The scheduler itself is fully operational after * drm_sched_init() succeeded. 
+ * + * As it's only used for reset recovery, drivers must not call this function + * in their &struct drm_sched_backend_ops.timedout_job callback when they + * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. */ void drm_sched_start(struct drm_gpu_scheduler *sched, int errno) { @@ -1184,7 +1226,7 @@ static void drm_sched_free_job_work(struct work_struct *w) if (job) sched->ops->free_job(job); - drm_sched_run_free_queue(sched); + drm_sched_run_free_queue_unlocked(sched); drm_sched_run_job_queue(sched); } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index ed300920996a..323a505e6e6a 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -393,11 +393,14 @@ struct drm_sched_job { * @DRM_GPU_SCHED_STAT_NONE: Reserved. Do not use. * @DRM_GPU_SCHED_STAT_RESET: The GPU hung and successfully reset. * @DRM_GPU_SCHED_STAT_ENODEV: Error: Device is not available anymore. + * @DRM_GPU_SCHED_STAT_NO_HANG: Contrary to scheduler's assumption, the GPU + * did not hang and is still running. */ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, DRM_GPU_SCHED_STAT_RESET, DRM_GPU_SCHED_STAT_ENODEV, + DRM_GPU_SCHED_STAT_NO_HANG, }; /** From 9b9b5a3605b9a5ef1d412e47b2ae70090c8d3580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:04 -0300 Subject: [PATCH 200/358] drm/sched: Make timeout KUnit tests faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As more KUnit tests are introduced to evaluate the basic capabilities of the `timedout_job()` hook, the test suite will continue to increase in duration. To reduce the overall running time of the test suite, decrease the scheduler's timeout for the timeout tests. 
Before this commit: [15:42:26] Elapsed time: 15.637s total, 0.002s configuring, 10.387s building, 5.229s running After this commit: [15:45:26] Elapsed time: 9.263s total, 0.002s configuring, 5.168s building, 4.037s running Reviewed-by: Tvrtko Ursulin Acked-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-3-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/scheduler/tests/tests_basic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index b1ae10c6bb37..8153c2e7af77 100644 --- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -5,6 +5,8 @@ #include "sched_tests.h" +#define MOCK_TIMEOUT (HZ / 5) + /* * DRM scheduler basic tests should check the basic functional correctness of * the scheduler, including some very light smoke testing. More targeted tests, @@ -28,7 +30,7 @@ static void drm_sched_basic_exit(struct kunit *test) static int drm_sched_timeout_init(struct kunit *test) { - test->priv = drm_mock_sched_new(test, HZ); + test->priv = drm_mock_sched_new(test, MOCK_TIMEOUT); return 0; } @@ -268,14 +270,14 @@ static void drm_sched_basic_timeout(struct kunit *test) done = drm_mock_sched_job_wait_scheduled(job, HZ); KUNIT_ASSERT_TRUE(test, done); - done = drm_mock_sched_job_wait_finished(job, HZ / 2); + done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT / 2); KUNIT_ASSERT_FALSE(test, done); KUNIT_ASSERT_EQ(test, job->flags & DRM_MOCK_SCHED_JOB_TIMEDOUT, 0); - done = drm_mock_sched_job_wait_finished(job, HZ); + done = drm_mock_sched_job_wait_finished(job, MOCK_TIMEOUT); KUNIT_ASSERT_FALSE(test, done); KUNIT_ASSERT_EQ(test, From 1472e7549f84c472a9ebb9a8bb0aaafe985ea608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:05 -0300 Subject: [PATCH 201/358] drm/sched: Add new test for 
DRM_GPU_SCHED_STAT_NO_HANG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a test to submit a single job against a scheduler with the timeout configured and verify that if the job is still running, the timeout handler will skip the reset and allow the job to complete. Reviewed-by: Tvrtko Ursulin Reviewed-by: Philipp Stanner Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-4-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- .../gpu/drm/scheduler/tests/mock_scheduler.c | 5 +++ drivers/gpu/drm/scheduler/tests/sched_tests.h | 1 + drivers/gpu/drm/scheduler/tests/tests_basic.c | 43 +++++++++++++++++++ 3 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c index ad96e8db13c4..65acffc3fea8 100644 --- a/drivers/gpu/drm/scheduler/tests/mock_scheduler.c +++ b/drivers/gpu/drm/scheduler/tests/mock_scheduler.c @@ -218,6 +218,11 @@ mock_sched_timedout_job(struct drm_sched_job *sched_job) struct drm_mock_sched_job *job = drm_sched_job_to_mock_job(sched_job); unsigned long flags; + if (job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET) { + job->flags &= ~DRM_MOCK_SCHED_JOB_DONT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; + } + spin_lock_irqsave(&sched->lock, flags); if (!dma_fence_is_signaled_locked(&job->hw_fence)) { list_del(&job->link); diff --git a/drivers/gpu/drm/scheduler/tests/sched_tests.h b/drivers/gpu/drm/scheduler/tests/sched_tests.h index 0eddfb8d89e6..63d4f2ac7074 100644 --- a/drivers/gpu/drm/scheduler/tests/sched_tests.h +++ b/drivers/gpu/drm/scheduler/tests/sched_tests.h @@ -97,6 +97,7 @@ struct drm_mock_sched_job { #define DRM_MOCK_SCHED_JOB_DONE 0x1 #define DRM_MOCK_SCHED_JOB_TIMEDOUT 0x2 +#define DRM_MOCK_SCHED_JOB_DONT_RESET 0x4 unsigned long flags; struct list_head link; diff --git a/drivers/gpu/drm/scheduler/tests/tests_basic.c b/drivers/gpu/drm/scheduler/tests/tests_basic.c index 8153c2e7af77..55eb142bd7c5 100644 
--- a/drivers/gpu/drm/scheduler/tests/tests_basic.c +++ b/drivers/gpu/drm/scheduler/tests/tests_basic.c @@ -287,8 +287,51 @@ static void drm_sched_basic_timeout(struct kunit *test) drm_mock_sched_entity_free(entity); } +static void drm_sched_skip_reset(struct kunit *test) +{ + struct drm_mock_scheduler *sched = test->priv; + struct drm_mock_sched_entity *entity; + struct drm_mock_sched_job *job; + unsigned int i; + bool done; + + /* + * Submit a single job against a scheduler with the timeout configured + * and verify that if the job is still running, the timeout handler + * will skip the reset and allow the job to complete. + */ + + entity = drm_mock_sched_entity_new(test, + DRM_SCHED_PRIORITY_NORMAL, + sched); + job = drm_mock_sched_job_new(test, entity); + + job->flags = DRM_MOCK_SCHED_JOB_DONT_RESET; + + drm_mock_sched_job_submit(job); + + done = drm_mock_sched_job_wait_scheduled(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + done = drm_mock_sched_job_wait_finished(job, 2 * MOCK_TIMEOUT); + KUNIT_ASSERT_FALSE(test, done); + + KUNIT_ASSERT_EQ(test, + job->flags & DRM_MOCK_SCHED_JOB_DONT_RESET, + 0); + + i = drm_mock_sched_advance(sched, 1); + KUNIT_ASSERT_EQ(test, i, 1); + + done = drm_mock_sched_job_wait_finished(job, HZ); + KUNIT_ASSERT_TRUE(test, done); + + drm_mock_sched_entity_free(entity); +} + static struct kunit_case drm_sched_timeout_tests[] = { KUNIT_CASE(drm_sched_basic_timeout), + KUNIT_CASE(drm_sched_skip_reset), {} }; From 6b37fbacd087fbd517b6b276ca8bebd1dc052fb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:06 -0300 Subject: [PATCH 202/358] drm/v3d: Use DRM_GPU_SCHED_STAT_NO_HANG to skip the reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a CL/CSD job times out, we check if the GPU has made any progress since the last timeout. If so, instead of resetting the hardware, we skip the reset and allow the timer to be rearmed. 
This gives long-running jobs a chance to complete. Instead of manipulating scheduler's internals, inform the scheduler that the job did not actually timeout and no reset was performed through the new status code DRM_GPU_SCHED_STAT_NO_HANG. Reviewed-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-5-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/v3d/v3d_sched.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index e2b7f24d528e..cc85f1b19ac4 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -744,16 +744,6 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) return DRM_GPU_SCHED_STAT_RESET; } -static void -v3d_sched_skip_reset(struct drm_sched_job *sched_job) -{ - struct drm_gpu_scheduler *sched = sched_job->sched; - - spin_lock(&sched->job_list_lock); - list_add(&sched_job->list, &sched->pending_list); - spin_unlock(&sched->job_list_lock); -} - static enum drm_gpu_sched_stat v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, u32 *timedout_ctca, u32 *timedout_ctra) @@ -772,8 +762,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q, *timedout_ctca = ctca; *timedout_ctra = ctra; - v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; } return v3d_gpu_reset_for_timeout(v3d, sched_job); @@ -818,8 +807,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job) if (job->timedout_batches != batches) { job->timedout_batches = batches; - v3d_sched_skip_reset(sched_job); - return DRM_GPU_SCHED_STAT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; } return v3d_gpu_reset_for_timeout(v3d, sched_job); From 8902c2b17a6ec723ab7924bc4113bef47603c0dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:07 -0300 Subject: [PATCH 203/358] 
drm/etnaviv: Use DRM_GPU_SCHED_STAT_NO_HANG to skip the reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Etnaviv can skip a hardware reset in two situations: 1. TDR has fired before the free-job worker and the timeout is spurious. 2. The GPU is still making progress on the front-end and we can give the job a chance to complete. Instead of manipulating scheduler's internals, inform the scheduler that the job did not actually timeout and no reset was performed through the new status code DRM_GPU_SCHED_STAT_NO_HANG. Reviewed-by: Lucas Stach Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-6-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index 7146069a9849..df4232d7e135 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -40,11 +40,11 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job int change; /* - * If the GPU managed to complete this jobs fence, the timout is - * spurious. Bail out. + * If the GPU managed to complete this jobs fence, the timeout has + * fired before free-job worker. The timeout is spurious, so bail out. 
*/ if (dma_fence_is_signaled(submit->out_fence)) - goto out_no_timeout; + return DRM_GPU_SCHED_STAT_NO_HANG; /* * If the GPU is still making forward progress on the front-end (which @@ -70,7 +70,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job gpu->hangcheck_dma_addr = dma_addr; gpu->hangcheck_primid = primid; gpu->hangcheck_fence = gpu->completed_fence; - goto out_no_timeout; + return DRM_GPU_SCHED_STAT_NO_HANG; } /* block scheduler */ @@ -87,10 +87,6 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job drm_sched_start(&gpu->sched, 0); return DRM_GPU_SCHED_STAT_RESET; - -out_no_timeout: - list_add(&sched_job->list, &sched_job->sched->pending_list); - return DRM_GPU_SCHED_STAT_RESET; } static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) From 53dcd0eaa271e870ca5d0b203be67b468214c1bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:08 -0300 Subject: [PATCH 204/358] drm/xe: Use DRM_GPU_SCHED_STAT_NO_HANG to skip the reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Xe can skip the reset if TDR has fired before the free job worker and can also re-arm the timeout timer in some scenarios. Instead of manipulating scheduler's internals, inform the scheduler that the job did not actually timeout and no reset was performed through the new status code DRM_GPU_SCHED_STAT_NO_HANG. Note that, in the first case, there is no need to restart submission if it hasn't been stopped. 
Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-7-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/xe/xe_guc_submit.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index ca2344b538c3..b28bb75e11c1 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1092,12 +1092,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * list so job can be freed and kick scheduler ensuring free job is not * lost. */ - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - xe_sched_add_pending_job(sched, job); - xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_RESET; - } + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) + return DRM_GPU_SCHED_STAT_NO_HANG; /* Kill the run_job entry point */ xe_sched_submission_stop(sched); @@ -1275,10 +1271,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * but there is not currently an easy way to do in DRM scheduler. With * some thought, do this in a follow up. */ - xe_sched_add_pending_job(sched, job); xe_sched_submission_start(sched); - - return DRM_GPU_SCHED_STAT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; } static void __guc_exec_queue_fini_async(struct work_struct *w) From 9fb32803dfba63697080db7969bc3aa1bf323dc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Mon, 14 Jul 2025 19:07:09 -0300 Subject: [PATCH 205/358] drm/panfrost: Use DRM_GPU_SCHED_STAT_NO_HANG to skip the reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Panfrost can skip the reset if TDR has fired before the free-job worker. Currently, since Panfrost doesn't take any action on these scenarios, the job is being leaked, considering that `free_job()` won't be called. 
To avoid such leaks, inform the scheduler that the job did not actually timeout and no reset was performed through the new status code DRM_GPU_SCHED_STAT_NO_HANG. Reviewed-by: Steven Price Link: https://lore.kernel.org/r/20250714-sched-skip-reset-v6-8-5c5ba4f55039@igalia.com Signed-off-by: Maíra Canal --- drivers/gpu/drm/panfrost/panfrost_job.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index afcffe7f8fe9..842e012cdc68 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -751,11 +751,11 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job int js = panfrost_job_get_slot(job); /* - * If the GPU managed to complete this jobs fence, the timeout is - * spurious. Bail out. + * If the GPU managed to complete this jobs fence, the timeout has + * fired before free-job worker. The timeout is spurious, so bail out. */ if (dma_fence_is_signaled(job->done_fence)) - return DRM_GPU_SCHED_STAT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; /* * Panfrost IRQ handler may take a long time to process an interrupt @@ -770,7 +770,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job if (dma_fence_is_signaled(job->done_fence)) { dev_warn(pfdev->dev, "unexpectedly high interrupt latency\n"); - return DRM_GPU_SCHED_STAT_RESET; + return DRM_GPU_SCHED_STAT_NO_HANG; } dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", From a622663dbf874ae5ae2326a9878ceb410c8a34a4 Mon Sep 17 00:00:00 2001 From: Andy Yan Date: Tue, 15 Jul 2025 13:47:52 +0800 Subject: [PATCH 206/358] drm/bridge: megachips-stdpxxxx-ge-b850v3-fw: Fix a compile error due to bridge->detect parameter changes Fix the compile error due to bridge->detect parameter changes. 
Reported-by: Dixit Ashutosh Closes: https://lore.kernel.org/dri-devel/175250667117.3567548.8371527247937906463.b4-ty@oss.qualcomm.com/T/#m8ecd00a05a330bc9c76f11c981daafcb30a7c2e0 Fixes: 5d156a9c3d5e ("drm/bridge: Pass down connector to drm bridge detect hook") Signed-off-by: Andy Yan Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20250715054754.800765-1-andyshrk@163.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index de57f8a9e98c..c9e6505cbd88 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -142,7 +142,7 @@ ge_b850v3_lvds_bridge_detect(struct drm_bridge *bridge, struct drm_connector *co static enum drm_connector_status ge_b850v3_lvds_detect(struct drm_connector *connector, bool force) { - return ge_b850v3_lvds_bridge_detect(&ge_b850v3_lvds_ptr->bridge); + return ge_b850v3_lvds_bridge_detect(&ge_b850v3_lvds_ptr->bridge, connector); } static const struct drm_connector_funcs ge_b850v3_lvds_connector_funcs = { From 0bcc0f5e98bebd05e44261df3c33d274084eab60 Mon Sep 17 00:00:00 2001 From: Dale Whinham Date: Mon, 14 Jul 2025 18:35:38 +0100 Subject: [PATCH 207/358] dt-bindings: display: panel: samsung,atna30dw01: document ATNA30DW01 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Samsung ATNA30DW01 panel is a 13" AMOLED eDP panel. It is similar to the ATNA33XC20 except that it is smaller and has a higher resolution. 
Tested-by: Jérôme de Bretagne Signed-off-by: Dale Whinham Acked-by: Rob Herring (Arm) Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20250714173554.14223-3-daleyo@gmail.com --- .../devicetree/bindings/display/panel/samsung,atna33xc20.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 31f0c0f038e4..e36659340ef3 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -19,6 +19,8 @@ properties: - const: samsung,atna33xc20 - items: - enum: + # Samsung 13" 3K (2880×1920 pixels) eDP AMOLED panel + - samsung,atna30dw01 # Samsung 14" WQXGA+ (2880×1800 pixels) eDP AMOLED panel - samsung,atna40yk20 # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel From 8ff4a4b98d1f82544460a9e9c04dbe9d0ac0322f Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 5 Jul 2025 08:45:08 +0530 Subject: [PATCH 208/358] drm/amdgpu: Use cached partition mode, if valid For current partition mode queries, return the mode cached in partition manager whenever it's valid. 
Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index c8fcafeb6864..c417f8689220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -218,15 +218,27 @@ int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode); } +static bool __amdgpu_xcp_is_cached_mode_valid(struct amdgpu_xcp_mgr *xcp_mgr) +{ + if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + return true; + + if (!amdgpu_sriov_vf(xcp_mgr->adev) && + xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return true; + + if (xcp_mgr->mode != AMDGPU_XCP_MODE_NONE && + xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS) + return true; + + return false; +} + int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) { int mode; - if (!amdgpu_sriov_vf(xcp_mgr->adev) && - xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) - return xcp_mgr->mode; - - if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) + if (__amdgpu_xcp_is_cached_mode_valid(xcp_mgr)) return xcp_mgr->mode; if (!(flags & AMDGPU_XCP_FL_LOCKED)) From 730ea5074dac1b105717316be5d9c18b09829385 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 09:38:27 -0400 Subject: [PATCH 209/358] drm/amdgpu/gfx9: fix kiq locking in KCQ reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ring test needs to be inside the lock. 
Fixes: fdbd69486b46 ("drm/amdgpu/gfx9: wait for reset done before remap") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Jiadong Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 76ba664efecb..4c61157405b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7238,8 +7238,8 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { DRM_ERROR("fail to remap queue\n"); return r; From 08f116c59310728ea8b7e9dc3086569006c861cf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 09:42:23 -0400 Subject: [PATCH 210/358] drm/amdgpu/gfx9.4.3: fix kiq locking in KCQ reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ring test needs to be inside the lock. 
Fixes: 4c953e53cc34 ("drm/amdgpu/gfx_9.4.3: wait for reset done before remap") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Jiadong Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index daed0f187bda..5f92975cc305 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3615,9 +3615,8 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { dev_err(adev->dev, "fail to remap queue\n"); return r; From a4b2ba8f631d3e44b30b9b46ee290fbfe608b7d0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 09:56:35 -0400 Subject: [PATCH 211/358] drm/amdgpu/gfx10: fix kiq locking in KCQ reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ring test needs to be inside the lock. 
Fixes: 097af47d3cfb ("drm/amdgpu/gfx10: wait for reset done before remap") Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Jiadong Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5e099b5dc9a3..d739bfb20383 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9625,9 +9625,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; @@ -9663,9 +9662,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) return r; From d115a63f816035f976e14b7eba8f14e8e33c0945 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 11:26:14 -0400 Subject: [PATCH 212/358] drm/amdgpu/vcn4: add additional ring reset error checking Start and stop can fail, so add checks. 
Fixes: b8b6e6f1654d ("drm/amd: Add per-ring reset for vcn v4.0.0 use") Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index eec9133e1b2c..5bbce8544fef 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1979,8 +1979,12 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); - vcn_v4_0_stop(vinst); - vcn_v4_0_start(vinst); + r = vcn_v4_0_stop(vinst); + if (r) + return r; + r = vcn_v4_0_start(vinst); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) From 1b556bcc3837441b9f75d2c7df44e8e312b550e7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 11:28:33 -0400 Subject: [PATCH 213/358] drm/amdgpu/vcn4.0.5: add additional ring reset error checking Start and stop can fail, so add checks. 
Fixes: d1a46cdd0053 ("drm/amd: Add per-ring reset for vcn v4.0.5 use") Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 7e37ddea6355..6000c528ad6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1477,8 +1477,12 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); - vcn_v4_0_5_stop(vinst); - vcn_v4_0_5_start(vinst); + r = vcn_v4_0_5_stop(vinst); + if (r) + return r; + r = vcn_v4_0_5_start(vinst); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) From 7a5b69d60e448e134c7afa023e2a960d012b7a4f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 11:30:14 -0400 Subject: [PATCH 214/358] drm/amdgpu/vcn5: add additional ring reset error checking Start and stop can fail, so add checks. 
Fixes: b54695dae995 ("drm/amd: Add per-ring reset for vcn v5.0.0 use") Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher Cc: Mario Limonciello --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 47c0bcc9e7d8..3d3b4254bd72 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1204,8 +1204,12 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); - vcn_v5_0_0_stop(vinst); - vcn_v5_0_0_start(vinst); + r = vcn_v5_0_0_stop(vinst); + if (r) + return r; + r = vcn_v5_0_0_start(vinst); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) From fa301127ba9a22f40b4261f569f1fc8b3d66e04e Mon Sep 17 00:00:00 2001 From: Han Gao Date: Wed, 9 Jul 2025 14:51:38 +0800 Subject: [PATCH 215/358] drm/amdkfd: enable kfd on LoongArch systems KFD has been confirmed to run on LoongArch systems. It's necessary to support CONFIG_HSA_AMD on LoongArch.
Signed-off-by: Han Gao Signed-off-by: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index 62e88e5362e9..16e12c9913f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -5,7 +5,7 @@ config HSA_AMD bool "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT)) + depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64 || (RISCV && 64BIT) || (LOONGARCH && 64BIT)) select HMM_MIRROR select MMU_NOTIFIER select DRM_AMDGPU_USERPTR From 48cb9c3b21474864beb9e19cb4f7b4ccb50de77b Mon Sep 17 00:00:00 2001 From: Ce Sun Date: Fri, 11 Jul 2025 17:57:25 +0800 Subject: [PATCH 216/358] drm/amdgpu: The interrupt source was not released When the driver is unloaded, the interrupt source of the rma device is not released, resulting in the failure of hw_init when loading again using bad_page_threshold. 
Signed-off-by: Ce Sun Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 183fa33c2434..8112ffc85995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -624,7 +624,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type) { /* When the threshold is reached,the interrupt source may not be enabled.return -EINVAL */ - if (amdgpu_ras_is_rma(adev)) + if (amdgpu_ras_is_rma(adev) && !amdgpu_irq_enabled(adev, src, type)) return -EINVAL; if (!adev->irq.installed) From 04d57f4462a6c39f04711550aa60c42c8ed5d25d Mon Sep 17 00:00:00 2001 From: Dominik Kaszewski Date: Tue, 24 Jun 2025 12:40:25 +0200 Subject: [PATCH 217/358] drm/amd/display: Workaround for stuck I2C arbitration [Why] When booting without an HDMI display connected, the I2C registers are not initialized correctly, leading to the DC_I2C_ARBITRATION register getting stuck with DC_I2C_REG_RW_CNTL_STATUS == USED_BY_SW. [How] * Correct TOCTOU race condition in engine acquire logic which did not check against DMUB trying to acquire it at the same time. * Deassert SOFT_RESET before acquire, as it can block access to other I2C registers. * Add a workaround in release, checking that after triggering DC_I2C_SW_DONE_USING_I2C_REG, DC_I2C_REG_RW_CNTL_STATUS != USED_BY_SW. If necessary, trigger DC_I2C_SW_DONE_USING_I2C_REG again. * Remove unnecessary clear of DC_I2C_SW_USE_I2C_REG_REQ, which the engine ignores according to specification.
Reviewed-by: Alvin Lee Signed-off-by: Dominik Kaszewski Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 59 ++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index d28826c3ae5f..4e06468a6284 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -292,9 +292,35 @@ static void set_speed( FN(DC_I2C_DDC1_SPEED, DC_I2C_DDC1_THRESHOLD), 2); } +static bool acquire_engine(struct dce_i2c_hw *dce_i2c_hw) +{ + uint32_t arbitrate = 0; + + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + switch (arbitrate) { + case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW: + return true; + case DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_HW: + return false; + case DC_I2C_STATUS__DC_I2C_STATUS_IDLE: + default: + break; + } + + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, true); + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) + return false; + + return true; +} + static bool setup_engine( struct dce_i2c_hw *dce_i2c_hw) { + // Deassert soft reset to unblock I2C engine registers + REG_UPDATE(DC_I2C_CONTROL, DC_I2C_SOFT_RESET, false); + uint32_t i2c_setup_limit = I2C_SETUP_TIME_LIMIT_DCE; uint32_t reset_length = 0; @@ -309,8 +335,8 @@ static bool setup_engine( REG_UPDATE_N(SETUP, 1, FN(DC_I2C_DDC1_SETUP, DC_I2C_DDC1_CLK_EN), 1); - /* we have checked I2c not used by DMCU, set SW use I2C REQ to 1 to indicate SW using it*/ - REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_USE_I2C_REG_REQ, 1); + if (!acquire_engine(dce_i2c_hw)) + return false; /*set SW requested I2c speed to default, if API calls in it will be override later*/ set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz); @@ -319,9 +345,8 @@ static bool 
setup_engine( i2c_setup_limit = dce_i2c_hw->setup_limit; /* Program pin select */ - REG_UPDATE_6(DC_I2C_CONTROL, + REG_UPDATE_5(DC_I2C_CONTROL, DC_I2C_GO, 0, - DC_I2C_SOFT_RESET, 0, DC_I2C_SEND_RESET, 0, DC_I2C_SW_STATUS_RESET, 1, DC_I2C_TRANSACTION_COUNT, 0, @@ -351,6 +376,26 @@ static bool setup_engine( return true; } +/** + * If we boot without an HDMI display, the I2C engine does not get initialized + * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after + * acquire, so that after setting SW_DONE_USING_I2C on release, the engine gets + * immediately reacquired by SW, preventing DMUB from using it. + */ +static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw) +{ + uint32_t arbitrate = 0; + + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + if (arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW) + return; + + // Still acquired after release, release again as a workaround + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true); + REG_GET(DC_I2C_ARBITRATION, DC_I2C_REG_RW_CNTL_STATUS, &arbitrate); + ASSERT(arbitrate != DC_I2C_STATUS__DC_I2C_STATUS_USED_BY_SW); +} + static void release_engine( struct dce_i2c_hw *dce_i2c_hw) { @@ -378,9 +423,9 @@ static void release_engine( /*for HW HDCP Ri polling failure w/a test*/ set_speed(dce_i2c_hw, dce_i2c_hw->ctx->dc->caps.i2c_speed_in_khz_hdcp); - /* Release I2C after reset, so HW or DMCU could use it */ - REG_UPDATE_2(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, 1, - DC_I2C_SW_USE_I2C_REG_REQ, 0); + // Release I2C engine so it can be used by HW or DMCU, automatically clears SW_USE_I2C + REG_UPDATE(DC_I2C_ARBITRATION, DC_I2C_SW_DONE_USING_I2C_REG, true); + cntl_stuck_hw_workaround(dce_i2c_hw); if (dce_i2c_hw->ctx->dc->debug.enable_mem_low_power.bits.i2c) { if (dce_i2c_hw->regs->DIO_MEM_PWR_CTRL) From 26ad78fffc66886207d793527775120b45166200 Mon Sep 17 00:00:00 2001 From: Yihan Zhu Date: Thu, 19 Jun 2025 15:26:30 -0400 Subject: [PATCH 218/358] 
drm/amd/display: MPC basic allocation logic and TMZ [WHY & HOW] Adding basic logic to allocate unused RMCM block and TMZ support. Reviewed-by: Krunoslav Kovac Signed-off-by: Yihan Zhu Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 2 + .../gpu/drm/amd/display/dc/core/dc_state.c | 2 + .../gpu/drm/amd/display/dc/core/dc_stream.c | 67 +++++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 7 ++ drivers/gpu/drm/amd/display/dc/dc_stream.h | 11 +++ drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + .../amd/display/dc/hubp/dcn401/dcn401_hubp.c | 4 +- .../amd/display/dc/hubp/dcn401/dcn401_hubp.h | 2 +- .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 2 +- .../gpu/drm/amd/display/dc/inc/core_types.h | 3 + drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h | 2 +- 11 files changed, 98 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c744aa9d830f..f37ed24b2430 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -976,6 +976,8 @@ static bool dc_construct_ctx(struct dc *dc, if (!dc_ctx) return false; + dc_stream_init_rmcm_3dlut(dc); + dc_ctx->cgs_device = init_params->cgs_device; dc_ctx->driver_context = init_params->driver; dc_ctx->dc = dc; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_state.c b/drivers/gpu/drm/amd/display/dc/core/dc_state.c index 47712a4aec55..883054bb18e7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_state.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_state.c @@ -427,6 +427,8 @@ enum dc_status dc_state_remove_stream( return DC_ERROR_UNEXPECTED; } + dc_stream_release_3dlut_for_stream(dc, stream); + dc_stream_release(state->streams[i]); state->stream_count--; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index b883fb24fa12..4d6bc9fd4faa 100644 --- 
a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -856,6 +856,73 @@ void dc_stream_log(const struct dc *dc, const struct dc_stream_state *stream) } } +/* +* dc_stream_get_3dlut_for_stream() +* Requirements: +* 1. If stream already owns an RMCM instance, return it. +* 2. If it doesn't and we don't need to allocate, return NULL. +* 3. If there's a free RMCM instance, assign to stream and return it. +* 4. If no free RMCM instances, return NULL. +*/ + +struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream, + bool allocate_one) +{ + unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts; + + // see if one is allocated for this stream + for (int i = 0; i < num_rmcm; i++) { + if (dc->res_pool->rmcm_3dlut[i].isInUse && + dc->res_pool->rmcm_3dlut[i].stream == stream) + return &dc->res_pool->rmcm_3dlut[i]; + } + + //case: not found one, and dont need to allocate + if (!allocate_one) + return NULL; + + //see if there is an unused 3dlut, allocate + for (int i = 0; i < num_rmcm; i++) { + if (!dc->res_pool->rmcm_3dlut[i].isInUse) { + dc->res_pool->rmcm_3dlut[i].isInUse = true; + dc->res_pool->rmcm_3dlut[i].stream = stream; + return &dc->res_pool->rmcm_3dlut[i]; + } + } + + //dont have a 3dlut + return NULL; +} + + +void dc_stream_release_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream) +{ + struct dc_rmcm_3dlut *rmcm_3dlut = + dc_stream_get_3dlut_for_stream(dc, stream, false); + + if (rmcm_3dlut) { + rmcm_3dlut->isInUse = false; + rmcm_3dlut->stream = NULL; + rmcm_3dlut->protection_bits = 0; + } +} + + +void dc_stream_init_rmcm_3dlut(struct dc *dc) +{ + unsigned int num_rmcm = dc->caps.color.mpc.num_rmcm_3dluts; + + for (int i = 0; i < num_rmcm; i++) { + dc->res_pool->rmcm_3dlut[i].isInUse = false; + dc->res_pool->rmcm_3dlut[i].stream = NULL; + dc->res_pool->rmcm_3dlut[i].protection_bits = 0; + } +} + /* * Finds the greatest index in
refresh_rate_hz that contains a value <= refresh */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index ce1957c7862f..6825b8a9af59 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -246,6 +246,7 @@ struct mpc_color_caps { uint16_t ogam_ram : 1; uint16_t ocsc : 1; uint16_t num_3dluts : 3; + uint16_t num_rmcm_3dluts : 3; uint16_t shared_3d_lut:1; struct rom_curve_caps ogam_rom_caps; struct lut3d_caps mcm_3d_lut_caps; @@ -1294,6 +1295,12 @@ union dc_3dlut_state { }; +struct dc_rmcm_3dlut { + bool isInUse; + const struct dc_stream_state *stream; + uint8_t protection_bits; +}; + struct dc_3dlut { struct kref refcount; struct tetrahedral_params lut_3d; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 341d2ffb64b1..5fc6fea211de 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -579,6 +579,17 @@ bool dc_stream_set_gamut_remap(struct dc *dc, bool dc_stream_program_csc_matrix(struct dc *dc, struct dc_stream_state *stream); +struct dc_rmcm_3dlut *dc_stream_get_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream, + bool allocate_one); + +void dc_stream_release_3dlut_for_stream( + const struct dc *dc, + const struct dc_stream_state *stream); + +void dc_stream_init_rmcm_3dlut(struct dc *dc); + struct pipe_ctx *dc_stream_get_pipe_ctx(struct dc_stream_state *stream); void dc_dmub_update_dirty_rect(struct dc *dc, diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index b203ed020cd5..a0829bc0708a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -1313,6 +1313,7 @@ struct dc_cm2_func_luts { bool mpc_3dlut_enable; bool rmcm_3dlut_enable; bool mpc_mcm_post_blend; + uint8_t rmcm_tmz; } lut3d_data; const struct dc_transfer_func *lut1d_func; }; diff --git 
a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index baed31611477..705b98b1b6cc 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -86,11 +86,11 @@ void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); } -void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); - REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 1 : 0); + REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_bits); } void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index 6e1d4c90ddd4..608e6153fa68 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -333,7 +333,7 @@ void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); -void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled); +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, uint8_t protection_bits); void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 9ad29be925e2..39910f73ecd0 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -335,7 +335,7 @@ static void dcn10_log_hubp_states(struct dc *dc, void 
*log_ctx) struct dcn_fl_regs_st *fl_regs = &s->fl_regs; if (!s->blank_en) { - DTN_INFO("[%2d]: %5xh %6xh %5d %6d %2xh %2xh %6xh %6d %8d %8d %7d %8xh %5x %5x %5x", + DTN_INFO("[%2d]: %5xh %6xh %5d %6d %8xh %2xh %6xh %6d %8d %8d %7d %8xh %5x %5x %5x", pool->hubps[i]->inst, fl_regs->lut_enable, fl_regs->lut_done, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index c9454fe1cd05..f0d7185153b2 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -67,6 +67,8 @@ struct resource_context; struct clk_bw_params; struct dc_mcache_params; +#define MAX_RMCM_INST 2 + struct resource_funcs { enum engine_id (*get_preferred_eng_id_dpia)(unsigned int dpia_index); void (*destroy)(struct resource_pool **pool); @@ -286,6 +288,7 @@ struct resource_pool { struct hpo_dp_link_encoder *hpo_dp_link_enc[MAX_HPO_DP2_LINK_ENCODERS]; struct dc_3dlut *mpc_lut[MAX_PIPES]; struct dc_transfer_func *mpc_shaper[MAX_PIPES]; + struct dc_rmcm_3dlut rmcm_3dlut[MAX_RMCM_INST]; struct { unsigned int xtalin_clock_inKhz; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index b610beb075d5..cee29e89ec5c 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -282,7 +282,7 @@ struct hubp_funcs { void (*hubp_enable_3dlut_fl)(struct hubp *hubp, bool enable); void (*hubp_program_3dlut_fl_addressing_mode)(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode); void (*hubp_program_3dlut_fl_width)(struct hubp *hubp, enum hubp_3dlut_fl_width width); - void (*hubp_program_3dlut_fl_tmz_protected)(struct hubp *hubp, bool protection_enabled); + void (*hubp_program_3dlut_fl_tmz_protected)(struct hubp *hubp, uint8_t protection_bits); void (*hubp_program_3dlut_fl_crossbar)(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, enum hubp_3dlut_fl_crossbar_bit_slice 
bit_slice_cb_b, From a1619668d41f6f3c26b5dc5bff68456eeaa02cbe Mon Sep 17 00:00:00 2001 From: Karthi Kandasamy Date: Wed, 11 Jun 2025 15:46:27 +0200 Subject: [PATCH 219/358] drm/amd/display: Make dcn401_initialize_min_clocks() available to other compilation units [Why & How] Expose dcn401_initialize_min_clocks() for future use and add additional check for IP register. Reviewed-by: Nevenko Stupar Signed-off-by: Karthi Kandasamy Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dsc/dsc.h | 1 + .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 14 ++++++++------ .../drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h | 1 + 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h index 1ebce5426a58..b0bd1f9425b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dsc.h @@ -108,6 +108,7 @@ struct dsc_funcs { void (*dsc_disable)(struct display_stream_compressor *dsc); void (*dsc_disconnect)(struct display_stream_compressor *dsc); void (*dsc_wait_disconnect_pending_clear)(struct display_stream_compressor *dsc); + void (*dsc_get_single_enc_caps)(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index a0d61df07f22..cc9f40d97af2 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -51,7 +51,7 @@ #define FN(reg_name, field_name) \ hws->shifts->field_name, hws->masks->field_name -static void dcn401_initialize_min_clocks(struct dc *dc) +void dcn401_initialize_min_clocks(struct dc *dc) { struct dc_clocks *clocks = &dc->current_state->bw_ctx.bw.dcn.clk; @@ -2632,10 +2632,12 @@ void dcn401_plane_atomic_power_down(struct dc *dc, 
DC_LOGGER_INIT(dc->ctx->logger); - REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); - if (org_ip_request_cntl == 0) - REG_SET(DC_IP_REQUEST_CNTL, 0, - IP_REQUEST_EN, 1); + if (REG(DC_IP_REQUEST_CNTL)) { + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, + IP_REQUEST_EN, 1); + } if (hws->funcs.dpp_pg_control) hws->funcs.dpp_pg_control(hws, dpp->inst, false); @@ -2646,7 +2648,7 @@ void dcn401_plane_atomic_power_down(struct dc *dc, hubp->funcs->hubp_reset(hubp); dpp->funcs->dpp_reset(dpp); - if (org_ip_request_cntl == 0) + if (org_ip_request_cntl == 0 && REG(DC_IP_REQUEST_CNTL)) REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h index 781cf0efccc6..2621b7725267 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.h @@ -109,4 +109,5 @@ void dcn401_detect_pipe_changes( void dcn401_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); +void dcn401_initialize_min_clocks(struct dc *dc); #endif /* __DC_HWSS_DCN401_H__ */ From d7b618bc41ee3d44c070212dff93949702ede997 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Fri, 20 Jun 2025 16:23:43 -0400 Subject: [PATCH 220/358] drm/amd/display: Refactor DSC cap calculations [WHY] DSC block level should only be responsible for reporting single DSC instance capabilities. Factoring in ODM combine requirements should be handled in dc_dsc.c. Both components should acquire clocks from clk_mgr to determine throughput capabilities instead of relying on hard coded values as these can differ by SoC and SKU. 
[HOW] 1) Add dsc_get_single_enc_caps to acquire single DSC instance capabilities (replacing dsc_get_enc_caps), factoring in DSCCLK 2) add build_dsc_enc_caps to combine single DSC instance capabilities 3) account for max pixel rate per pipe (DISPCLK) when calculating minimum slice count Reviewed-by: Wenjing Liu Signed-off-by: Dillon Varone Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 30 +++ .../dc/clk_mgr/dcn401/dcn401_clk_mgr.h | 2 + drivers/gpu/drm/amd/display/dc/dc.h | 2 + drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 192 +++++++++++++++--- .../amd/display/dc/dsc/dcn401/dcn401_dsc.c | 24 +-- .../amd/display/dc/dsc/dcn401/dcn401_dsc.h | 1 - .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 12 ++ .../dc/resource/dcn20/dcn20_resource.c | 2 + .../dc/resource/dcn201/dcn201_resource.c | 2 + .../dc/resource/dcn21/dcn21_resource.c | 2 + .../dc/resource/dcn30/dcn30_resource.c | 2 + .../dc/resource/dcn301/dcn301_resource.c | 2 + .../dc/resource/dcn302/dcn302_resource.c | 2 + .../dc/resource/dcn303/dcn303_resource.c | 2 + .../dc/resource/dcn31/dcn31_resource.c | 2 + .../dc/resource/dcn314/dcn314_resource.c | 2 + .../dc/resource/dcn315/dcn315_resource.c | 2 + .../dc/resource/dcn316/dcn316_resource.c | 2 + .../dc/resource/dcn32/dcn32_resource.c | 2 + .../dc/resource/dcn321/dcn321_resource.c | 2 + .../dc/resource/dcn35/dcn35_resource.c | 2 + .../dc/resource/dcn351/dcn351_resource.c | 2 + .../dc/resource/dcn36/dcn36_resource.c | 2 + .../dc/resource/dcn401/dcn401_resource.c | 2 + 24 files changed, 253 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 514a5efda102..724484573a2a 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1500,6 +1500,35 @@ static int 
dcn401_get_dispclk_from_dentist(struct clk_mgr *clk_mgr_base) return 0; } +unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + unsigned int num_clk_levels; + + switch (clk_type) { + case CLK_TYPE_DISPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK) ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dispclk; + case CLK_TYPE_DPPCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dppclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DPPCLK) ? + clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dppclk_mhz * 1000 : + clk_mgr->base.boot_snapshot.dppclk; + case CLK_TYPE_DSCCLK: + num_clk_levels = clk_mgr->base.bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; + return dcn401_is_ppclk_dpm_enabled(clk_mgr, PPCLK_DISPCLK) ? 
+ clk_mgr->base.bw_params->clk_table.entries[num_clk_levels - 1].dispclk_mhz * 1000 / 3 : + clk_mgr->base.boot_snapshot.dispclk / 3; + default: + break; + } + + return 0; +} + static struct clk_mgr_funcs dcn401_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .get_dtb_ref_clk_frequency = dcn401_get_dtb_ref_freq_khz, @@ -1516,6 +1545,7 @@ static struct clk_mgr_funcs dcn401_funcs = { .get_hard_min_memclk = dcn401_get_hard_min_memclk, .get_hard_min_fclk = dcn401_get_hard_min_fclk, .is_dc_mode_present = dcn401_is_dc_mode_present, + .get_max_clock_khz = dcn401_get_max_clock_khz, }; struct clk_mgr_internal *dcn401_clk_mgr_construct( diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h index 616e964df96d..97a1ce1e8a9e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h @@ -112,4 +112,6 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct(struct dc_context *ctx, void dcn401_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr); +unsigned int dcn401_get_max_clock_khz(struct clk_mgr *clk_mgr_base, enum clk_type clk_type); + #endif /* __DCN401_CLK_MGR_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 6825b8a9af59..a160671d2a01 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -348,6 +348,8 @@ struct dc_caps { struct dc_scl_caps scl_caps; uint8_t num_of_host_routers; uint8_t num_of_dpias_per_host_router; + /* limit of the ODM only, could be limited by other factors (like pipe count)*/ + uint8_t max_odm_combine_factor; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index 11535922b5ff..a454d16e6586 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -30,6 +30,9 @@ 
#include "rc_calc.h" #include "fixed31_32.h" +#include "clk_mgr.h" +#include "resource.h" + #define DC_LOGGER \ dsc->ctx->logger @@ -149,6 +152,11 @@ uint32_t dc_bandwidth_in_kbps_from_timing( } /* Forward Declerations */ +static unsigned int get_min_slice_count_for_odm( + const struct display_stream_compressor *dsc, + const struct dsc_enc_caps *dsc_enc_caps, + const struct dc_crtc_timing *timing); + static bool decide_dsc_bandwidth_range( const uint32_t min_bpp_x16, const uint32_t max_bpp_x16, @@ -183,6 +191,7 @@ static bool setup_dsc_config( const struct dc_crtc_timing *timing, const struct dc_dsc_config_options *options, const enum dc_link_encoding_format link_encoding, + int min_slice_count, struct dc_dsc_config *dsc_cfg); static bool dsc_buff_block_size_from_dpcd(int dpcd_buff_block_size, int *buff_block_size) @@ -442,7 +451,6 @@ bool dc_dsc_parse_dsc_dpcd(const struct dc *dc, return true; } - /* If DSC is possbile, get DSC bandwidth range based on [min_bpp, max_bpp] target bitrate range and * timing's pixel clock and uncompressed bandwidth. * If DSC is not possible, leave '*range' untouched. 
@@ -458,6 +466,7 @@ bool dc_dsc_compute_bandwidth_range( struct dc_dsc_bw_range *range) { bool is_dsc_possible = false; + unsigned int min_slice_count; struct dsc_enc_caps dsc_enc_caps; struct dsc_enc_caps dsc_common_caps; struct dc_dsc_config config = {0}; @@ -469,12 +478,14 @@ bool dc_dsc_compute_bandwidth_range( get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, timing->pixel_encoding, &dsc_common_caps); if (is_dsc_possible) is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - &options, link_encoding, &config); + &options, link_encoding, min_slice_count, &config); if (is_dsc_possible) is_dsc_possible = decide_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, @@ -525,20 +536,152 @@ void dc_dsc_dump_decoder_caps(const struct display_stream_compressor *dsc, DC_LOG_DSC("\tis_dp %d", dsc_sink_caps->is_dp); } + +static void build_dsc_enc_combined_slice_caps( + const struct dsc_enc_caps *single_dsc_enc_caps, + struct dsc_enc_caps *dsc_enc_caps, + unsigned int max_odm_combine_factor) +{ + /* 1-16 slice configurations, single DSC */ + dsc_enc_caps->slice_caps.raw |= single_dsc_enc_caps->slice_caps.raw; + + /* 2x DSC's */ + if (max_odm_combine_factor >= 2) { + /* 1 + 1 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; + + /* 2 + 2 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; + + /* 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + + /* 8 + 8 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_8; + } + + /* 3x DSC's */ + if (max_odm_combine_factor >= 3) { + /* 4 + 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + } + + /* 4x 
DSC's */ + if (max_odm_combine_factor >= 4) { + /* 1 + 1 + 1 + 1 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_1; + + /* 2 + 2 + 2 + 2 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_2; + + /* 3 + 3 + 3 + 3 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_3; + + /* 4 + 4 + 4 + 4 */ + dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 |= single_dsc_enc_caps->slice_caps.bits.NUM_SLICES_4; + } +} + +static void build_dsc_enc_caps( + const struct display_stream_compressor *dsc, + struct dsc_enc_caps *dsc_enc_caps) +{ + unsigned int max_dscclk_khz; + unsigned int num_dsc; + unsigned int max_odm_combine_factor; + struct dsc_enc_caps single_dsc_enc_caps; + + struct dc *dc; + + memset(&single_dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); + + if (!dsc || !dsc->ctx || !dsc->ctx->dc || !dsc->funcs->dsc_get_single_enc_caps) + return; + + dc = dsc->ctx->dc; + + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool) + return; + + /* get max DSCCLK from clk_mgr */ + max_dscclk_khz = dc->clk_mgr->funcs->get_max_clock_khz(dc->clk_mgr, CLK_TYPE_DSCCLK); + + dsc->funcs->dsc_get_single_enc_caps(&single_dsc_enc_caps, max_dscclk_khz); + + /* global capabilities */ + dsc_enc_caps->dsc_version = single_dsc_enc_caps.dsc_version; + dsc_enc_caps->lb_bit_depth = single_dsc_enc_caps.lb_bit_depth; + dsc_enc_caps->is_block_pred_supported = single_dsc_enc_caps.is_block_pred_supported; + dsc_enc_caps->max_slice_width = single_dsc_enc_caps.max_slice_width; + dsc_enc_caps->bpp_increment_div = single_dsc_enc_caps.bpp_increment_div; + dsc_enc_caps->color_formats.raw = single_dsc_enc_caps.color_formats.raw; + dsc_enc_caps->color_depth.raw = single_dsc_enc_caps.color_depth.raw; + + /* expand per DSC capabilities to global */ + max_odm_combine_factor = dc->caps.max_odm_combine_factor; + num_dsc = dc->res_pool->res_cap->num_dsc; + 
max_odm_combine_factor = min(max_odm_combine_factor, num_dsc); + dsc_enc_caps->max_total_throughput_mps = + single_dsc_enc_caps.max_total_throughput_mps * + max_odm_combine_factor; + + /* check slice counts possible for with ODM combine */ + build_dsc_enc_combined_slice_caps(&single_dsc_enc_caps, dsc_enc_caps, max_odm_combine_factor); +} + +static inline uint32_t dsc_div_by_10_round_up(uint32_t value) +{ + return (value + 9) / 10; +} + +static unsigned int get_min_slice_count_for_odm( + const struct display_stream_compressor *dsc, + const struct dsc_enc_caps *dsc_enc_caps, + const struct dc_crtc_timing *timing) +{ + unsigned int max_dispclk_khz; + + /* get max pixel rate and combine caps */ + max_dispclk_khz = dsc_enc_caps->max_total_throughput_mps * 1000; + if (dsc && dsc->ctx->dc) { + if (dsc->ctx->dc->clk_mgr && + dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz) { + /* dispclk is available */ + max_dispclk_khz = dsc->ctx->dc->clk_mgr->funcs->get_max_clock_khz(dsc->ctx->dc->clk_mgr, CLK_TYPE_DISPCLK); + } + } + + /* consider minimum odm slices required due to + * 1) display pipe throughput (dispclk) + * 2) max image width per slice + */ + return dc_fixpt_ceil(dc_fixpt_max( + dc_fixpt_div_int(dc_fixpt_from_int(dsc_div_by_10_round_up(timing->pix_clk_100hz)), + max_dispclk_khz), // throughput + dc_fixpt_div_int(dc_fixpt_from_int(timing->h_addressable + timing->h_border_left + timing->h_border_right), + dsc_enc_caps->max_slice_width))); // slice width +} + static void get_dsc_enc_caps( const struct display_stream_compressor *dsc, struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) { - // This is a static HW query, so we can use any DSC - memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (dsc) { + + if (!dsc) + return; + + /* check if reported cap global or only for a single DCN DSC enc */ + if (dsc->funcs->dsc_get_enc_caps) { if (!dsc->ctx->dc->debug.disable_dsc) dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); - if 
(dsc->ctx->dc->debug.native422_support) - dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; + } else { + build_dsc_enc_caps(dsc, dsc_enc_caps); } + + if (dsc->ctx->dc->debug.native422_support) + dsc_enc_caps->color_formats.bits.YCBCR_NATIVE_422 = 1; } /* Returns 'false' if no intersection was found for at least one capability. @@ -621,11 +764,6 @@ static bool intersect_dsc_caps( return true; } -static inline uint32_t dsc_div_by_10_round_up(uint32_t value) -{ - return (value + 9) / 10; -} - static uint32_t compute_bpp_x16_from_target_bandwidth( const uint32_t bandwidth_in_kbps, const struct dc_crtc_timing *timing, @@ -910,11 +1048,11 @@ static bool setup_dsc_config( const struct dc_crtc_timing *timing, const struct dc_dsc_config_options *options, const enum dc_link_encoding_format link_encoding, + int min_slices_h, struct dc_dsc_config *dsc_cfg) { struct dsc_enc_caps dsc_common_caps; int max_slices_h = 0; - int min_slices_h = 0; int num_slices_h = 0; int pic_width; int slice_width; @@ -1018,12 +1156,9 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - min_slices_h = pic_width / dsc_common_caps.max_slice_width; - if (pic_width % dsc_common_caps.max_slice_width) - min_slices_h++; - min_slices_h = fit_num_slices_up(dsc_common_caps.slice_caps, min_slices_h); + /* increase minimum slice count to meet sink throughput limitations */ while (min_slices_h <= max_slices_h) { int pix_clk_per_slice_khz = dsc_div_by_10_round_up(timing->pix_clk_100hz) / min_slices_h; if (pix_clk_per_slice_khz <= sink_per_slice_throughput_mps * 1000) @@ -1032,14 +1167,12 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } - is_dsc_possible = (min_slices_h <= max_slices_h); - - if (pic_width % min_slices_h != 0) - min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? 
- - if (min_slices_h == 0 && max_slices_h == 0) - is_dsc_possible = false; + /* increase minimum slice count to meet divisibility requirements */ + while (pic_width % min_slices_h != 0 && min_slices_h <= max_slices_h) { + min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); + } + is_dsc_possible = (min_slices_h <= max_slices_h) && max_slices_h != 0; if (!is_dsc_possible) goto done; @@ -1162,12 +1295,19 @@ bool dc_dsc_compute_config( { bool is_dsc_possible = false; struct dsc_enc_caps dsc_enc_caps; - + unsigned int min_slice_count; get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); + + min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, target_bandwidth_kbps, - timing, options, link_encoding, dsc_cfg); + timing, + options, + link_encoding, + min_slice_count, + dsc_cfg); return is_dsc_possible; } diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c index af5de564faec..7bd92ae8b13e 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.c @@ -9,17 +9,14 @@ #include "dsc/dscc_types.h" #include "dsc/rc_calc.h" -#define MAX_THROUGHPUT_PER_DSC_100HZ 20000000 -#define MAX_DSC_UNIT_COMBINE 4 - static void dsc_write_to_registers(struct display_stream_compressor *dsc, const struct dsc_reg_values *reg_vals); /* Object I/F functions */ //static void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); //static bool dsc401_get_packed_pps(struct display_stream_compressor *dsc, const struct dsc_config *dsc_cfg, uint8_t *dsc_packed_pps); +static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz); static const struct dsc_funcs dcn401_dsc_funcs = { - .dsc_get_enc_caps = dsc401_get_enc_caps, .dsc_read_state = dsc401_read_state, .dsc_validate_stream = 
dsc401_validate_stream, .dsc_set_config = dsc401_set_config, @@ -28,6 +25,7 @@ static const struct dsc_funcs dcn401_dsc_funcs = { .dsc_disable = dsc401_disable, .dsc_disconnect = dsc401_disconnect, .dsc_wait_disconnect_pending_clear = dsc401_wait_disconnect_pending_clear, + .dsc_get_single_enc_caps = dsc401_get_single_enc_caps, }; /* Macro definitios for REG_SET macros*/ @@ -64,22 +62,14 @@ void dsc401_construct(struct dcn401_dsc *dsc, dsc->max_image_width = 5184; } -void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz) +static void dsc401_get_single_enc_caps(struct dsc_enc_caps *dsc_enc_caps, unsigned int max_dscclk_khz) { - int min_dsc_unit_required = (pixel_clock_100Hz + MAX_THROUGHPUT_PER_DSC_100HZ - 1) / MAX_THROUGHPUT_PER_DSC_100HZ; - dsc_enc_caps->dsc_version = 0x21; /* v1.2 - DP spec defined it in reverse order and we kept it */ - /* 1 slice is only supported with 1 DSC unit */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = min_dsc_unit_required == 1 ? 1 : 0; - /* 2 slice is only supported with 1 or 2 DSC units */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = (min_dsc_unit_required == 1 || min_dsc_unit_required == 2) ? 1 : 0; - /* 3 slice is only supported with 1 DSC unit */ - dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = min_dsc_unit_required == 1 ? 
1 : 0; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_1 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_2 = 1; + dsc_enc_caps->slice_caps.bits.NUM_SLICES_3 = 1; dsc_enc_caps->slice_caps.bits.NUM_SLICES_4 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_8 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_12 = 1; - dsc_enc_caps->slice_caps.bits.NUM_SLICES_16 = 1; dsc_enc_caps->lb_bit_depth = 13; dsc_enc_caps->is_block_pred_supported = true; @@ -93,7 +83,7 @@ void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100H dsc_enc_caps->color_depth.bits.COLOR_DEPTH_8_BPC = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_10_BPC = 1; dsc_enc_caps->color_depth.bits.COLOR_DEPTH_12_BPC = 1; - dsc_enc_caps->max_total_throughput_mps = MAX_THROUGHPUT_PER_DSC_100HZ * MAX_DSC_UNIT_COMBINE; + dsc_enc_caps->max_total_throughput_mps = max_dscclk_khz * 3 / 1000; dsc_enc_caps->max_slice_width = 5184; /* (including 64 overlap pixels for eDP MSO mode) */ dsc_enc_caps->bpp_increment_div = 16; /* 1/16th of a bit */ diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h index 1d927d8e83bf..7acd57eb4f42 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h +++ b/drivers/gpu/drm/amd/display/dc/dsc/dcn401/dcn401_dsc.h @@ -341,7 +341,6 @@ void dsc401_set_config(struct display_stream_compressor *dsc, const struct dsc_c void dsc401_enable(struct display_stream_compressor *dsc, int opp_pipe); void dsc401_disable(struct display_stream_compressor *dsc); void dsc401_disconnect(struct display_stream_compressor *dsc); -void dsc401_get_enc_caps(struct dsc_enc_caps *dsc_enc_caps, int pixel_clock_100Hz); void dsc401_wait_disconnect_pending_clear(struct display_stream_compressor *dsc); #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 3b736f4687a6..7d66e62b6be6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ 
b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -100,6 +100,17 @@ struct dcn301_clk_internal { #define MAX_NUM_DPM_LVL 8 #define WM_SET_COUNT 4 +enum clk_type { + CLK_TYPE_DCFCLK, + CLK_TYPE_FCLK, + CLK_TYPE_MCLK, + CLK_TYPE_SOCCLK, + CLK_TYPE_DTBCLK, + CLK_TYPE_DISPCLK, + CLK_TYPE_DPPCLK, + CLK_TYPE_DSCCLK, + CLK_TYPE_COUNT +}; struct clk_limit_table_entry { unsigned int voltage; /* milivolts withh 2 fractional bits */ @@ -326,6 +337,7 @@ struct clk_mgr_funcs { bool (*is_dc_mode_present)(struct clk_mgr *clk_mgr); + unsigned int (*get_max_clock_khz)(struct clk_mgr *clk_mgr_base, enum clk_type clk_type); }; struct clk_mgr { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 067a93420a23..f9cbdad3ef37 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -2736,6 +2736,8 @@ static bool dcn20_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index 43fa2cb117f3..e4a1338d21e0 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -1285,6 +1285,8 @@ static bool dcn201_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c index 238d7f8beb7c..918742a42ded 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c 
+++ b/drivers/gpu/drm/amd/display/dc/resource/dcn21/dcn21_resource.c @@ -1684,6 +1684,8 @@ static bool dcn21_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 2; + dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c index 4d4635e01eb6..895349d9ca07 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c @@ -2586,6 +2586,8 @@ static bool dcn30_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c index 121a86a59833..82a205a7c25c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn301/dcn301_resource.c @@ -1706,6 +1706,8 @@ static bool dcn301_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; return true; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c index 012c5fd52cb1..3345068a878c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c @@ -1481,6 +1481,8 @@ static bool dcn302_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git 
a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c index a8d0b4686f9a..3479e1eab4cd 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c @@ -1414,6 +1414,8 @@ static bool dcn303_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c index 88afa59d17b0..3ed7f50554e2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn31/dcn31_resource.c @@ -2204,6 +2204,8 @@ static bool dcn31_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_1_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index dedf7bce6ece..de708fdc1e80 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -2119,6 +2119,8 @@ static bool dcn314_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_14_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c index d110be626bc2..82cc78c291d8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn315/dcn315_resource.c @@ -2142,6 +2142,8 @@ static bool dcn315_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_15_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c index 939811858ff7..636110e48d01 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn316/dcn316_resource.c @@ -2010,6 +2010,8 @@ static bool dcn316_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = dcn3_16_ip.max_num_dpp; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c index 9ffa10189eee..9917b366f00c 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c @@ -2500,6 +2500,8 @@ static bool dcn32_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index c53266e16c58..061c0907d802 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1999,6 +1999,8 @@ static bool dcn321_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; 
if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c index e327dee9be21..1d1a002f6d54 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c @@ -2154,6 +2154,8 @@ static bool dcn35_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = pool->base.pipe_count; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c index 2f2976afc229..47912e0861a2 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c @@ -2125,6 +2125,8 @@ static bool dcn351_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c index 5b7848496a70..9ba91e214dda 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn36/dcn36_resource.c @@ -2127,6 +2127,8 @@ static bool dcn36_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; dc->dcn_ip->max_num_dpp = pool->base.pipe_count; diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a9d989f20405..b3988e38d0a6 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ 
b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -2190,6 +2190,8 @@ static bool dcn401_resource_construct( for (i = 0; i < dc->caps.max_planes; ++i) dc->caps.planes[i] = plane_cap; + dc->caps.max_odm_combine_factor = 4; + dc->cap_funcs = cap_funcs; if (dc->ctx->dc_bios->fw_info.oem_i2c_present) { From aef3af22a4560cad02f864c5ebd6372b516cceeb Mon Sep 17 00:00:00 2001 From: Ilya Bakoulin Date: Tue, 24 Jun 2025 16:51:16 -0400 Subject: [PATCH 221/358] drm/amd/display: Add definitions to support DID Type5 descriptors [Why/How] Add the timing source needed to support DID Type5. Reviewed-by: Aric Cyr Signed-off-by: Ilya Bakoulin Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index a0829bc0708a..229a754b7b6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -263,6 +263,7 @@ enum dc_timing_source { TIMING_SOURCE_EDID_4BYTE, TIMING_SOURCE_EDID_CEA_DISPLAYID_VTDB, TIMING_SOURCE_EDID_CEA_RID, + TIMING_SOURCE_EDID_DISPLAYID_TYPE5, TIMING_SOURCE_VBIOS, TIMING_SOURCE_CV, TIMING_SOURCE_TV, From 3f2b24a1ef359d4a8313dc919bf772aeb6b54c9b Mon Sep 17 00:00:00 2001 From: Fudongwang Date: Tue, 24 Jun 2025 17:49:47 +0800 Subject: [PATCH 222/358] drm/amd/display: Monitor patch to ignore EDID audio SAB check [Why & How] Some monitors have audio output but their SAB data is zero. Skip this check in that case. 
Reviewed-by: Charlene Liu Reviewed-by: Jun Lei Signed-off-by: Fudongwang Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 +++- drivers/gpu/drm/amd/display/dc/dc_types.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 854fc51f159c..4d6181e7c612 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3940,7 +3940,9 @@ enum dc_status resource_map_pool_resources( /* TODO: Add check if ASIC support and EDID audio */ if (!stream->converter_disable_audio && dc_is_audio_capable_signal(pipe_ctx->stream->signal) && - stream->audio_info.mode_count && stream->audio_info.flags.all) { + stream->audio_info.mode_count && + (stream->audio_info.flags.all || + (stream->sink && stream->sink->edid_caps.panel_patch.skip_audio_sab_check))) { pipe_ctx->stream_res.audio = find_first_free_audio( &context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id, dc_ctx->dce_version); diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h index 229a754b7b6e..375ca2f13b7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_types.h @@ -175,6 +175,7 @@ struct dc_panel_patch { unsigned int embedded_tiled_slave; unsigned int disable_fams; unsigned int skip_avmute; + unsigned int skip_audio_sab_check; unsigned int mst_start_top_delay; unsigned int remove_sink_ext_caps; unsigned int disable_colorimetry; From f354556e29f40ef44fa8b13dc914817db3537e20 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Thu, 26 Jun 2025 16:36:17 -0400 Subject: [PATCH 223/358] drm/amd/display: limit clear_update_flags to dcn32 and above [why] dc has some code out of sync: dc_commit_updates_for_stream handles v1/v2/v3, but dc_update_planes_and_stream makes v1 asic to use v2. 
as a regression fix: limit clear_update_flags to dcn32 or newer asic. need to follow up that v1 asic using v2 issue. Reviewed-by: Syed Hassan Signed-off-by: Charlene Liu Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f37ed24b2430..cee45fe7cec9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5443,8 +5443,7 @@ bool dc_update_planes_and_stream(struct dc *dc, else ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - - if (ret) + if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) clear_update_flags(srf_updates, surface_count, stream); return ret; @@ -5475,7 +5474,7 @@ void dc_commit_updates_for_stream(struct dc *dc, ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream, stream_update, state); - if (ret) + if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) clear_update_flags(srf_updates, surface_count, stream); } From 2ee27baf5c7cbaa97d58288c46de12dfe47cde78 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Mon, 31 Mar 2025 12:35:11 -0400 Subject: [PATCH 224/358] drm/amd/display: Notify display idle on D3 [Why & How] Display idle notification shall be sent by driver on D3 entry. Implement notification to DMUB and PMFW. 
Reviewed-by: Duncan Ma Signed-off-by: Duncan Ma Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 9 +++++++++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cee45fe7cec9..c31f7f8e409f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5547,6 +5547,15 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); } break; + case DC_ACPI_CM_POWER_STATE_D3: + if (dc->caps.ips_support) + dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); + + if (dc->caps.ips_v2_support) { + if (dc->clk_mgr->funcs->set_low_power_state) + dc->clk_mgr->funcs->set_low_power_state(dc->clk_mgr); + } + break; default: ASSERT(dc->current_state->stream_count == 0); dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index a160671d2a01..aa1b976cf40d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -311,6 +311,7 @@ struct dc_caps { bool dmcub_support; bool zstate_support; bool ips_support; + bool ips_v2_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; From f9dbe8eb1b3d0120271c455e209731000cedc23f Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Tue, 1 Apr 2025 00:14:01 -0400 Subject: [PATCH 225/358] drm/amd/display: Adding missing driver code for IPSv2.0 [Why & How] Aligned IPS FW state with DMCUB IPS FW state Added debug option disable_ips_rcg to modify RCG behaviour in IPS modes. 
Updated existing debug option disable_ips to align with new changes introduced by IPSv2.0 Reviewed-by: Duncan Ma Signed-off-by: Leo Chen Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 51 ++++++++++++++----- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 +- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index aa1b976cf40d..8a09c5f487d3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -515,6 +515,7 @@ struct dc_config { bool EnableMinDispClkODM; bool enable_auto_dpm_test_logs; unsigned int disable_ips; + unsigned int disable_ips_rcg; unsigned int disable_ips_in_vpb; bool disable_ips_in_dpms_off; bool usb4_bw_alloc_support; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 0a47d1a3515b..c10e603b54af 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1269,12 +1269,16 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; new_signals.bits.allow_z10 = 1; + // New in IPSv2.0 + new_signals.bits.allow_ips1z8 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS1) { new_signals.bits.allow_ips1 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2) { + // IPSv1.0 only new_signals.bits.allow_pg = 1; new_signals.bits.allow_ips1 = 1; } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_IPS2_Z10) { + // IPSv1.0 only new_signals.bits.allow_pg = 1; new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; @@ -1286,6 +1290,8 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips1 = 1; new_signals.bits.allow_ips2 = 1; new_signals.bits.allow_z10 = 1; + // New in 
IPSv2.0 + new_signals.bits.allow_ips1z8 = 1; } else { /* RCG only */ new_signals.bits.allow_pg = 0; @@ -1293,8 +1299,21 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) new_signals.bits.allow_ips2 = 0; new_signals.bits.allow_z10 = 0; } + } else if (dc->config.disable_ips == DMUB_IPS_DISABLE_Z8_RETENTION) { + new_signals.bits.allow_pg = 1; + new_signals.bits.allow_ips1 = 1; + new_signals.bits.allow_ips2 = 1; + new_signals.bits.allow_z10 = 1; + } + // Setting RCG allow bits (IPSv2.0) + if (dc->config.disable_ips_rcg == DMUB_IPS_RCG_ENABLE) { + new_signals.bits.allow_ips0_rcg = 1; + new_signals.bits.allow_ips1_rcg = 1; + } else if (dc->config.disable_ips_rcg == DMUB_IPS0_RCG_DISABLE) { + new_signals.bits.allow_ips1_rcg = 1; + } else if (dc->config.disable_ips_rcg == DMUB_IPS1_RCG_DISABLE) { + new_signals.bits.allow_ips0_rcg = 1; } - ips_driver->signals = new_signals; dc_dmub_srv->driver_signals = ips_driver->signals; } @@ -1318,7 +1337,7 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) { struct dc_dmub_srv *dc_dmub_srv; - uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0; + uint32_t rcg_exit_count = 0, ips1_exit_count = 0, ips2_exit_count = 0, ips1z8_exit_count = 0; if (dc->debug.dmcub_emulation) return; @@ -1338,31 +1357,34 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) rcg_exit_count = ips_fw->rcg_exit_count; ips1_exit_count = ips_fw->ips1_exit_count; ips2_exit_count = ips_fw->ips2_exit_count; + ips1z8_exit_count = ips_fw->ips1_z8ret_exit_count; ips_driver->signals.all = 0; dc_dmub_srv->driver_signals = ips_driver->signals; DC_LOG_IPS( - "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u) (count rcg=%u ips1=%u ips2=%u)", + "%s (allow ips1=%u ips2=%u) (commit ips1=%u ips2=%u ips1z8=%u) (count rcg=%u ips1=%u ips2=%u ips1_z8=%u)", __func__, ips_driver->signals.bits.allow_ips1, 
ips_driver->signals.bits.allow_ips2, ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit, ips_fw->rcg_entry_count, ips_fw->ips1_entry_count, - ips_fw->ips2_entry_count); + ips_fw->ips2_entry_count, + ips_fw->ips1_z8ret_entry_count); /* Note: register access has technically not resumed for DCN here, but we * need to be message PMFW through our standard register interface. */ dc_dmub_srv->needs_idle_wake = false; - if ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) && + if (!dc->caps.ips_v2_support && ((prev_driver_signals.bits.allow_ips2 || prev_driver_signals.all == 0) && (!dc->debug.optimize_ips_handshake || - ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle)) { + ips_fw->signals.bits.ips2_commit || !ips_fw->signals.bits.in_idle))) { DC_LOG_IPS( - "wait IPS2 eval (ips1_commit=%u ips2_commit=%u)", + "wait IPS2 eval (ips1_commit=%u ips2_commit=%u )", ips_fw->signals.bits.ips1_commit, ips_fw->signals.bits.ips2_commit); @@ -1422,28 +1444,31 @@ static void dc_dmub_srv_exit_low_power_state(const struct dc *dc) dc_dmub_srv_notify_idle(dc, false); if (prev_driver_signals.bits.allow_ips1 || prev_driver_signals.all == 0) { DC_LOG_IPS( - "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u)", + "wait for IPS1 commit clear (ips1_commit=%u ips2_commit=%u ips1z8=%u)", ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit); while (ips_fw->signals.bits.ips1_commit) udelay(1); DC_LOG_IPS( - "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u)", + "wait for IPS1 commit clear done (ips1_commit=%u ips2_commit=%u ips1z8=%u)", ips_fw->signals.bits.ips1_commit, - ips_fw->signals.bits.ips2_commit); + ips_fw->signals.bits.ips2_commit, + ips_fw->signals.bits.ips1z8_commit); } } if (!dc_dmub_srv_is_hw_pwr_up(dc->ctx->dmub_srv, true)) ASSERT(0); - DC_LOG_IPS("%s exit (count rcg=%u ips1=%u 
ips2=%u)", + DC_LOG_IPS("%s exit (count rcg=%u ips1=%u ips2=%u ips1z8=%u)", __func__, rcg_exit_count, ips1_exit_count, - ips2_exit_count); + ips2_exit_count, + ips1z8_exit_count); } void dc_dmub_srv_set_power_state(struct dc_dmub_srv *dc_dmub_srv, enum dc_acpi_cm_power_state power_state) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 5cf5dd5831fc..938a07cdcfec 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -875,7 +875,7 @@ enum dmub_shared_state_feature_id { /** * struct dmub_shared_state_ips_fw - Firmware signals for IPS. */ -union dmub_shared_state_ips_fw_signals { + union dmub_shared_state_ips_fw_signals { struct { uint32_t ips1_commit : 1; /**< 1 if in IPS1 or IPS0 RCG */ uint32_t ips2_commit : 1; /**< 1 if in IPS2 */ @@ -890,7 +890,7 @@ union dmub_shared_state_ips_fw_signals { /** * struct dmub_shared_state_ips_signals - Firmware signals for IPS. */ -union dmub_shared_state_ips_driver_signals { + union dmub_shared_state_ips_driver_signals { struct { uint32_t allow_pg : 1; /**< 1 if PG is allowed */ uint32_t allow_ips1 : 1; /**< 1 is IPS1 is allowed */ From a8a21bafa1b964ea566c69152c3299fefcb6c0c5 Mon Sep 17 00:00:00 2001 From: Duncan Ma Date: Fri, 2 May 2025 12:40:48 -0400 Subject: [PATCH 226/358] drm/amd/display: Notify DMUB on HW Release [Why & How] DMUB shall be notified on driver hardware release. Implement notification. 
Reviewed-by: Duncan Ma Signed-off-by: Duncan Ma Signed-off-by: Ivan Lipski Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 6 ++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index c10e603b54af..b4525b1fc11b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -2176,3 +2176,20 @@ bool dmub_lsdma_send_poll_reg_write_command(struct dc_dmub_srv *dc_dmub_srv, uin return result; } +void dc_dmub_srv_release_hw(const struct dc *dc) +{ + struct dc_dmub_srv *dc_dmub_srv = dc->ctx->dmub_srv; + union dmub_rb_cmd cmd = {0}; + + if (!dc_dmub_srv || !dc_dmub_srv->dmub) + return; + + memset(&cmd, 0, sizeof(cmd)); + cmd.idle_opt_notify_idle.header.type = DMUB_CMD__IDLE_OPT; + cmd.idle_opt_notify_idle.header.sub_type = DMUB_CMD__IDLE_OPT_RELEASE_HW; + cmd.idle_opt_notify_idle.header.payload_bytes = + sizeof(cmd.idle_opt_notify_idle) - + sizeof(cmd.idle_opt_notify_idle.header); + + dm_execute_dmub_cmd(dc->ctx, &cmd, DM_DMUB_WAIT_TYPE_WAIT); +} diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 1f1c155be30e..8ea320f21269 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -291,4 +291,10 @@ bool dc_dmub_srv_ips_query_residency_info(const struct dc_context *ctx, uint8_t struct dmub_ips_residency_info *driver_info, enum ips_residency_mode ips_mode); +/** + * dc_dmub_srv_release_hw() - Notifies DMUB service that HW access is no longer required. 
+ * + * @dc: pointer to DC object + */ +void dc_dmub_srv_release_hw(const struct dc *dc); #endif /* _DMUB_DC_SRV_H_ */ From c5c33903d7aed0f16cc300c27da984f89a159cf2 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 15 May 2025 14:16:39 -0400 Subject: [PATCH 227/358] drm/amd/display: Add static pg implementations for future use [Why & How] Add static pg implementations and debug flags for future use. Reviewed-by: Duncan Ma Signed-off-by: Leo Chen Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 4 ++++ drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8a09c5f487d3..b4fe5859fa5f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -819,6 +819,7 @@ enum pg_hw_resources { PG_DCHVM, PG_DWB, PG_HPO, + PG_DCOH, PG_HW_RESOURCES_NUM_ELEMENT }; @@ -962,6 +963,9 @@ struct dc_debug_options { bool disable_dsc_power_gate; bool disable_optc_power_gate; bool disable_hpo_power_gate; + bool disable_io_clk_power_gate; + bool disable_mem_power_gate; + bool disable_dio_power_gate; int dsc_min_slice_height_override; int dsc_bpp_increment_div; bool disable_pplib_wm_range; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h index 00ea3864dd4d..44f86cc2d1d6 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/pg_cntl.h @@ -46,6 +46,8 @@ struct pg_cntl_funcs { void (*opp_pg_control)(struct pg_cntl *pg_cntl, unsigned int opp_inst, bool power_on); void (*optc_pg_control)(struct pg_cntl *pg_cntl, unsigned int optc_inst, bool power_on); void (*dwb_pg_control)(struct pg_cntl *pg_cntl, bool power_on); + void (*mem_pg_control)(struct pg_cntl *pg_cntl, bool power_on); + void (*dio_pg_control)(struct pg_cntl *pg_cntl, bool power_on); void
(*init_pg_status)(struct pg_cntl *pg_cntl); }; From 42fcf48f7b3037bc885db2e0c19ec9941029a134 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 16 Jun 2025 15:33:06 -0400 Subject: [PATCH 228/358] drm/amd/display: New Behavior for debug option disable_ips_in_vpb [Why & How] To facilitate debugging, the following behaviors are defined for existing debug option disable_ips_in_vpb 0 - Enable IPS in LVP - let driver decide (legacy) 1 - Disable IPS in LVP 2 - Enable IPS1 and RCG in LVP 3 - Enable IPS1 Z8, IPS1 and RCG in LVP Reviewed-by: Duncan Ma Signed-off-by: Leo Chen Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 7 +++++++ drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index b4525b1fc11b..f5ef1a07078e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1314,6 +1314,13 @@ static void dc_dmub_srv_notify_idle(const struct dc *dc, bool allow_idle) } else if (dc->config.disable_ips_rcg == DMUB_IPS1_RCG_DISABLE) { new_signals.bits.allow_ips0_rcg = 1; } + // IPS dynamic allow bits (IPSv2 change, vpb use case) + if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG) { + new_signals.bits.allow_dynamic_ips1 = 1; + } else if (dc->config.disable_ips_in_vpb == DMUB_IPS_VPB_ENABLE_ALL) { + new_signals.bits.allow_dynamic_ips1 = 1; + new_signals.bits.allow_dynamic_ips1_z8 = 1; + } ips_driver->signals = new_signals; dc_dmub_srv->driver_signals = ips_driver->signals; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 938a07cdcfec..fed2d3999305 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -790,6 +790,13 @@ enum dmub_ips_rcg_disable_type { 
DMUB_IPS_RCG_DISABLE = 3 }; +enum dmub_ips_in_vpb_disable_type { + DMUB_IPS_VPB_RCG_ONLY = 0, // Legacy behaviour + DMUB_IPS_VPB_DISABLE_ALL = 1, + DMUB_IPS_VPB_ENABLE_IPS1_AND_RCG = 2, + DMUB_IPS_VPB_ENABLE_ALL = 3 // Enable IPS1 Z8, IPS1 and RCG +}; + #define DMUB_IPS1_ALLOW_MASK 0x00000001 #define DMUB_IPS2_ALLOW_MASK 0x00000002 #define DMUB_IPS1_COMMIT_MASK 0x00000004 From 504f9bdd3a1588604b0452bfe927ff86e5f6e6df Mon Sep 17 00:00:00 2001 From: Michael Strauss Date: Tue, 18 Feb 2025 09:23:44 -0500 Subject: [PATCH 229/358] drm/amd/display: Fix FIXED_VS retimer clock gen source override [WHY] For SQ128 pattern some vendor-specific overrides are required. Previously a hardcoded clock gen source value was incorrectly programmed, causing our override of the retimer's clock source to be ignored. Due to some PHY issues on certain APU programs, we see failures on retimer bypass ports extend to electrical testing downstream of PHY due to some host clock jitter which the retimer follows. [HOW] Fix typo to use correct clock gen source override of 0xC4 rather than 0x4C.
Reviewed-by: Charlene Liu Signed-off-by: Michael Strauss Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c index 116ff37126e7..55c5148de800 100644 --- a/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/hwss/link_hwss_hpo_fixed_vs_pe_retimer_dp.c @@ -74,7 +74,7 @@ static void dp_hpo_fixed_vs_pe_retimer_set_tx_ffe(struct dc_link *link, static void dp_hpo_fixed_vs_pe_retimer_program_override_test_pattern(struct dc_link *link, struct encoder_set_dp_phy_pattern_param *tp_params) { - uint8_t clk_src = 0x4C; + uint8_t clk_src = 0xC4; uint8_t pattern = 0x4F; /* SQ128 */ const uint8_t vendor_lttpr_write_data_pg0[4] = {0x1, 0x11, 0x0, 0x0}; From 3bfce48b109fcb9d561a39f69f073dfc1df55ead Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Wed, 12 Mar 2025 23:07:39 -0400 Subject: [PATCH 230/358] drm/amd/display: Add support for Panel Replay on DP1 eDP (panel_inst=1) [why & how] DP1 eDP is still considered a single-eDP case and should support Panel Replay. Modify secondary eDP policy to reflect this and update Replay state accordingly. 
Reviewed-by: Charlene Liu Signed-off-by: Ovidiu Bunea Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index fcd3d86ad517..5d77d0912ee9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -211,6 +211,7 @@ static void dmub_replay_set_coasting_vtotal(struct dmub_replay *dmub, pCmd->header.type = DMUB_CMD__REPLAY; pCmd->header.sub_type = DMUB_CMD__REPLAY_SET_COASTING_VTOTAL; pCmd->header.payload_bytes = sizeof(struct dmub_cmd_replay_set_coasting_vtotal_data); + pCmd->replay_set_coasting_vtotal_data.panel_inst = panel_inst; pCmd->replay_set_coasting_vtotal_data.coasting_vtotal = (coasting_vtotal & 0xFFFF); pCmd->replay_set_coasting_vtotal_data.coasting_vtotal_high = (coasting_vtotal & 0xFFFF0000) >> 16; From 1f26214d268bc2176d9b2374731a6ec4f369030d Mon Sep 17 00:00:00 2001 From: Ovidiu Bunea Date: Thu, 27 Mar 2025 16:36:17 -0400 Subject: [PATCH 231/358] drm/amd/display: Add HPO encoder support to Replay [why & how] UHBR link rate capable eDPs will use HPO for encoding. Need to pass HPO stream and link encoder instances to DMCUB for Replay FSM to know which instances to use. 
Reviewed-by: Charlene Liu Signed-off-by: Ovidiu Bunea Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce/dmub_replay.c | 42 +++++++++++++++++-- .../gpu/drm/amd/display/dc/dce/dmub_replay.h | 2 +- .../link/protocols/link_edp_panel_control.c | 2 +- .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 25 +++++++++++ 4 files changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 5d77d0912ee9..e7a318e26d38 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -4,6 +4,7 @@ #include "dc.h" #include "dc_dmub_srv.h" +#include "dc_dp_types.h" #include "dmub/dmub_srv.h" #include "core_types.h" #include "dmub_replay.h" @@ -43,21 +44,45 @@ static void dmub_replay_get_state(struct dmub_replay *dmub, enum replay_state *s /* * Enable/Disable Replay. */ -static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst) +static void dmub_replay_enable(struct dmub_replay *dmub, bool enable, bool wait, uint8_t panel_inst, + struct dc_link *link) { union dmub_rb_cmd cmd; struct dc_context *dc = dmub->ctx; uint32_t retry_count; enum replay_state state = REPLAY_STATE_0; + struct pipe_ctx *pipe_ctx = NULL; + struct resource_context *res_ctx = &link->ctx->dc->current_state->res_ctx; + uint8_t i; memset(&cmd, 0, sizeof(cmd)); cmd.replay_enable.header.type = DMUB_CMD__REPLAY; cmd.replay_enable.data.panel_inst = panel_inst; cmd.replay_enable.header.sub_type = DMUB_CMD__REPLAY_ENABLE; - if (enable) + if (enable) { cmd.replay_enable.data.enable = REPLAY_ENABLE; - else + // hpo stream/link encoder assignments are not static, need to update everytime we try to enable replay + if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { + for (i = 0; i < MAX_PIPES; i++) { + if (res_ctx && + res_ctx->pipe_ctx[i].stream && + 
res_ctx->pipe_ctx[i].stream->link && + res_ctx->pipe_ctx[i].stream->link == link && + res_ctx->pipe_ctx[i].stream->link->connector_signal == SIGNAL_TYPE_EDP) { + pipe_ctx = &res_ctx->pipe_ctx[i]; + //TODO: refactor for multi edp support + break; + } + } + + if (!pipe_ctx) + return; + + cmd.replay_enable.data.hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + cmd.replay_enable.data.hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; + } + } else cmd.replay_enable.data.enable = REPLAY_DISABLE; cmd.replay_enable.header.payload_bytes = sizeof(struct dmub_rb_cmd_replay_enable_data); @@ -149,6 +174,17 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, copy_settings_data->digbe_inst = replay_context->digbe_inst; copy_settings_data->digfe_inst = replay_context->digfe_inst; + if (link->cur_link_settings.link_rate >= LINK_RATE_UHBR10) { + if (pipe_ctx->stream_res.hpo_dp_stream_enc) + copy_settings_data->hpo_stream_enc_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + else + copy_settings_data->hpo_stream_enc_inst = 0; + if (pipe_ctx->link_res.hpo_dp_link_enc) + copy_settings_data->hpo_link_enc_inst = pipe_ctx->link_res.hpo_dp_link_enc->inst; + else + copy_settings_data->hpo_link_enc_inst = 0; + } + if (pipe_ctx->plane_res.dpp) copy_settings_data->dpp_inst = pipe_ctx->plane_res.dpp->inst; else diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h index e6346c0ffc0e..ccbe385e132c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.h @@ -19,7 +19,7 @@ struct dmub_replay_funcs { void (*replay_get_state)(struct dmub_replay *dmub, enum replay_state *state, uint8_t panel_inst); void (*replay_enable)(struct dmub_replay *dmub, bool enable, bool wait, - uint8_t panel_inst); + uint8_t panel_inst, struct dc_link *link); bool (*replay_copy_settings)(struct dmub_replay *dmub, struct dc_link *link, struct replay_context 
*replay_context, uint8_t panel_inst); void (*replay_set_power_opt)(struct dmub_replay *dmub, unsigned int power_opt, diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index 035795042a01..a4258b5b77be 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -937,7 +937,7 @@ bool edp_set_replay_allow_active(struct dc_link *link, const bool *allow_active, // TODO: Handle mux change case if force_static is set // If force_static is set, just change the replay_allow_active state directly if (replay != NULL && link->replay_settings.replay_feature_enabled) - replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst); + replay->funcs->replay_enable(replay, *allow_active, wait, panel_inst, link); link->replay_settings.replay_allow_active = *allow_active; } diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index fed2d3999305..c587b3441e07 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -4047,6 +4047,14 @@ struct dmub_cmd_replay_copy_settings_data { * DIG BE HW instance. */ uint8_t digbe_inst; + /** + * @hpo_stream_enc_inst: HPO stream encoder instance + */ + uint8_t hpo_stream_enc_inst; + /** + * @hpo_link_enc_inst: HPO link encoder instance + */ + uint8_t hpo_link_enc_inst; /** * AUX HW instance. */ @@ -4091,6 +4099,11 @@ struct dmub_cmd_replay_copy_settings_data { * Use for AUX-less ALPM LFPS wake operation */ struct dmub_alpm_auxless_data auxless_alpm_data; + + /** + * @pad: Align structure to 4 byte boundary. + */ + uint8_t pad[2]; }; /** @@ -4146,6 +4159,18 @@ struct dmub_rb_cmd_replay_enable_data { * This does not support HDMI/DP2 for now. 
*/ uint8_t phy_rate; + /** + * @hpo_stream_enc_inst: HPO stream encoder instance + */ + uint8_t hpo_stream_enc_inst; + /** + * @hpo_link_enc_inst: HPO link encoder instance + */ + uint8_t hpo_link_enc_inst; + /** + * @pad: Align structure to 4 byte boundary. + */ + uint8_t pad[2]; }; /** From fa699acb8e9be2341ee318077fa119acc7d5f329 Mon Sep 17 00:00:00 2001 From: Clayton King Date: Thu, 19 Jun 2025 13:54:26 -0400 Subject: [PATCH 232/358] drm/amd/display: Free memory allocation [WHY] Free memory to avoid memory leak Reviewed-by: Joshua Aberback Signed-off-by: Clayton King Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 724484573a2a..b59703467128 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -1606,7 +1606,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( clk_mgr->base.bw_params = kzalloc(sizeof(*clk_mgr->base.bw_params), GFP_KERNEL); if (!clk_mgr->base.bw_params) { BREAK_TO_DEBUGGER(); - kfree(clk_mgr); + kfree(clk_mgr401); return NULL; } @@ -1617,6 +1617,7 @@ struct clk_mgr_internal *dcn401_clk_mgr_construct( if (!clk_mgr->wm_range_table) { BREAK_TO_DEBUGGER(); kfree(clk_mgr->base.bw_params); + kfree(clk_mgr401); return NULL; } From 18f0817d2e9af479a40a1be4d83a849894d6b3f8 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 2 May 2025 09:51:04 -0400 Subject: [PATCH 233/358] drm/amd/display: Initial support for SmartMux SmartMux is a mechanism to switch the GPU being used for scanout in a hybrid configuration. This is used for devices with an eDP and two GPUs. 
This is only valid when the system has a physical switch (Multiplexer) on the board to switch between the two GPUs. When a graphically intensive workload like a game is being run, the system can switch the active display to the dGPU, so that we can avoid copying the buffer from dGPU to APU for scanout. This helps with latency and FPS. When power consumption is preferred, the system can be switched to the APU. Reviewed-by: Alvin Lee Signed-off-by: Aurabindo Pillai Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 2 +- .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 2 +- .../drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h | 3 +- .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c | 4 +- .../display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c | 36 ++++++ .../display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h | 31 +++++ .../dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c | 118 ++++++++++++++++++ .../dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h | 34 +++++ .../drm/amd/display/dc/core/dc_hw_sequencer.c | 26 ++++ drivers/gpu/drm/amd/display/dc/dc.h | 8 ++ .../amd/display/dc/hwss/dce110/dce110_hwseq.c | 24 ++++ .../amd/display/dc/hwss/dcn31/dcn31_hwseq.c | 13 ++ .../drm/amd/display/dc/hwss/hw_sequencer.h | 3 + .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 2 + .../gpu/drm/amd/display/dc/link/link_dpms.c | 19 ++- .../drm/amd/display/dc/link/link_factory.c | 8 +- .../dc/link/protocols/link_dp_capability.c | 19 +++ .../link/protocols/link_edp_panel_control.c | 17 +++ .../link/protocols/link_edp_panel_control.h | 1 + .../amd/display/modules/power/power_helpers.h | 2 + 20 files changed, 366 insertions(+), 6 deletions(-) create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c create mode 100644
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d9955c5d2e5e..60021671b386 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -112,7 +112,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) ############################################################################### # DCN30 ############################################################################### -CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o +CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o dcn30m_clk_mgr.o dcn30m_clk_mgr_smu_msg.o AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 4c3e58c730b1..33b9d36619ff 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -67,7 +67,7 @@ int clk_mgr_helper_get_active_display_cnt( if (dc_state_get_stream_subvp_type(context, stream) == SUBVP_PHANTOM) continue; - if (!stream->dpms_off || (stream_status && stream_status->plane_count)) + if (!stream->dpms_off || dc->is_switch_in_progress_dest || (stream_status && stream_status->plane_count)) display_count++; } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h index fa09c594fd36..06da34676965 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h @@ -56,6 +56,7 @@ #define DALSMC_MSG_SetDisplayRefreshFromMall 0xF #define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10 #define DALSMC_MSG_BacoAudioD3PME 0x11 -#define DALSMC_Message_Count 0x12 +#define DALSMC_MSG_SmartAccess 0x12 +#define DALSMC_Message_Count 0x13 #endif diff --git 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 8083a553c60e..ef77fcd164ed 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -30,6 +30,7 @@ #include "dce100/dce_clk_mgr.h" #include "dcn30/dcn30_clk_mgr.h" #include "dml/dcn30/dcn30_fpu.h" +#include "dcn30/dcn30m_clk_mgr.h" #include "reg_helper.h" #include "core_types.h" #include "dm_helpers.h" @@ -498,7 +499,8 @@ static struct clk_mgr_funcs dcn3_funcs = { .are_clock_states_equal = dcn3_are_clock_states_equal, .enable_pme_wa = dcn3_enable_pme_wa, .notify_link_rate_change = dcn30_notify_link_rate_change, - .is_smu_present = dcn3_is_smu_present + .is_smu_present = dcn3_is_smu_present, + .set_smartmux_switch = dcn30m_set_smartmux_switch }; static void dcn3_init_clocks_fpga(struct clk_mgr *clk_mgr) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c new file mode 100644 index 000000000000..8e8a11c7437e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.c @@ -0,0 +1,36 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "clk_mgr_internal.h" +#include "dcn30/dcn30m_clk_mgr.h" +#include "dcn30m_clk_mgr_smu_msg.h" + + +uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + return dcn30m_smu_set_smart_mux_switch(clk_mgr, pins_to_set); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h new file mode 100644 index 000000000000..757985b2eadc --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr.h @@ -0,0 +1,31 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __DCN30M_CLK_MGR_H__ +#define __DCN30M_CLK_MGR_H__ + +uint32_t dcn30m_set_smartmux_switch(struct clk_mgr *clk_mgr_base, uint32_t pins_to_set); + +#endif //__DCN30M_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c new file mode 100644 index 000000000000..0dd0583ff21e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.c @@ -0,0 +1,118 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dcn30m_clk_mgr_smu_msg.h" + +#include "clk_mgr_internal.h" +#include "reg_helper.h" +#include "dm_helpers.h" + +#include "dalsmc.h" + +#define mmDAL_MSG_REG 0x1628A +#define mmDAL_ARG_REG 0x16273 +#define mmDAL_RESP_REG 0x16274 + +#define REG(reg_name) \ + mm ## reg_name + +#include "logger_types.h" +#undef DC_LOGGER +#define DC_LOGGER \ + CTX->logger +#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } + + +/* + * Function to be used instead of REG_WAIT macro because the wait ends when + * the register is NOT EQUAL to zero, and because the translation in msg_if.h + * won't work with REG_WAIT. + */ +static uint32_t dcn30m_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, + unsigned int delay_us, unsigned int max_retries) +{ + uint32_t reg = 0; + + do { + reg = REG_READ(DAL_RESP_REG); + if (reg) + break; + + if (delay_us >= 1000) + msleep(delay_us/1000); + else if (delay_us > 0) + udelay(delay_us); + } while (max_retries--); + + /* handle DALSMC_Result_CmdRejectedBusy? */ + + /* Log? 
*/ + + return reg; +} + +static bool dcn30m_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, + uint32_t msg_id, uint32_t param_in, uint32_t *param_out) +{ + uint32_t result; + /* Wait for response register to be ready */ + dcn30m_smu_wait_for_response(clk_mgr, 10, 200000); + + /* Clear response register */ + REG_WRITE(DAL_RESP_REG, 0); + + /* Set the parameter register for the SMU message */ + REG_WRITE(DAL_ARG_REG, param_in); + + /* Trigger the message transaction by writing the message ID */ + REG_WRITE(DAL_MSG_REG, msg_id); + + result = dcn30m_smu_wait_for_response(clk_mgr, 10, 200000); + + if (IS_SMU_TIMEOUT(result)) + dm_helpers_smu_timeout(CTX, msg_id, param_in, 10 * 200000); + + /* Wait for response */ + if (result == DALSMC_Result_OK) { + if (param_out) + *param_out = REG_READ(DAL_ARG_REG); + + return true; + } + + return false; +} + +uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set) +{ + uint32_t response = 0; + + smu_print("SMU Set SmartMux Switch: switch_dgpu = %d\n", pins_to_set); + + dcn30m_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SmartAccess, pins_to_set, &response); + + return response; +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h new file mode 100644 index 000000000000..8a59a473fc5e --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30m_clk_mgr_smu_msg.h @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ +#define DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ + +#include "core_types.h" + +struct clk_mgr_internal; + +uint32_t dcn30m_smu_set_smart_mux_switch(struct clk_mgr_internal *clk_mgr, uint32_t pins_to_set); +#endif /* DAL_DC_DCN30M_CLK_MGR_SMU_MSG_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 7014b8d000bb..ec4e80e5b6eb 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -427,6 +427,32 @@ void get_hdr_visual_confirm_color( } } +/* Visual Confirm color definition for Smart Mux */ +void get_smartmux_visual_confirm_color( + struct dc *dc, + struct tg_color *color) +{ + uint32_t color_value = MAX_TG_COLOR_VALUE; + + const struct tg_color sm_ver_colors[5] = { + {0, 0, 0}, /* SMUX_MUXCONTROL_UNSUPPORTED - Black */ + {0, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_v10 - Green */ + {0, MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_v15 - Cyan */ + {MAX_TG_COLOR_VALUE, MAX_TG_COLOR_VALUE, 0}, /* SMUX_MUXCONTROL_MDM - Yellow */ + {MAX_TG_COLOR_VALUE, 0, MAX_TG_COLOR_VALUE}, /* SMUX_MUXCONTROL_vUNKNOWN - Magenta*/ + }; + + if (dc->caps.is_apu) { + /* APU driving the eDP */ + *color = sm_ver_colors[dc->config.smart_mux_version]; + } else { + /* dGPU driving the eDP - red */ + color->color_r_cr = color_value; + color->color_g_y = 0; + color->color_b_cb = 0; + } +} + /* Visual Confirm color definition for VABC */ void get_vabc_visual_confirm_color( struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b4fe5859fa5f..59c07756130d 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -505,6 +505,7 @@ struct dc_config { bool use_spl; bool prefer_easf; bool use_pipe_ctx_sync_logic; + int smart_mux_version; bool ignore_dpref_ss; bool 
enable_mipi_converter_optimization; bool use_default_clock_table; @@ -541,6 +542,7 @@ enum visual_confirm { VISUAL_CONFIRM_SWAPCHAIN = 6, VISUAL_CONFIRM_FAMS = 7, VISUAL_CONFIRM_SWIZZLE = 9, + VISUAL_CONFIRM_SMARTMUX_DGPU = 10, VISUAL_CONFIRM_REPLAY = 12, VISUAL_CONFIRM_SUBVP = 14, VISUAL_CONFIRM_MCLK_SWITCH = 16, @@ -1639,6 +1641,8 @@ struct dc_scratch_space { struct gpio *hpd_gpio; enum dc_link_fec_state fec_state; + bool is_dds; + bool is_display_mux_present; bool link_powered_externally; // Used to bypass hardware sequencing delays when panel is powered down forcibly struct dc_panel_config panel_config; @@ -1693,6 +1697,10 @@ struct dc { /* Require to maintain clocks and bandwidth for UEFI enabled HW */ + /* For eDP to know the switching state of SmartMux */ + bool is_switch_in_progress_orig; + bool is_switch_in_progress_dest; + /* FBC compressor */ struct compressor *fbc_compressor; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c index 252e862449a2..4ea13d0bf815 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dce110/dce110_hwseq.c @@ -1686,6 +1686,19 @@ enum dc_status dce110_apply_single_controller_ctx_to_hw( if (dc_is_dp_signal(pipe_ctx->stream->signal)) dc->link_srv->dp_trace_source_sequence(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); + /* Temporary workaround to perform DSC programming ahead of stream enablement + * for smartmux/SPRS + * TODO: Remove SmartMux/SPRS checks once movement of DSC programming is generalized + */ + if (pipe_ctx->stream->timing.flags.DSC) { + if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest) + || link->is_dds || link->skip_implict_edp_power_control)) && + (dc_is_dp_signal(pipe_ctx->stream->signal) || + dc_is_virtual_signal(pipe_ctx->stream->signal))) + dc->link_srv->set_dsc_enable(pipe_ctx, true); + } 
+ if (!stream->dpms_off) dc->link_srv->set_dpms_on(context, pipe_ctx); @@ -1927,6 +1940,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) can_apply_edp_fast_boot = dc_validate_boot_timing(dc, edp_stream->sink, &edp_stream->timing); + + // For Mux-platform, the default value is false. + // Disable fast boot during mux switching. + // The flag would be clean after switching done. + if (dc->is_switch_in_progress_dest && edp_link->is_dds) + can_apply_edp_fast_boot = false; + edp_stream->apply_edp_fast_boot_optimization = can_apply_edp_fast_boot; if (can_apply_edp_fast_boot) { DC_LOG_EVENT_LINK_TRAINING("eDP fast boot Enable\n"); @@ -1970,6 +1990,10 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) if (edp_with_sink_num) edp_link_with_sink = edp_links_with_sink[0]; + // During a mux switch, powering down the HW blocks and then enabling + // the link via a DPCD SET_POWER write causes a brief flash + keep_edp_vdd_on |= dc->is_switch_in_progress_dest; + if (!can_apply_edp_fast_boot && !can_apply_seamless_boot) { if (edp_link_with_sink && !keep_edp_vdd_on) { /*turn off backlight before DP_blank and encoder powered down*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c index 5ba3999991b0..8ba934b83957 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_hwseq.c @@ -562,6 +562,19 @@ static void dcn31_reset_back_end_for_pipe( else if (pipe_ctx->stream_res.audio) dc->hwss.disable_audio_stream(pipe_ctx); + /* Temporary workaround to perform DSC programming ahead of pipe reset + * for smartmux/SPRS + * TODO: Remove SmartMux/SPRS checks once movement of DSC programming is generalized + */ + if (pipe_ctx->stream->timing.flags.DSC) { + if ((pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + ((link->dc->config.smart_mux_version && link->dc->is_switch_in_progress_dest) + || 
link->is_dds || link->skip_implict_edp_power_control)) && + (dc_is_dp_signal(pipe_ctx->stream->signal) || + dc_is_virtual_signal(pipe_ctx->stream->signal))) + dc->link_srv->set_dsc_enable(pipe_ctx, false); + } + /* free acquired resources */ if (pipe_ctx->stream_res.audio) { /*disable az_endpoint*/ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 3a0795045bc6..9df8030e37f7 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -502,6 +502,9 @@ void get_hdr_visual_confirm_color( void get_mpctree_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); +void get_smartmux_visual_confirm_color( + struct dc *dc, + struct tg_color *color); void get_vabc_visual_confirm_color( struct pipe_ctx *pipe_ctx, struct tg_color *color); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h index 7d66e62b6be6..2c9a4a12bd8a 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h @@ -337,6 +337,8 @@ struct clk_mgr_funcs { bool (*is_dc_mode_present)(struct clk_mgr *clk_mgr); + uint32_t (*set_smartmux_switch)(struct clk_mgr *clk_mgr, uint32_t pins_to_set); + unsigned int (*get_max_clock_khz)(struct clk_mgr *clk_mgr_base, enum clk_type clk_type); }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index bd51b279ad14..8c8682f743d6 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -140,7 +140,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init) } } - if (((!link->wa_flags.dp_keep_receiver_powered) || hw_init) && + if (((!dc->is_switch_in_progress_dest) && ((!link->wa_flags.dp_keep_receiver_powered) || hw_init)) && (link->type != dc_connection_none)) 
dpcd_write_rx_power_ctrl(link, false); } @@ -2537,6 +2537,14 @@ void link_set_dpms_on( !pipe_ctx->next_odm_pipe) { pipe_ctx->stream->dpms_off = false; update_psp_stream_config(pipe_ctx, false); + + if (link->is_dds) { + uint32_t post_oui_delay = 30; // 30ms + + dpcd_set_source_specific_data(link); + msleep(post_oui_delay); + } + return; } @@ -2629,6 +2637,15 @@ void link_set_dpms_on( dp_is_128b_132b_signal(pipe_ctx)) update_sst_payload(pipe_ctx, true); + /* Corruption was observed on systems with display mux when stream gets + * enabled after the mux switch. Having a small delay between link + * training and stream unblank resolves the corruption issue. + * This is workaround. + */ + if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP && + link->is_display_mux_present) + msleep(20); + dc->hwss.unblank_stream(pipe_ctx, &pipe_ctx->stream->link->cur_link_settings); diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index c5f4e803be84..de1143dbbd25 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -539,10 +539,16 @@ static bool construct_phy(struct dc_link *link, break; case CONNECTOR_ID_EDP: + // If smartmux is supported, only create the link on the primary eDP. + // Dual eDP is not supported with smartmux. 
+ if (!(!link->dc->config.smart_mux_version || dc_ctx->dc_edp_id_count == 0)) + goto create_fail; + link->connector_signal = SIGNAL_TYPE_EDP; if (link->hpd_gpio) { - if (!link->dc->config.allow_edp_hotplug_detection) + if (!link->dc->config.allow_edp_hotplug_detection + && !is_smartmux_suported(link)) link->irq_source_hpd = DC_IRQ_SOURCE_INVALID; switch (link->dc->config.allow_edp_hotplug_detection) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 0f965380a9b4..651926e547b9 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1388,6 +1388,21 @@ void dpcd_set_source_specific_data(struct dc_link *link) struct dpcd_amd_signature amd_signature = {0}; struct dpcd_amd_device_id amd_device_id = {0}; + if (link->is_dds) { + uint8_t dpcd_dp_edp_backlight_mode = 0; + + /* + * Write 0 to bits 0:1 for dp_edp_backlight_mode_set register + * if platform is DDS + */ + core_link_read_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &dpcd_dp_edp_backlight_mode, sizeof(uint8_t)); + dpcd_dp_edp_backlight_mode &= ~0x3; + + core_link_write_dpcd(link, DP_EDP_BACKLIGHT_MODE_SET_REGISTER, + &dpcd_dp_edp_backlight_mode, sizeof(uint8_t)); + } + amd_device_id.device_id_byte1 = (uint8_t)(link->ctx->asic_id.chip_id); amd_device_id.device_id_byte2 = @@ -1543,6 +1558,10 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link) return false; link->dpcd_sink_ext_caps.raw = dpcd_data; + if (link->is_dds && !link->dpcd_sink_ext_caps.bits.oled) { + link->dpcd_sink_ext_caps.raw = 0; + return false; + } if (core_link_read_dpcd(link, DP_EDP_GENERAL_CAP_2, &edp_general_cap2, 1) != DC_OK) return false; diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c index a4258b5b77be..e7927b8f5ba3 
100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.c @@ -161,6 +161,9 @@ bool edp_set_backlight_level_nits(struct dc_link *link, link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds && !link->dpcd_caps.panel_luminance_control) + return true; + // use internal backlight control if dmub capabilities are not present if (link->backlight_control_type == BACKLIGHT_CONTROL_VESA_AUX && !link->dc->caps.dmub_caps.aux_backlight_support) { @@ -243,6 +246,8 @@ bool edp_get_backlight_level_nits(struct dc_link *link, link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds) + return false; if (!core_link_read_dpcd(link, DP_SOURCE_BACKLIGHT_CURRENT_PEAK, dpcd_backlight_get.raw, sizeof(union dpcd_source_backlight_get))) @@ -269,6 +274,8 @@ bool edp_backlight_enable_aux(struct dc_link *link, bool enable) link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT)) return false; + if (link->is_dds) + return true; if (core_link_write_dpcd(link, DP_SOURCE_BACKLIGHT_ENABLE, &backlight_enable, 1) != DC_OK) return false; @@ -1194,6 +1201,16 @@ int edp_get_target_backlight_pwm(const struct dc_link *link) return (int) abm->funcs->get_target_backlight(abm); } +bool is_smartmux_suported(struct dc_link *link) +{ + if (link->dc->caps.is_apu) + return false; + if (!link->dc->config.smart_mux_version) + return false; + + return true; +} + static void edp_set_assr_enable(const struct dc *pDC, struct dc_link *link, struct link_resource *link_res, bool enable) { diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h index bcfa6ac5d4e7..4a475d5b9dde 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_edp_panel_control.h @@ -30,6 +30,7 @@ enum 
dp_panel_mode dp_get_panel_mode(struct dc_link *link); void dp_set_panel_mode(struct dc_link *link, enum dp_panel_mode panel_mode); bool set_default_brightness_aux(struct dc_link *link); +bool is_smartmux_suported(struct dc_link *link); void edp_panel_backlight_power_on(struct dc_link *link, bool wait_for_hpd); int edp_get_backlight_level(const struct dc_link *link); bool edp_get_backlight_level_nits(struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index 758a8aa31fbe..391209a3bf29 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -79,4 +79,6 @@ bool psr_su_set_dsc_slice_height(struct dc *dc, struct dc_link *link, bool fill_custom_backlight_caps(unsigned int config_no, struct dm_acpi_atif_backlight_caps *caps); void reset_replay_dsync_error_count(struct dc_link *link); +void change_replay_to_psr(struct dc_link *link); +void change_psr_to_replay(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ From ad19aa07445c2313c996040408a18b75db8f604c Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Wed, 2 Jul 2025 15:34:30 -0400 Subject: [PATCH 234/358] drm/amd/display: Revert "Add DPP & HUBP reset if power gate enabled on DCN314" This reverts commit 99e25e4683d7cfdf79dcc328e11bb6c924c77566. [Why & How] This commit caused a blank screen on internal display when projecting to an external display on DCN314. 
Reviewed-by: Aric Cyr Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.c | 37 ++----------------- .../amd/display/dc/hwss/dcn314/dcn314_hwseq.h | 2 - .../amd/display/dc/hwss/dcn314/dcn314_init.c | 2 +- 3 files changed, 4 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index a40e119d8582..e68f21fd5f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -55,15 +55,15 @@ #include "dcn20/dcn20_optc.h" #include "dcn30/dcn30_cm_common.h" -#define DC_LOGGER_INIT(logger) \ - struct dal_logger *dc_logger = logger +#define DC_LOGGER_INIT(logger) #define CTX \ hws->ctx #define REG(reg)\ hws->regs->reg #define DC_LOGGER \ - dc_logger + stream->ctx->logger + #undef FN #define FN(reg_name, field_name) \ @@ -76,8 +76,6 @@ static void update_dsc_on_stream(struct pipe_ctx *pipe_ctx, bool enable) struct pipe_ctx *odm_pipe; int opp_cnt = 1; - DC_LOGGER_INIT(stream->ctx->logger); - ASSERT(dsc); for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) opp_cnt++; @@ -530,32 +528,3 @@ void dcn314_disable_link_output(struct dc_link *link, apply_symclk_on_tx_off_wa(link); } - - -void dcn314_plane_atomic_power_down(struct dc *dc, - struct dpp *dpp, - struct hubp *hubp) -{ - struct dce_hwseq *hws = dc->hwseq; - DC_LOGGER_INIT(dc->ctx->logger); - - if (REG(DC_IP_REQUEST_CNTL)) { - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); - - if (hws->funcs.dpp_pg_control) { - hws->funcs.dpp_pg_control(hws, dpp->inst, false); - dpp->funcs->dpp_reset(dpp); - } - - if (hws->funcs.hubp_pg_control) { - hws->funcs.hubp_pg_control(hws, hubp->inst, false); - hubp->funcs->hubp_reset(hubp); - } - - REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); - DC_LOG_DEBUG("Power gated front end %d\n", hubp->inst); - 
} - - if (hws->funcs.dpp_root_clock_control) - hws->funcs.dpp_root_clock_control(hws, dpp->inst, false); -} diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h index 12a57b79edfb..2305ad282f21 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.h @@ -47,6 +47,4 @@ void dcn314_dpp_root_clock_control(struct dce_hwseq *hws, unsigned int dpp_inst, void dcn314_disable_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal); -void dcn314_plane_atomic_power_down(struct dc *dc, struct dpp *dpp, struct hubp *hubp); - #endif /* __DC_HWSS_DCN314_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 6963d25608ac..f5112742edf9 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -137,7 +137,7 @@ static const struct hwseq_private_funcs dcn314_private_funcs = { .disable_vga = dcn20_disable_vga, .bios_golden_init = dcn10_bios_golden_init, .plane_atomic_disable = dcn20_plane_atomic_disable, - .plane_atomic_power_down = dcn314_plane_atomic_power_down, + .plane_atomic_power_down = dcn10_plane_atomic_power_down, .enable_power_gating_plane = dcn314_enable_power_gating_plane, .dpp_root_clock_control = dcn314_dpp_root_clock_control, .hubp_pg_control = dcn31_hubp_pg_control, From 340231cdceec2c45995d773a358ca3c341f151aa Mon Sep 17 00:00:00 2001 From: Melissa Wen Date: Mon, 7 Jul 2025 16:52:05 -0400 Subject: [PATCH 235/358] drm/amd/display: Disable CRTC degamma LUT for DCN401 In DCN401 pre-blending degamma LUT isn't affecting cursor as in previous DCN version. As this is not the behavior close to what is expected for CRTC degamma LUT, disable CRTC degamma LUT property in this HW. 
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/4176 --- When enabling HDR on KDE, it takes the first CRTC 1D LUT available and apply a color transformation (Gamma 2.2 -> PQ). AMD driver usually advertises a CRTC degamma LUT as the first CRTC 1D LUT, but it's actually applied pre-blending. In previous HW version, it seems to work fine because the 1D LUT was applied to cursor too, but DCN401 presents a different behavior and the 1D LUT isn't affecting the hardware cursor. To address the wrong gamma on cursor with HDR (see the link), I came up with this patch that disables CRTC degamma LUT in this hw, since it presents a different behavior than others. With this KDE sees CRTC regamma LUT as the first post-blending 1D LUT available. This is actually more consistent with AMD color pipeline. It was tested by the reporter, since I don't have the HW available for local testing and debugging. Melissa --- Reviewed-by: Harry Wentland Signed-off-by: Melissa Wen Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 87058271b00c..2551823382f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -728,7 +728,16 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm, * support programmable degamma anywhere. */ is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch; - drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0, + /* Dont't enable DRM CRTC degamma property for DCN401 since the + * pre-blending degamma LUT doesn't apply to cursor, and therefore + * can't work similar to a post-blending degamma LUT as in other hw + * versions. + * TODO: revisit it once KMS plane color API is merged. 
+ */ + drm_crtc_enable_color_mgmt(&acrtc->base, + (is_dcn && + dm->adev->dm.dc->ctx->dce_version != DCN_VERSION_4_01) ? + MAX_COLOR_LUT_ENTRIES : 0, true, MAX_COLOR_LUT_ENTRIES); drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES); From 660261df61fb7fd972aae0f09b17c2b899f1e282 Mon Sep 17 00:00:00 2001 From: ganglxie Date: Wed, 2 Jul 2025 16:56:22 +0800 Subject: [PATCH 236/358] drm/amdgpu: refine eeprom data check add eeprom data checksum check before driver unload. reset eeprom and save correct data to eeprom when check failed Signed-off-by: ganglxie Reviewed-by: Tao Zhou Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 28 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 2 ++ 3 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1c54b2e5a225..648a829559e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2512,6 +2512,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) struct drm_device *dev = pci_get_drvdata(pdev); struct amdgpu_device *adev = drm_to_adev(dev); + amdgpu_ras_eeprom_check_and_recover(adev); amdgpu_xcp_dev_unplug(adev); amdgpu_gmc_prepare_nps_mode_change(adev); drm_dev_unplug(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 54838746f97d..9bda9ad13f88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -1531,3 +1531,31 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) return res < 0 ? 
res : 0; } + +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; + int res; + + if (!__is_ras_eeprom_supported(adev) || !ras) + return; + control = &ras->eeprom_control; + if (!control->is_eeprom_valid) + return; + res = __verify_ras_table_checksum(control); + if (res) { + dev_warn(adev->dev, + "RAS table incorrect checksum or error:%d, try to recover\n", + res); + if (!amdgpu_ras_eeprom_reset_table(control)) + if (!amdgpu_ras_save_bad_pages(adev, NULL)) + if (!__verify_ras_table_checksum(control)) { + dev_info(adev->dev, "RAS table recovery succeed\n"); + return; + } + dev_err(adev->dev, "RAS table recovery failed\n"); + control->is_eeprom_valid = false; + } + return; +} \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 35c69ac3dbeb..ebfca4cb5688 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -161,6 +161,8 @@ void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_check_and_recover(struct amdgpu_device *adev); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; From 48ee3d8e5e0eff999e3031524e0b29e1e0f18d51 Mon Sep 17 00:00:00 2001 From: ganglxie Date: Fri, 11 Jul 2025 16:20:12 +0800 Subject: [PATCH 237/358] drm/amdgpu: refine bad page loading when in the same nps mode when loading bad page in the same nps mode, need to set the other fields in eeprom records manually besides retired_page Signed-off-by: ganglxie Reviewed-by: Tao Zhou Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 15 +++++++++++++++ 1 file changed, 15
insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c508697a3412..a0a14370745e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2857,6 +2857,13 @@ static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, err_data, bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps[0].address; + err_data->err_addr[i].mem_channel = bps[0].mem_channel; + err_data->err_addr[i].bank = bps[0].bank; + err_data->err_addr[i].err_type = bps[0].err_type; + err_data->err_addr[i].mcumc_id = bps[0].mcumc_id; + } } else { if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data)) return -EINVAL; @@ -2888,6 +2895,7 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, struct eeprom_table_record *bps, struct ras_err_data *err_data, enum amdgpu_memory_partition nps) { + int i = 0; enum amdgpu_memory_partition save_nps; save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; @@ -2897,6 +2905,13 @@ static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, if (amdgpu_umc_pages_in_a_row(adev, err_data, bps->retired_page << AMDGPU_GPU_PAGE_SHIFT)) return -EINVAL; + for (i = 0; i < adev->umc.retire_unit; i++) { + err_data->err_addr[i].address = bps->address; + err_data->err_addr[i].mem_channel = bps->mem_channel; + err_data->err_addr[i].bank = bps->bank; + err_data->err_addr[i].err_type = bps->err_type; + err_data->err_addr[i].mcumc_id = bps->mcumc_id; + } } else { if (bps->address) { if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) From 612ec7c69d04cb58beb1332c2806da9f2f47a3ae Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 11:50:53 +0200 Subject: [PATCH 238/358] drm/radeon: Do not hold console lock while suspending clients MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The radeon driver holds the console lock while suspending in-kernel DRM clients. This creates a circular dependency with the client-list mutex, which is supposed to be acquired first. Reported when combining radeon with another DRM driver. Therefore, do not take the console lock in radeon, but let the fbdev DRM client acquire the lock when needed. This is what all other DRM drivers do. Signed-off-by: Thomas Zimmermann Reported-by: Jeff Johnson Closes: https://lore.kernel.org/dri-devel/0a087cfd-bd4c-48f1-aa2f-4a3b12593935@oss.qualcomm.com/ Suggested-by: Ville Syrjälä Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index bbd39348a7ab..6f50cfdfe5a2 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1635,11 +1635,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, pci_set_power_state(pdev, PCI_D3hot); } - if (notify_clients) { - console_lock(); - drm_client_dev_suspend(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_suspend(dev, false); + return 0; } From fff8e0504499a929f26e2fb7cf7e2c9854e37b91 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Tue, 15 Jul 2025 11:50:54 +0200 Subject: [PATCH 239/358] drm/radeon: Do not hold console lock during resume The function radeon_resume_kms() acquires the console lock. It is inconsistent, as it depends on the notify_client argument. That lock then covers a number of suspend operations that are unrelated to the console. Remove the calls to console_lock() and console_unlock() from the radeon function. The console lock is only required by DRM's fbdev emulation, which acquires it as necessary.
Also fixes a possible circular dependency between the console lock and the client-list mutex, where the mutex is supposed to be taken first. Signed-off-by: Thomas Zimmermann Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 6f50cfdfe5a2..7a3e510327b7 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -26,7 +26,6 @@ * Jerome Glisse */ -#include #include #include #include @@ -1659,17 +1658,11 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - if (notify_clients) { - console_lock(); - } if (resume) { pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - if (pci_enable_device(pdev)) { - if (notify_clients) - console_unlock(); + if (pci_enable_device(pdev)) return -1; - } } /* resume AGP if in use */ radeon_agp_resume(rdev); @@ -1745,10 +1738,8 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool notify_clients) if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) radeon_pm_compute_clocks(rdev); - if (notify_clients) { - drm_client_dev_resume(dev, true); - console_unlock(); - } + if (notify_clients) + drm_client_dev_resume(dev, false); return 0; } From 8b824e9d2d0acf9f8c7f33fa8afd6016e8bb9ab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Jul 2025 15:45:20 +0200 Subject: [PATCH 240/358] drm/ttm: fix locking in test ttm_bo_validate_no_placement_signaled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test works even without it, but lockdep starts screaming when it is activated. Trivially fix it by acquiring the lock before we try to allocate something. 
Signed-off-by: Christian König Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710144129.1803-1-christian.koenig@amd.com --- drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 3148f5d3dbd6..38f476787302 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -542,14 +542,15 @@ static void ttm_bo_validate_no_placement_signaled(struct kunit *test) bo->ttm = old_tt; } - err = ttm_resource_alloc(bo, place, &bo->resource, NULL); - KUNIT_EXPECT_EQ(test, err, 0); - KUNIT_ASSERT_EQ(test, man->usage, size); - placement = kunit_kzalloc(test, sizeof(*placement), GFP_KERNEL); KUNIT_ASSERT_NOT_NULL(test, placement); ttm_bo_reserve(bo, false, false, NULL); + + err = ttm_resource_alloc(bo, place, &bo->resource, NULL); + KUNIT_EXPECT_EQ(test, err, 0); + KUNIT_ASSERT_EQ(test, man->usage, size); + err = ttm_bo_validate(bo, placement, &ctx); ttm_bo_unreserve(bo); From 76689eb526673d2dfab82d49c41e03caaff838fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 10 Jul 2025 16:25:21 +0200 Subject: [PATCH 241/358] drm/ttm: remove ttm_bo_validate_swapout test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test is quite fragile since it tries to allocate half of the available system memory + 1 page. If the system has either not enough memory to make the allocation work with other things running in parallel, or too much memory so the allocation fails as too large/invalid, the test will fail. Completely remove the test. We already validate swapout on the device level and that test seems to be stable.
Signed-off-by: Christian König Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250710144129.1803-2-christian.koenig@amd.com --- .../gpu/drm/ttm/tests/ttm_bo_validate_test.c | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c index 38f476787302..1bcc67977f48 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_validate_test.c @@ -758,56 +758,6 @@ static void ttm_bo_validate_move_fence_not_signaled(struct kunit *test) ttm_mock_manager_fini(priv->ttm_dev, snd_mem); } -static void ttm_bo_validate_swapout(struct kunit *test) -{ - unsigned long size_big, size = ALIGN(BO_SIZE, PAGE_SIZE); - enum ttm_bo_type bo_type = ttm_bo_type_device; - struct ttm_buffer_object *bo_small, *bo_big; - struct ttm_test_devices *priv = test->priv; - struct ttm_operation_ctx ctx = { }; - struct ttm_placement *placement; - u32 mem_type = TTM_PL_TT; - struct ttm_place *place; - struct sysinfo si; - int err; - - si_meminfo(&si); - size_big = ALIGN(((u64)si.totalram * si.mem_unit / 2), PAGE_SIZE); - - ttm_mock_manager_init(priv->ttm_dev, mem_type, size_big + size); - - place = ttm_place_kunit_init(test, mem_type, 0); - placement = ttm_placement_kunit_init(test, place, 1); - - bo_small = kunit_kzalloc(test, sizeof(*bo_small), GFP_KERNEL); - KUNIT_ASSERT_NOT_NULL(test, bo_small); - - drm_gem_private_object_init(priv->drm, &bo_small->base, size); - - err = ttm_bo_init_reserved(priv->ttm_dev, bo_small, bo_type, placement, - PAGE_SIZE, &ctx, NULL, NULL, - &dummy_ttm_bo_destroy); - KUNIT_EXPECT_EQ(test, err, 0); - dma_resv_unlock(bo_small->base.resv); - - bo_big = ttm_bo_kunit_init(test, priv, size_big, NULL); - - dma_resv_lock(bo_big->base.resv, NULL); - err = ttm_bo_validate(bo_big, placement, &ctx); - dma_resv_unlock(bo_big->base.resv); - - KUNIT_EXPECT_EQ(test, err, 0); - KUNIT_EXPECT_NOT_NULL(test, bo_big->resource); - 
KUNIT_EXPECT_EQ(test, bo_big->resource->mem_type, mem_type); - KUNIT_EXPECT_EQ(test, bo_small->resource->mem_type, TTM_PL_SYSTEM); - KUNIT_EXPECT_TRUE(test, bo_small->ttm->page_flags & TTM_TT_FLAG_SWAPPED); - - ttm_bo_put(bo_big); - ttm_bo_put(bo_small); - - ttm_mock_manager_fini(priv->ttm_dev, mem_type); -} - static void ttm_bo_validate_happy_evict(struct kunit *test) { u32 mem_type = TTM_PL_VRAM, mem_multihop = TTM_PL_TT, @@ -1202,7 +1152,6 @@ static struct kunit_case ttm_bo_validate_test_cases[] = { KUNIT_CASE(ttm_bo_validate_move_fence_signaled), KUNIT_CASE_PARAM(ttm_bo_validate_move_fence_not_signaled, ttm_bo_validate_wait_gen_params), - KUNIT_CASE(ttm_bo_validate_swapout), KUNIT_CASE(ttm_bo_validate_happy_evict), KUNIT_CASE(ttm_bo_validate_all_pinned_evict), KUNIT_CASE(ttm_bo_validate_allowed_only_evict), From 1d043d6c00b010c186ad0ddab0a0b9bd648e9bf1 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Jul 2025 13:03:50 +0200 Subject: [PATCH 242/358] drm/sitronix/st7571-i2c: Fix encoder callbacks function names It seems the driver took some inspiration from ssd130x and some of the functions (encoder callbacks) were not renamed to use a st7571_ prefix. 
Reviewed-by: Marcus Folkesson Link: https://lore.kernel.org/r/20250715110411.448343-2-javierm@redhat.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/sitronix/st7571-i2c.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index eec846892962..634b426e2874 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -550,8 +550,8 @@ static const struct drm_crtc_funcs st7571_crtc_funcs = { * Encoder */ -static void ssd130x_encoder_atomic_enable(struct drm_encoder *encoder, - struct drm_atomic_state *state) +static void st7571_encoder_atomic_enable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct drm_device *drm = encoder->dev; struct st7571_device *st7571 = drm_to_st7571(drm); @@ -565,8 +565,8 @@ static void ssd130x_encoder_atomic_enable(struct drm_encoder *encoder, st7571_send_command_list(st7571, &command, 1); } -static void ssd130x_encoder_atomic_disable(struct drm_encoder *encoder, - struct drm_atomic_state *state) +static void st7571_encoder_atomic_disable(struct drm_encoder *encoder, + struct drm_atomic_state *state) { struct drm_device *drm = encoder->dev; struct st7571_device *st7571 = drm_to_st7571(drm); @@ -581,8 +581,8 @@ static const struct drm_encoder_funcs st7571_encoder_funcs = { }; static const struct drm_encoder_helper_funcs st7571_encoder_helper_funcs = { - .atomic_enable = ssd130x_encoder_atomic_enable, - .atomic_disable = ssd130x_encoder_atomic_disable, + .atomic_enable = st7571_encoder_atomic_enable, + .atomic_disable = st7571_encoder_atomic_disable, }; /* From 720799d9462ccade1deb8d05d8b63e2cfd7f4e41 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Jul 2025 13:03:51 +0200 Subject: [PATCH 243/358] drm/sitronix/st7571-i2c: Log probe deferral cause for GPIO get failure The driver already uses the dev_err_probe() helper (which prints an error
message, or records the deferral reason for the -EPROBE_DEFER case) when it fails to get any other resource. Also do the same when it fails to obtain the reset GPIO. Reviewed-by: Marcus Folkesson Link: https://lore.kernel.org/r/20250715110411.448343-3-javierm@redhat.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/sitronix/st7571-i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index 634b426e2874..fd9d6c701cc4 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -804,7 +804,9 @@ static int st7571_parse_dt(struct st7571_device *st7571) st7571->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(st7571->reset)) - return PTR_ERR(st7571->reset); + return dev_err_probe(dev, PTR_ERR(st7571->reset), + "Failed to get reset gpio\n"); + return 0; } From d9ace6d5508020040fa39edbc72a1c544a99bbbe Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Jul 2025 13:03:52 +0200 Subject: [PATCH 244/358] drm/sitronix/st7571-i2c: Add an indirection level to parse DT Other Sitronix display controllers might need different DT parsing logic, so let's add a .parse_dt callback to struct st7571_panel_data.
Suggested-by: Thomas Zimmermann Reviewed-by: Marcus Folkesson Link: https://lore.kernel.org/r/20250715110411.448343-4-javierm@redhat.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/sitronix/st7571-i2c.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index fd9d6c701cc4..f9c4fedb3cca 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -92,6 +92,7 @@ struct st7571_panel_constraints { struct st7571_panel_data { int (*init)(struct st7571_device *st7571); + int (*parse_dt)(struct st7571_device *st7571); struct st7571_panel_constraints constraints; }; @@ -881,7 +882,7 @@ static int st7571_probe(struct i2c_client *client) i2c_set_clientdata(client, st7571); st7571->pdata = device_get_match_data(&client->dev); - ret = st7571_parse_dt(st7571); + ret = st7571->pdata->parse_dt(st7571); if (ret) return ret; @@ -964,6 +965,7 @@ static void st7571_remove(struct i2c_client *client) struct st7571_panel_data st7571_config = { .init = st7571_lcd_init, + .parse_dt = st7571_parse_dt, .constraints = { .min_nlines = 1, .max_nlines = 128, From d2bfb999640fcc5759ddae5ea9a5b98a03da9fd3 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Jul 2025 13:03:53 +0200 Subject: [PATCH 245/358] dt-bindings: display: Add Sitronix ST7567 LCD Controller Sitronix ST7567 is a monochrome Dot Matrix LCD Controller. 
Reviewed-by: Rob Herring (Arm) Reviewed-by: Marcus Folkesson Link: https://lore.kernel.org/r/20250715110411.448343-5-javierm@redhat.com Signed-off-by: Javier Martinez Canillas --- .../bindings/display/sitronix,st7567.yaml | 63 +++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 64 insertions(+) create mode 100644 Documentation/devicetree/bindings/display/sitronix,st7567.yaml diff --git a/Documentation/devicetree/bindings/display/sitronix,st7567.yaml b/Documentation/devicetree/bindings/display/sitronix,st7567.yaml new file mode 100644 index 000000000000..e8a5b8ad18fe --- /dev/null +++ b/Documentation/devicetree/bindings/display/sitronix,st7567.yaml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/sitronix,st7567.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sitronix ST7567 Display Controller + +maintainers: + - Javier Martinez Canillas + +description: + Sitronix ST7567 is a driver and controller for monochrome + dot matrix LCD panels. 
+ +allOf: + - $ref: panel/panel-common.yaml# + +properties: + compatible: + const: sitronix,st7567 + + reg: + maxItems: 1 + + width-mm: true + height-mm: true + panel-timing: true + +required: + - compatible + - reg + - width-mm + - height-mm + - panel-timing + +additionalProperties: false + +examples: + - | + i2c { + #address-cells = <1>; + #size-cells = <0>; + + display@3f { + compatible = "sitronix,st7567"; + reg = <0x3f>; + width-mm = <37>; + height-mm = <27>; + + panel-timing { + hactive = <128>; + vactive = <64>; + hback-porch = <0>; + vback-porch = <0>; + clock-frequency = <0>; + hfront-porch = <0>; + hsync-len = <0>; + vfront-porch = <0>; + vsync-len = <0>; + }; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index ee2ef9d9db2a..d97e091b1742 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7835,6 +7835,7 @@ F: drivers/gpu/drm/sitronix/st7586.c DRM DRIVER FOR SITRONIX ST7571 PANELS M: Marcus Folkesson S: Maintained +F: Documentation/devicetree/bindings/display/sitronix,st7567.yaml F: Documentation/devicetree/bindings/display/sitronix,st7571.yaml F: drivers/gpu/drm/sitronix/st7571-i2c.c From a55863ba4c9ea9febe81ecf7dba36e7989a37b7e Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 15 Jul 2025 13:03:54 +0200 Subject: [PATCH 246/358] drm/sitronix/st7571-i2c: Add support for the ST7567 Controller The Sitronix ST7567 is a monochrome Dot Matrix LCD Controller that has SPI, I2C and parallel interfaces. The st7571-i2c driver only has support for I2C so displays using other transport interfaces are currently not supported. The DRM_FORMAT_R1 pixel format and data commands are the same as those used by the ST7571 controller, so all that is needed is a different callback that implements the expected initialization sequence for the ST7567 chip.
Reviewed-by: Marcus Folkesson Reviewed-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250715110411.448343-6-javierm@redhat.com Signed-off-by: Javier Martinez Canillas --- drivers/gpu/drm/sitronix/st7571-i2c.c | 75 +++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/gpu/drm/sitronix/st7571-i2c.c b/drivers/gpu/drm/sitronix/st7571-i2c.c index f9c4fedb3cca..453eb7e045e5 100644 --- a/drivers/gpu/drm/sitronix/st7571-i2c.c +++ b/drivers/gpu/drm/sitronix/st7571-i2c.c @@ -68,6 +68,9 @@ #define ST7571_SET_COLOR_MODE(c) (0x10 | FIELD_PREP(GENMASK(0, 0), (c))) #define ST7571_COMMAND_SET_NORMAL (0x00) +/* ST7567 commands */ +#define ST7567_SET_LCD_BIAS(m) (0xa2 | FIELD_PREP(GENMASK(0, 0), (m))) + #define ST7571_PAGE_HEIGHT 8 #define DRIVER_NAME "st7571" @@ -774,6 +777,32 @@ static int st7571_validate_parameters(struct st7571_device *st7571) return 0; } +static int st7567_parse_dt(struct st7571_device *st7567) +{ + struct device *dev = &st7567->client->dev; + struct device_node *np = dev->of_node; + struct display_timing dt; + int ret; + + ret = of_get_display_timing(np, "panel-timing", &dt); + if (ret) { + dev_err(dev, "Failed to get display timing from DT\n"); + return ret; + } + + of_property_read_u32(np, "width-mm", &st7567->width_mm); + of_property_read_u32(np, "height-mm", &st7567->height_mm); + + st7567->pformat = &st7571_monochrome; + st7567->bpp = 1; + + st7567->startline = dt.vfront_porch.typ; + st7567->nlines = dt.vactive.typ; + st7567->ncols = dt.hactive.typ; + + return 0; +} + static int st7571_parse_dt(struct st7571_device *st7571) { struct device *dev = &st7571->client->dev; @@ -819,6 +848,38 @@ static void st7571_reset(struct st7571_device *st7571) gpiod_set_value_cansleep(st7571->reset, 0); } +static int st7567_lcd_init(struct st7571_device *st7567) +{ + /* + * Most of the initialization sequence is taken directly from the + * referential initial code in the ST7567 datasheet. 
+ */ + u8 commands[] = { + ST7571_DISPLAY_OFF, + + ST7567_SET_LCD_BIAS(1), + + ST7571_SET_SEG_SCAN_DIR(0), + ST7571_SET_COM_SCAN_DIR(1), + + ST7571_SET_REGULATOR_REG(4), + ST7571_SET_CONTRAST_MSB, + ST7571_SET_CONTRAST_LSB(0x20), + + ST7571_SET_START_LINE_MSB, + ST7571_SET_START_LINE_LSB(st7567->startline), + + ST7571_SET_POWER(0x4), /* Power Control, VC: ON, VR: OFF, VF: OFF */ + ST7571_SET_POWER(0x6), /* Power Control, VC: ON, VR: ON, VF: OFF */ + ST7571_SET_POWER(0x7), /* Power Control, VC: ON, VR: ON, VF: ON */ + + ST7571_SET_REVERSE(0), + ST7571_SET_ENTIRE_DISPLAY_ON(0), + }; + + return st7571_send_command_list(st7567, commands, ARRAY_SIZE(commands)); +} + static int st7571_lcd_init(struct st7571_device *st7571) { /* @@ -963,6 +1024,18 @@ static void st7571_remove(struct i2c_client *client) drm_dev_unplug(&st7571->dev); } +struct st7571_panel_data st7567_config = { + .init = st7567_lcd_init, + .parse_dt = st7567_parse_dt, + .constraints = { + .min_nlines = 1, + .max_nlines = 64, + .min_ncols = 128, + .max_ncols = 128, + .support_grayscale = false, + }, +}; + struct st7571_panel_data st7571_config = { .init = st7571_lcd_init, .parse_dt = st7571_parse_dt, @@ -976,12 +1049,14 @@ struct st7571_panel_data st7571_config = { }; static const struct of_device_id st7571_of_match[] = { + { .compatible = "sitronix,st7567", .data = &st7567_config }, { .compatible = "sitronix,st7571", .data = &st7571_config }, {}, }; MODULE_DEVICE_TABLE(of, st7571_of_match); static const struct i2c_device_id st7571_id[] = { + { "st7567", 0 }, { "st7571", 0 }, { } }; From 1e57377dad3d194f1001b9ddd9f91582057e6030 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:12 -0500 Subject: [PATCH 247/358] drm/panel/lq101r1sx01: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-1-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c index a0d76d588da1..d159b0e4fdb6 100644 --- a/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c +++ b/drivers/gpu/drm/panel/panel-sharp-lq101r1sx01.c @@ -279,9 +279,6 @@ static int sharp_panel_add(struct sharp_panel *sharp) if (IS_ERR(sharp->supply)) return PTR_ERR(sharp->supply); - drm_panel_init(&sharp->base, &sharp->link1->dev, &sharp_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&sharp->base); if (ret) return ret; @@ -323,10 +320,12 @@ static int sharp_panel_probe(struct mipi_dsi_device *dsi) /* register a panel for only the DSI-LINK1 interface */ if (secondary) { - sharp = devm_kzalloc(&dsi->dev, sizeof(*sharp), GFP_KERNEL); - if (!sharp) { + sharp = devm_drm_panel_alloc(&dsi->dev, __typeof(*sharp), base, + &sharp_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(sharp)) { put_device(&secondary->dev); - return -ENOMEM; + return PTR_ERR(sharp); } mipi_dsi_set_drvdata(dsi, sharp); From 6299cb4a211caeb7d2d8cec091e256e5ad428730 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:13 -0500 Subject: [PATCH 248/358] drm/panel/raspberrypi: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-2-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- .../gpu/drm/panel/panel-raspberrypi-touchscreen.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index e10e469aa7a6..dc4bb8ad9131 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -373,9 +373,12 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) .node = NULL, }; - ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL); - if (!ts) - return -ENOMEM; + ts = devm_drm_panel_alloc(dev, __typeof(*ts), base, + &rpi_touchscreen_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ts)) + return PTR_ERR(ts); i2c_set_clientdata(i2c, ts); @@ -428,9 +431,6 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) return PTR_ERR(ts->dsi); } - drm_panel_init(&ts->base, dev, &rpi_touchscreen_funcs, - DRM_MODE_CONNECTOR_DSI); - /* This appears last, as it's what will unblock the DSI host * driver's component bind function. */ From 8e4e733d9543061b4a988077d119d298c56e3bef Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:14 -0500 Subject: [PATCH 249/358] drm/panel/vvx10f034n00: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-3-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c index d1c5c9bc3c56..3c3308fc55df 100644 --- a/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c +++ b/drivers/gpu/drm/panel/panel-panasonic-vvx10f034n00.c @@ -166,9 +166,6 @@ static int wuxga_nt_panel_add(struct wuxga_nt_panel *wuxga_nt) if (IS_ERR(wuxga_nt->supply)) return PTR_ERR(wuxga_nt->supply); - drm_panel_init(&wuxga_nt->base, &wuxga_nt->dsi->dev, - &wuxga_nt_panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&wuxga_nt->base); if (ret) return ret; @@ -196,9 +193,12 @@ static int wuxga_nt_panel_probe(struct mipi_dsi_device *dsi) MIPI_DSI_CLOCK_NON_CONTINUOUS | MIPI_DSI_MODE_LPM; - wuxga_nt = devm_kzalloc(&dsi->dev, sizeof(*wuxga_nt), GFP_KERNEL); - if (!wuxga_nt) - return -ENOMEM; + wuxga_nt = devm_drm_panel_alloc(&dsi->dev, __typeof(*wuxga_nt), base, + &wuxga_nt_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(wuxga_nt)) + return PTR_ERR(wuxga_nt); mipi_dsi_set_drvdata(dsi, wuxga_nt); From 6afbf43edfae07cb74ea3e66984e2b6ac1c2083a Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:15 -0500 Subject: [PATCH 250/358] drm/panel/osd101t2587-53ts: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-4-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c index dbea84f51514..2334b77f348c 100644 --- a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c +++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c @@ -132,9 +132,6 @@ static int osd101t2587_panel_add(struct osd101t2587_panel *osd101t2587) if (IS_ERR(osd101t2587->supply)) return PTR_ERR(osd101t2587->supply); - drm_panel_init(&osd101t2587->base, &osd101t2587->dsi->dev, - &osd101t2587_panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&osd101t2587->base); if (ret) return ret; @@ -161,9 +158,12 @@ static int osd101t2587_panel_probe(struct mipi_dsi_device *dsi) MIPI_DSI_MODE_VIDEO_SYNC_PULSE | MIPI_DSI_MODE_NO_EOT_PACKET; - osd101t2587 = devm_kzalloc(&dsi->dev, sizeof(*osd101t2587), GFP_KERNEL); - if (!osd101t2587) - return -ENOMEM; + osd101t2587 = devm_drm_panel_alloc(&dsi->dev, __typeof(*osd101t2587), base, + &osd101t2587_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(osd101t2587)) + return PTR_ERR(osd101t2587); mipi_dsi_set_drvdata(dsi, osd101t2587); From a8f268ac9d46355e48ff24d0c0a1dbaccdb800ca Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:16 -0500 Subject: [PATCH 251/358] drm/panel/novatek-nt36672a: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-5-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-novatek-nt36672a.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c index c2abd20e0734..29e1f6aea480 100644 --- a/drivers/gpu/drm/panel/panel-novatek-nt36672a.c +++ b/drivers/gpu/drm/panel/panel-novatek-nt36672a.c @@ -608,8 +608,6 @@ static int nt36672a_panel_add(struct nt36672a_panel *pinfo) return dev_err_probe(dev, PTR_ERR(pinfo->reset_gpio), "failed to get reset gpio from DT\n"); - drm_panel_init(&pinfo->base, dev, &panel_funcs, DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&pinfo->base); if (ret) return dev_err_probe(dev, ret, "Failed to get backlight\n"); @@ -625,9 +623,11 @@ static int nt36672a_panel_probe(struct mipi_dsi_device *dsi) const struct nt36672a_panel_desc *desc; int err; - pinfo = devm_kzalloc(&dsi->dev, sizeof(*pinfo), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; + pinfo = devm_drm_panel_alloc(&dsi->dev, __typeof(*pinfo), base, + &panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(pinfo)) + return PTR_ERR(pinfo); desc = of_device_get_match_data(&dsi->dev); dsi->mode_flags = desc->mode_flags; From daeca2b7c7321bae49ceb7520004f2059e66d35c Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:17 -0500 Subject: [PATCH 252/358] drm/panel/lg-sw43408: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-6-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-lg-sw43408.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-lg-sw43408.c b/drivers/gpu/drm/panel/panel-lg-sw43408.c index f3dcc39670ea..46a56ea92ad9 100644 --- a/drivers/gpu/drm/panel/panel-lg-sw43408.c +++ b/drivers/gpu/drm/panel/panel-lg-sw43408.c @@ -246,8 +246,6 @@ static int sw43408_add(struct sw43408_panel *ctx) ctx->base.prepare_prev_first = true; - drm_panel_init(&ctx->base, dev, &sw43408_funcs, DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&ctx->base); return ret; } @@ -257,9 +255,11 @@ static int sw43408_probe(struct mipi_dsi_device *dsi) struct sw43408_panel *ctx; int ret; - ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + ctx = devm_drm_panel_alloc(&dsi->dev, __typeof(*ctx), base, + &sw43408_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); dsi->mode_flags = MIPI_DSI_MODE_LPM; dsi->format = MIPI_DSI_FMT_RGB888; From 6a855c7f5685fa06d33727d62484e116478994d3 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:18 -0500 Subject: [PATCH 253/358] drm/panel/kd097d04: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-7-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index d6b912277196..2fc7b0779b37 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -337,9 +337,6 @@ static int kingdisplay_panel_add(struct kingdisplay_panel *kingdisplay) kingdisplay->enable_gpio = NULL; } - drm_panel_init(&kingdisplay->base, &kingdisplay->link->dev, - &kingdisplay_panel_funcs, DRM_MODE_CONNECTOR_DSI); - err = drm_panel_of_backlight(&kingdisplay->base); if (err) return err; @@ -364,9 +361,12 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM; - kingdisplay = devm_kzalloc(&dsi->dev, sizeof(*kingdisplay), GFP_KERNEL); - if (!kingdisplay) - return -ENOMEM; + kingdisplay = devm_drm_panel_alloc(&dsi->dev, __typeof(*kingdisplay), base, + &kingdisplay_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(kingdisplay)) + return PTR_ERR(kingdisplay); mipi_dsi_set_drvdata(dsi, kingdisplay); kingdisplay->link = dsi; From 95ec5c606dfdd73641e11ac481128f48e7cb6cfc Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:19 -0500 Subject: [PATCH 254/358] drm/panel/khadas-ts050: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-8-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-khadas-ts050.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-khadas-ts050.c b/drivers/gpu/drm/panel/panel-khadas-ts050.c index 0e5e8e57bd1e..67ca055f06f3 100644 --- a/drivers/gpu/drm/panel/panel-khadas-ts050.c +++ b/drivers/gpu/drm/panel/panel-khadas-ts050.c @@ -821,9 +821,6 @@ static int khadas_ts050_panel_add(struct khadas_ts050_panel *khadas_ts050) return dev_err_probe(dev, PTR_ERR(khadas_ts050->enable_gpio), "failed to get enable gpio"); - drm_panel_init(&khadas_ts050->base, &khadas_ts050->link->dev, - &khadas_ts050_panel_funcs, DRM_MODE_CONNECTOR_DSI); - err = drm_panel_of_backlight(&khadas_ts050->base); if (err) return err; @@ -850,10 +847,12 @@ static int khadas_ts050_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; - khadas_ts050 = devm_kzalloc(&dsi->dev, sizeof(*khadas_ts050), - GFP_KERNEL); - if (!khadas_ts050) - return -ENOMEM; + khadas_ts050 = devm_drm_panel_alloc(&dsi->dev, __typeof(*khadas_ts050), + base, &khadas_ts050_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(khadas_ts050)) + return PTR_ERR(khadas_ts050); khadas_ts050->panel_data = (struct khadas_ts050_panel_data *)data; mipi_dsi_set_drvdata(dsi, khadas_ts050); From b669ce70f459dc40ed7c501940a99c7046736155 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:20 -0500 Subject: [PATCH 255/358] drm/panel/jdi-lt070me05000: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-9-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-jdi-lt070me05000.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c index b1ce186de261..3513e5c4dd8c 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c +++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c @@ -402,9 +402,6 @@ static int jdi_panel_add(struct jdi_panel *jdi) return dev_err_probe(dev, PTR_ERR(jdi->backlight), "failed to register backlight %d\n", ret); - drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&jdi->base); return 0; @@ -426,9 +423,11 @@ static int jdi_panel_probe(struct mipi_dsi_device *dsi) dsi->mode_flags = MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO | MIPI_DSI_CLOCK_NON_CONTINUOUS; - jdi = devm_kzalloc(&dsi->dev, sizeof(*jdi), GFP_KERNEL); - if (!jdi) - return -ENOMEM; + jdi = devm_drm_panel_alloc(&dsi->dev, __typeof(*jdi), base, + &jdi_panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(jdi)) + return PTR_ERR(jdi); mipi_dsi_set_drvdata(dsi, jdi); From d29ab79c6f59a626028dac3c5177648e4417bd3f Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:21 -0500 Subject: [PATCH 256/358] drm/panel/lpm102a188a: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-10-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c index 5b5082efb282..5f897e143758 100644 --- a/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c +++ b/drivers/gpu/drm/panel/panel-jdi-lpm102a188a.c @@ -435,9 +435,6 @@ static int jdi_panel_add(struct jdi_panel *jdi) return dev_err_probe(dev, PTR_ERR(jdi->backlight), "failed to create backlight\n"); - drm_panel_init(&jdi->base, &jdi->link1->dev, &jdi_panel_funcs, - DRM_MODE_CONNECTOR_DSI); - drm_panel_add(&jdi->base); return 0; @@ -475,10 +472,13 @@ static int jdi_panel_dsi_probe(struct mipi_dsi_device *dsi) /* register a panel for only the DSI-LINK1 interface */ if (secondary) { - jdi = devm_kzalloc(&dsi->dev, sizeof(*jdi), GFP_KERNEL); - if (!jdi) { + jdi = devm_drm_panel_alloc(&dsi->dev, __typeof(*jdi), + base, &jdi_panel_funcs, + DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(jdi)) { put_device(&secondary->dev); - return -ENOMEM; + return PTR_ERR(jdi); } mipi_dsi_set_drvdata(dsi, jdi); From ea8642fe79662546aa3bbd2d3315df4cc3367d89 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:22 -0500 Subject: [PATCH 257/358] drm/panel/ilitek-ili9882t: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-11-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-ilitek-ili9882t.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c index 3c24a63b6be8..85c7059be214 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9882t.c @@ -614,8 +614,6 @@ static int ili9882t_add(struct ili9882t *ili) gpiod_set_value(ili->enable_gpio, 0); - drm_panel_init(&ili->base, dev, &ili9882t_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &ili->orientation); if (err < 0) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); @@ -640,9 +638,11 @@ static int ili9882t_probe(struct mipi_dsi_device *dsi) int ret; const struct panel_desc *desc; - ili = devm_kzalloc(&dsi->dev, sizeof(*ili), GFP_KERNEL); - if (!ili) - return -ENOMEM; + ili = devm_drm_panel_alloc(&dsi->dev, __typeof(*ili), base, + &ili9882t_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ili)) + return PTR_ERR(ili); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = desc->lanes; From 46c8779be619159a9774846b022286fbf861fe8e Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:23 -0500 Subject: [PATCH 258/358] drm/panel/himax-hx83102: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-12-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-himax-hx83102.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-himax-hx83102.c b/drivers/gpu/drm/panel/panel-himax-hx83102.c index 66abfc44e424..4c432d207634 100644 --- a/drivers/gpu/drm/panel/panel-himax-hx83102.c +++ b/drivers/gpu/drm/panel/panel-himax-hx83102.c @@ -989,8 +989,6 @@ static int hx83102_panel_add(struct hx83102 *ctx) ctx->base.prepare_prev_first = true; - drm_panel_init(&ctx->base, dev, &hx83102_drm_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &ctx->orientation); if (err < 0) return dev_err_probe(dev, err, "failed to get orientation\n"); @@ -1013,9 +1011,11 @@ static int hx83102_probe(struct mipi_dsi_device *dsi) int ret; const struct hx83102_panel_desc *desc; - ctx = devm_kzalloc(&dsi->dev, sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + ctx = devm_drm_panel_alloc(&dsi->dev, __typeof(*ctx), base, + &hx83102_drm_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(ctx)) + return PTR_ERR(ctx); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = 4; From 51929b6850a92a40db1270ea7234ac5d325aafac Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:24 -0500 Subject: [PATCH 259/358] drm/panel/boe-tv101wum-nl6: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-13-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c index 3e5b0d8636d0..d5fe105bdbdd 100644 --- a/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c +++ b/drivers/gpu/drm/panel/panel-boe-tv101wum-nl6.c @@ -1720,8 +1720,6 @@ static int boe_panel_add(struct boe_panel *boe) boe->base.prepare_prev_first = true; - drm_panel_init(&boe->base, dev, &boe_panel_funcs, - DRM_MODE_CONNECTOR_DSI); err = of_drm_get_panel_orientation(dev->of_node, &boe->orientation); if (err < 0) { dev_err(dev, "%pOF: failed to get orientation %d\n", dev->of_node, err); @@ -1746,9 +1744,11 @@ static int boe_panel_probe(struct mipi_dsi_device *dsi) int ret; const struct panel_desc *desc; - boe = devm_kzalloc(&dsi->dev, sizeof(*boe), GFP_KERNEL); - if (!boe) - return -ENOMEM; + boe = devm_drm_panel_alloc(&dsi->dev, __typeof(*boe), base, + &boe_panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(boe)) + return PTR_ERR(boe); desc = of_device_get_match_data(&dsi->dev); dsi->lanes = desc->lanes; From d27da6792c805f5b4d88f21e8ba2069b1f2d41ea Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 10 Jul 2025 23:31:25 -0500 Subject: [PATCH 260/358] drm/panel/boe-himax8279d: Use refcounted allocation in place of devm_kzalloc() Move to using the new API devm_drm_panel_alloc() to allocate the panel. In the call to the new API, avoid using explicit type and use __typeof() for more type safety. 
Signed-off-by: Anusha Srivatsa Link: https://lore.kernel.org/r/20250710-b4-driver-convert-last-part-july-v1-14-de73ba81b2f5@redhat.com Signed-off-by: Maxime Ripard --- drivers/gpu/drm/panel/panel-boe-himax8279d.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-boe-himax8279d.c b/drivers/gpu/drm/panel/panel-boe-himax8279d.c index df746baae301..4a8560b4b899 100644 --- a/drivers/gpu/drm/panel/panel-boe-himax8279d.c +++ b/drivers/gpu/drm/panel/panel-boe-himax8279d.c @@ -847,9 +847,6 @@ static int panel_add(struct panel_info *pinfo) "failed to get enable gpio\n"); } - drm_panel_init(&pinfo->base, dev, &panel_funcs, - DRM_MODE_CONNECTOR_DSI); - ret = drm_panel_of_backlight(&pinfo->base); if (ret) return ret; @@ -865,9 +862,11 @@ static int panel_probe(struct mipi_dsi_device *dsi) const struct panel_desc *desc; int err; - pinfo = devm_kzalloc(&dsi->dev, sizeof(*pinfo), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; + pinfo = devm_drm_panel_alloc(&dsi->dev, __typeof(*pinfo), base, + &panel_funcs, DRM_MODE_CONNECTOR_DSI); + + if (IS_ERR(pinfo)) + return PTR_ERR(pinfo); desc = of_device_get_match_data(&dsi->dev); dsi->mode_flags = desc->mode_flags; From 2bf85c45db96d83e082daa2903fd2a3019b7ad18 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 25 Jun 2025 17:14:35 +0200 Subject: [PATCH 261/358] drm/tests: edid: Fix monitor range limits For some reason, some EDIDs used by kunit had Monitor Range Limits making no sense, and not matching the edid-decode output in the comment. While they were in the comments as: Display Range Limits: Monitor ranges (GTF): 50-70 Hz V, 30-70 kHz H, max dotclock 150 MHz They were actually: Display Range Limits: Monitor ranges (GTF): 50-70 Hz V, 0-0 kHz H, max dotclock 1960 MHz Fix that section of the EDIDs to match the expected edid-decode output. 
Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250625-drm-update-edid-v1-1-2d963743ab9e@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_kunit_edid.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_kunit_edid.h b/drivers/gpu/drm/tests/drm_kunit_edid.h index 02e2761b3b1f..f65c46cc1684 100644 --- a/drivers/gpu/drm/tests/drm_kunit_edid.h +++ b/drivers/gpu/drm/tests/drm_kunit_edid.h @@ -147,9 +147,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, 0x00, 0x12, 0x34, 0x00, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -249,9 +249,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, 0x00, 0x12, 0x34, 0x00, 0x28, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -351,9 +351,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, - 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, 0x00, 0x12, 0x34, 0x00, 0x44, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, From b17ade59aac4b23abff7c58bc4e19398b05c7b43 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 25 Jun 2025 17:14:36 +0200 Subject: [PATCH 262/358] drm/tests: edid: Update CTA-861 HDMI Vendor Specific Data Block For some reason, the HDMI VSDBs in our kunit EDIDs had a length longer than expected. While this was harmless, we should get rid of it to make it somewhat predictable. 
Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250625-drm-update-edid-v1-2-2d963743ab9e@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_kunit_edid.h | 85 ++++++++++++-------------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/tests/drm_kunit_edid.h b/drivers/gpu/drm/tests/drm_kunit_edid.h index f65c46cc1684..7d9f0d198e21 100644 --- a/drivers/gpu/drm/tests/drm_kunit_edid.h +++ b/drivers/gpu/drm/tests/drm_kunit_edid.h @@ -73,14 +73,14 @@ static const unsigned char test_edid_dvi_1080p[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 14 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 14 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 e4 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 10 * * ---------------- * @@ -135,8 +135,7 @@ static const unsigned char test_edid_dvi_1080p[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: * Source physical address: 1.2.3.4 * Maximum TMDS clock: 100 MHz - * Extended HDMI video details: - * Checksum: 0xe4 Unused space in Extension Block: 100 bytes + * Checksum: 0x10 Unused space in Extension Block: 106 bytes */ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -149,9 +148,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 
0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -160,7 +159,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xe4 + 0x00, 0x00, 0x00, 0x10 }; /* @@ -175,14 +174,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 28 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d0 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc * * ---------------- * @@ -237,8 +236,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: 
* Source physical address: 1.2.3.4 * Maximum TMDS clock: 200 MHz - * Extended HDMI video details: - * Checksum: 0xd0 Unused space in Extension Block: 100 bytes + * Checksum: 0xfc Unused space in Extension Block: 106 bytes */ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -251,9 +249,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x28, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -262,7 +260,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xd0 + 0x00, 0x00, 0x00, 0xfc }; /* @@ -277,14 +275,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * - * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c - * 00 12 34 00 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 
15 81 e3 05 00 20 41 10 e2 00 4a 67 03 0c + * 00 12 34 00 44 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d0 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 e0 * * ---------------- * @@ -339,8 +337,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: * Source physical address: 1.2.3.4 * Maximum TMDS clock: 340 MHz - * Extended HDMI video details: - * Checksum: 0xd0 Unused space in Extension Block: 100 bytes + * Checksum: 0xe0 Unused space in Extension Block: 106 bytes */ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -353,9 +350,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x1b, 0x81, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x00, 0x44, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x92, 0x02, 0x03, 0x15, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -364,7 +361,7 @@ static const unsigned char 
test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xd0 + 0x00, 0x00, 0x00, 0xe0 }; /* @@ -379,14 +376,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 7a * - * 02 03 1b b1 e3 05 00 20 41 10 e2 00 ca 6d 03 0c - * 00 12 34 78 28 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 b1 e3 05 00 20 41 10 e2 00 ca 67 03 0c + * 00 12 34 78 28 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 a8 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 d4 * * ---------------- * @@ -447,8 +444,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { * DC_30bit * DC_Y444 * Maximum TMDS clock: 200 MHz - * Extended HDMI video details: - * Checksum: 0xa8 Unused space in Extension Block: 100 bytes + * Checksum: 0xd4 Unused space in Extension Block: 106 bytes */ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -461,9 +457,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7a, 0x02, 0x03, 0x1b, 0xb1, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x6d, 0x03, 0x0c, 
- 0x00, 0x12, 0x34, 0x78, 0x28, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7a, 0x02, 0x03, 0x15, 0xb1, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x78, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -472,7 +468,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0xa8 + 0x00, 0x00, 0x00, 0xd4 }; /* @@ -487,14 +483,14 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 8a * - * 02 03 1b b1 e3 05 00 20 41 10 e2 00 ca 6d 03 0c - * 00 12 34 78 44 20 00 00 00 00 00 00 00 00 00 00 + * 02 03 15 b1 e3 05 00 20 41 10 e2 00 ca 67 03 0c + * 00 12 34 78 44 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 - * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 8c + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 b8 * * ---------------- * @@ -555,8 +551,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { * DC_30bit * DC_Y444 * Maximum TMDS clock: 340 MHz - * Extended HDMI video details: - * Checksum: 0x8c Unused space in Extension Block: 100 bytes + * Checksum: 0xb8 Unused space in Extension Block: 106 bytes */ static const unsigned char 
test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -569,9 +564,9 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, 0x46, 0x1e, 0x46, 0x0f, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x8a, 0x02, 0x03, 0x1b, 0xb1, - 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x6d, 0x03, 0x0c, - 0x00, 0x12, 0x34, 0x78, 0x44, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x8a, 0x02, 0x03, 0x15, 0xb1, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0xca, 0x67, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x78, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -580,7 +575,7 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x8c + 0x00, 0x00, 0x00, 0xb8 }; /* From d618363a53aed24c94442b58c4f59e33222eb092 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 25 Jun 2025 17:14:37 +0200 Subject: [PATCH 263/358] drm/tests: edid: Add edid-decode --check output Some of our EDIDs are (rightfully) invalid, but most of them should be valid. Let's add the edid-decode --check of these EDIDs when they were generated, so we know what to expect going forward, and a comment to explicitly mention when we expect them to be broken. 
Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20250625-drm-update-edid-v1-3-2d963743ab9e@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/tests/drm_kunit_edid.h | 51 ++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/gpu/drm/tests/drm_kunit_edid.h b/drivers/gpu/drm/tests/drm_kunit_edid.h index 7d9f0d198e21..c59c8528a3f7 100644 --- a/drivers/gpu/drm/tests/drm_kunit_edid.h +++ b/drivers/gpu/drm/tests/drm_kunit_edid.h @@ -46,6 +46,13 @@ * Monitor ranges (GTF): 50-70 Hz V, 30-70 kHz H, max dotclock 150 MHz * Dummy Descriptor: * Checksum: 0xab + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_dvi_1080p[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -62,6 +69,10 @@ static const unsigned char test_edid_dvi_1080p[] = { }; /* + * + * This edid is intentionally broken with the 100MHz limit. It's meant + * to be used only with tests in unusual situations. + * * edid-decode (hex): * * 00 ff ff ff ff ff ff 00 31 d8 2a 00 00 00 00 00 @@ -136,6 +147,18 @@ static const unsigned char test_edid_dvi_1080p[] = { * Source physical address: 1.2.3.4 * Maximum TMDS clock: 100 MHz * Checksum: 0x10 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * Failures: + * + * EDID: + * CTA-861: The maximum HDMI TMDS clock is 100000 kHz, but one or more video timings go up to 148500 kHz. 
+ * + * EDID conformity: FAIL */ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -237,6 +260,13 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { * Source physical address: 1.2.3.4 * Maximum TMDS clock: 200 MHz * Checksum: 0xfc Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -338,6 +368,13 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_200mhz[] = { * Source physical address: 1.2.3.4 * Maximum TMDS clock: 340 MHz * Checksum: 0xe0 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -445,6 +482,13 @@ static const unsigned char test_edid_hdmi_1080p_rgb_max_340mhz[] = { * DC_Y444 * Maximum TMDS clock: 200 MHz * Checksum: 0xd4 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, @@ -552,6 +596,13 @@ static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz[] = { * DC_Y444 * Maximum TMDS clock: 340 MHz * Checksum: 0xb8 Unused space in Extension Block: 106 bytes + * + * ---------------- + * + * edid-decode 1.30.0-5367 + * edid-decode SHA: 41ebf7135691 2025-05-01 10:19:22 + * + * EDID conformity: PASS */ 
static const unsigned char test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz[] = { 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, From 0389e4256eb29ee80598129b8004db5bbbd6fbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:04 +0300 Subject: [PATCH 264/358] drm: Pass pixel_format+modifier to .get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple .get_format_info() from struct drm_mode_fb_cmd2 and just pass the pixel format+modifier combo in by hand. We may want to use .get_format_info() outside of the normal addfb paths where we won't have a struct drm_mode_fb_cmd2, and creating a temporary one just for this seems silly. v2: Fix intel_fb_get_format_info() docs (Laurent) Cc: Harry Wentland Cc: Leo Li Cc: Rodrigo Siqueira Cc: Alex Deucher Cc: amd-gfx@lists.freedesktop.org Cc: Laurent Pinchart Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-2-ville.syrjala@linux.intel.com --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c | 4 ++-- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h | 2 +- drivers/gpu/drm/drm_fourcc.c | 3 ++- drivers/gpu/drm/i915/display/intel_fb.c | 11 ++++++----- drivers/gpu/drm/i915/display/intel_fb.h | 2 +- include/drm/drm_mode_config.h | 2 +- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index b7c6e8d13435..eef51652ca35 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -92,9 +92,9 @@ enum dm_micro_swizzle { MICRO_SWIZZLE_R = 3 }; -const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +const struct 
drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier) { - return amdgpu_lookup_format_info(cmd->pixel_format, cmd->modifier[0]); + return amdgpu_lookup_format_info(pixel_format, modifier); } void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 615d2ab2b803..ea2619b507db 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -58,7 +58,7 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, unsigned long possible_crtcs, const struct dc_plane_cap *plane_cap); -const struct drm_format_info *amdgpu_dm_plane_get_format_info(const struct drm_mode_fb_cmd2 *cmd); +const struct drm_format_info *amdgpu_dm_plane_get_format_info(u32 pixel_format, u64 modifier); void amdgpu_dm_plane_fill_blending_from_plane_state(const struct drm_plane_state *plane_state, bool *per_pixel_alpha, bool *pre_multiplied_alpha, diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 2890e889dd15..4b4444f6d504 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -430,7 +430,8 @@ drm_get_format_info(struct drm_device *dev, const struct drm_format_info *info = NULL; if (dev->mode_config.funcs->get_format_info) - info = dev->mode_config.funcs->get_format_info(mode_cmd); + info = dev->mode_config.funcs->get_format_info(mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info) info = drm_format_info(mode_cmd->pixel_format); diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 79811f998e38..e221db072de2 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -422,21 +422,22 @@ unsigned int intel_fb_modifier_to_tiling(u64 fb_modifier) /** * intel_fb_get_format_info: Get a modifier 
specific format information - * @cmd: FB add command structure + * @pixel_format: pixel format + * @modifier: modifier * * Returns: - * Returns the format information for @cmd->pixel_format specific to @cmd->modifier[0], + * Returns the format information for @pixel_format specific to @modifier, * or %NULL if the modifier doesn't override the format. */ const struct drm_format_info * -intel_fb_get_format_info(const struct drm_mode_fb_cmd2 *cmd) +intel_fb_get_format_info(u32 pixel_format, u64 modifier) { - const struct intel_modifier_desc *md = lookup_modifier_or_null(cmd->modifier[0]); + const struct intel_modifier_desc *md = lookup_modifier_or_null(modifier); if (!md || !md->formats) return NULL; - return lookup_format_info(md->formats, md->format_count, cmd->pixel_format); + return lookup_format_info(md->formats, md->format_count, pixel_format); } static bool plane_caps_contain_any(u8 caps, u8 mask) diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index bdd76b372957..7d1267fbeee2 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -47,7 +47,7 @@ u64 *intel_fb_plane_get_modifiers(struct intel_display *display, bool intel_fb_plane_supports_modifier(struct intel_plane *plane, u64 modifier); const struct drm_format_info * -intel_fb_get_format_info(const struct drm_mode_fb_cmd2 *cmd); +intel_fb_get_format_info(u32 pixel_format, u64 modifier); bool intel_format_info_is_yuv_semiplanar(const struct drm_format_info *info, diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 9e524b51a001..e971e1b8a850 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -95,7 +95,7 @@ struct drm_mode_config_funcs { * The format information specific to the given fb metadata, or * NULL if none is found. 
*/ - const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd); + const struct drm_format_info *(*get_format_info)(u32 pixel_format, u64 modifier); /** * @mode_valid: From 0e7d5874fb6b80c44be3cfbcf1cf356e81d91232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:05 +0300 Subject: [PATCH 265/358] drm: Pass pixel_format+modifier directly to drm_get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple drm_get_format_info() from struct drm_mode_fb_cmd2 and just pass the pixel format+modifier combo in by hand. We may want to use drm_get_format_info() outside of the normal addfb paths where we won't have a struct drm_mode_fb_cmd2, and creating a temporary one just for this seems silly. Done with cocci: @@ identifier dev, mode_cmd; @@ struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd + u32 pixel_format, u64 modifier ) { <... 
( - mode_cmd->pixel_format + pixel_format | - mode_cmd->modifier[0] + modifier ) ...> } @@ identifier dev, mode_cmd; @@ struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd + u32 pixel_format, u64 modifier ); @@ expression dev, mode_cmd; @@ - drm_get_format_info(dev, mode_cmd) + drm_get_format_info(dev, mode_cmd->pixel_format, mode_cmd->modifier[0]) v2: Fix kernel docs (Laurent) Drop drm_mode_fb_cmd2 forward declaration (Thomas) Cc: Liviu Dudau Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Marek Vasut Cc: Stefan Agner Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Alex Deucher Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Thierry Reding Cc: Mikko Perttunen Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Reviewed-by: Liviu Dudau Acked-by: Alex Deucher Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/arm/malidp_drv.c | 3 ++- drivers/gpu/drm/armada/armada_fb.c | 4 +++- drivers/gpu/drm/drm_fourcc.c | 11 ++++++----- drivers/gpu/drm/drm_framebuffer.c | 2 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 9 ++++++--- drivers/gpu/drm/drm_modeset_helper.c | 3 ++- drivers/gpu/drm/exynos/exynos_drm_fb.c | 4 +++- drivers/gpu/drm/gma500/framebuffer.c | 3 ++- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 4 +++- drivers/gpu/drm/msm/msm_fb.c | 6 ++++-- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 3 ++- drivers/gpu/drm/nouveau/nouveau_display.c | 3 ++- drivers/gpu/drm/omapdrm/omap_fb.c | 6 ++++-- drivers/gpu/drm/radeon/radeon_fbdev.c | 3 ++- 
drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 3 ++- drivers/gpu/drm/tegra/fb.c | 4 +++- include/drm/drm_fourcc.h | 3 +-- 17 files changed, 48 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index e083021e9e99..558e44a7e627 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -325,7 +325,8 @@ malidp_verify_afbc_framebuffer_size(struct drm_device *dev, return false; } - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); n_superblocks = (mode_cmd->width / afbc_superblock_width) * (mode_cmd->height / afbc_superblock_height); diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index cf2e88218dc0..85fc2cb50544 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -86,7 +86,9 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, struct drm_framebuffer *armada_fb_create(struct drm_device *dev, struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode) { - const struct drm_format_info *info = drm_get_format_info(dev, mode); + const struct drm_format_info *info = drm_get_format_info(dev, + mode->pixel_format, + mode->modifier[0]); struct armada_gem_object *obj; struct armada_framebuffer *dfb; int ret; diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index 4b4444f6d504..e0d533611040 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -417,7 +417,8 @@ EXPORT_SYMBOL(drm_format_info); /** * drm_get_format_info - query information for a given framebuffer configuration * @dev: DRM device - * @mode_cmd: metadata from the userspace fb creation request + * @pixel_format: pixel format (DRM_FORMAT_*) + * @modifier: modifier * * Returns: * The instance of struct drm_format_info that describes the pixel format, or @@ -425,16 +426,16 @@ EXPORT_SYMBOL(drm_format_info); 
*/ const struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd) + u32 pixel_format, u64 modifier) { const struct drm_format_info *info = NULL; if (dev->mode_config.funcs->get_format_info) - info = dev->mode_config.funcs->get_format_info(mode_cmd->pixel_format, - mode_cmd->modifier[0]); + info = dev->mode_config.funcs->get_format_info(pixel_format, + modifier); if (!info) - info = drm_format_info(mode_cmd->pixel_format); + info = drm_format_info(pixel_format); return info; } diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index b781601946db..18a0267e374e 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -176,7 +176,7 @@ static int framebuffer_check(struct drm_device *dev, } /* now let the driver pick its own format info */ - info = drm_get_format_info(dev, r); + info = drm_get_format_info(dev, r->pixel_format, r->modifier[0]); for (i = 0; i < info->num_planes; i++) { unsigned int width = drm_format_info_plane_width(info, r->width, i); diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 618ce725cd75..62eec0fddc3e 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -160,7 +160,8 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, unsigned int i; int ret; - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info) { drm_dbg_kms(dev, "Failed to get FB format info\n"); return -EINVAL; @@ -502,7 +503,8 @@ static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, { const struct drm_format_info *info; - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); switch (info->format) { case DRM_FORMAT_YUV420_8BIT: @@ -600,7 +602,8 @@ int drm_gem_fb_afbc_init(struct drm_device 
*dev, int ret; objs = afbc_fb->base.obj; - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info) return -EINVAL; diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index ef32f6af10d4..3fed2d5ab1d6 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -86,7 +86,8 @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, int i; fb->dev = dev; - fb->format = drm_get_format_info(dev, mode_cmd); + fb->format = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); fb->width = mode_cmd->width; fb->height = mode_cmd->height; for (i = 0; i < 4; i++) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index fc1c5608db96..bcf7b534d1f7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -96,7 +96,9 @@ static struct drm_framebuffer * exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, mode_cmd); + const struct drm_format_info *info = drm_get_format_info(dev, + mode_cmd->pixel_format, + mode_cmd->modifier[0]); struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; struct drm_framebuffer *fb; int i; diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 1a374702b696..c82e623a2071 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -39,7 +39,8 @@ static int psb_framebuffer_init(struct drm_device *dev, * Reject unknown formats, YUV formats, and formats with more than * 4 bytes per pixel. 
*/ - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info || !info->depth || info->cpp[0] > 4) return -EINVAL; diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 7c0c12dde488..0ebcfcbc258b 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -45,7 +45,9 @@ mtk_drm_mode_fb_create(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, cmd); + const struct drm_format_info *info = drm_get_format_info(dev, + cmd->pixel_format, + cmd->modifier[0]); if (info->num_planes != 1) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index bc7c2bb8f01e..d8a7ac4595bc 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -142,7 +142,8 @@ struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); + mode_cmd->pixel_format, + mode_cmd->modifier[0]); struct drm_gem_object *bos[4] = {0}; struct drm_framebuffer *fb; int ret, i, n = info->num_planes; @@ -173,7 +174,8 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); + mode_cmd->pixel_format, + mode_cmd->modifier[0]); struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct msm_framebuffer *msm_fb = NULL; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index c183b1112bc4..09329af9b01e 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -95,7 +95,8 @@ mxsfb_fb_create(struct 
drm_device *dev, struct drm_file *file_priv, { const struct drm_format_info *info; - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info) return ERR_PTR(-EINVAL); diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index c50ec347b30a..bd9a85f4b4fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -295,7 +295,8 @@ nouveau_framebuffer_new(struct drm_device *dev, kind = nvbo->kind; } - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); for (i = 0; i < info->num_planes; i++) { height = drm_format_info_plane_height(info, diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 449d521c78fe..e18878068c57 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -338,7 +338,8 @@ struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd); + mode_cmd->pixel_format, + mode_cmd->modifier[0]); unsigned int num_planes = info->num_planes; struct drm_gem_object *bos[4]; struct drm_framebuffer *fb; @@ -378,7 +379,8 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, dev, mode_cmd, mode_cmd->width, mode_cmd->height, (char *)&mode_cmd->pixel_format); - format = drm_get_format_info(dev, mode_cmd); + format = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); for (i = 0; i < ARRAY_SIZE(formats); i++) { if (formats[i] == mode_cmd->pixel_format) diff --git a/drivers/gpu/drm/radeon/radeon_fbdev.c b/drivers/gpu/drm/radeon/radeon_fbdev.c index d4a58bd679db..e3a481bbee7b 100644 --- a/drivers/gpu/drm/radeon/radeon_fbdev.c +++ b/drivers/gpu/drm/radeon/radeon_fbdev.c @@ -67,7 
+67,8 @@ static int radeon_fbdev_create_pinned_object(struct drm_fb_helper *fb_helper, int height = mode_cmd->height; u32 cpp; - info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd); + info = drm_get_format_info(rdev_to_drm(rdev), mode_cmd->pixel_format, + mode_cmd->modifier[0]); cpp = info->cpp[0]; /* need to align pitch with crtc limits */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index 5829ee061c61..66762ca54a98 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -36,7 +36,8 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file, const struct drm_format_info *info; int ret; - info = drm_get_format_info(dev, mode_cmd); + info = drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!info) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 46170753699d..634c6346d947 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -134,7 +134,9 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, struct drm_file *file, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(drm, cmd); + const struct drm_format_info *info = drm_get_format_info(drm, + cmd->pixel_format, + cmd->modifier[0]); struct tegra_bo *planes[4]; struct drm_gem_object *gem; struct drm_framebuffer *fb; diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index c3f4405d6662..471784426857 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -54,7 +54,6 @@ #endif struct drm_device; -struct drm_mode_fb_cmd2; /** * struct drm_format_info - information about a DRM format @@ -309,7 +308,7 @@ const struct drm_format_info *__drm_format_info(u32 format); const struct drm_format_info *drm_format_info(u32 format); const struct drm_format_info * drm_get_format_info(struct drm_device *dev, - const struct 
drm_mode_fb_cmd2 *mode_cmd); + u32 pixel_format, u64 modifier); uint32_t drm_mode_legacy_fb_format(uint32_t bpp, uint32_t depth); uint32_t drm_driver_legacy_fb_format(struct drm_device *dev, uint32_t bpp, uint32_t depth); From d5d6340c0b65ce1340c7403b5fc5e54fc8239dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:06 +0300 Subject: [PATCH 266/358] drm: Look up the format info earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Look up the format info already in drm_internal_framebuffer_create() so that we can later pass it along to .fb_create(). Currently various drivers are doing additional lookups in their .fb_create() implementations, and these lookups are rather expensive now (given how many different pixel formats we have). v2: Fix commit msg (Thomas) Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_framebuffer.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 18a0267e374e..ae09ef6977b2 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -153,18 +153,11 @@ int drm_mode_addfb_ioctl(struct drm_device *dev, } static int framebuffer_check(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *r) { - const struct drm_format_info *info; int i; - /* check if the format is supported at all */ - if (!__drm_format_info(r->pixel_format)) { - drm_dbg_kms(dev, "bad framebuffer format %p4cc\n", - &r->pixel_format); - return -EINVAL; - } - if (r->width == 0) { drm_dbg_kms(dev, "bad framebuffer width %u\n", r->width); return -EINVAL; @@ -175,9 +168,6 @@ static int framebuffer_check(struct drm_device *dev, return -EINVAL; 
} - /* now let the driver pick its own format info */ - info = drm_get_format_info(dev, r->pixel_format, r->modifier[0]); - for (i = 0; i < info->num_planes; i++) { unsigned int width = drm_format_info_plane_width(info, r->width, i); unsigned int height = drm_format_info_plane_height(info, r->height, i); @@ -272,6 +262,7 @@ drm_internal_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv) { struct drm_mode_config *config = &dev->mode_config; + const struct drm_format_info *info; struct drm_framebuffer *fb; int ret; @@ -297,7 +288,17 @@ drm_internal_framebuffer_create(struct drm_device *dev, return ERR_PTR(-EINVAL); } - ret = framebuffer_check(dev, r); + /* check if the format is supported at all */ + if (!__drm_format_info(r->pixel_format)) { + drm_dbg_kms(dev, "bad framebuffer format %p4cc\n", + &r->pixel_format); + return ERR_PTR(-EINVAL); + } + + /* now let the driver pick its own format info */ + info = drm_get_format_info(dev, r->pixel_format, r->modifier[0]); + + ret = framebuffer_check(dev, info, r); if (ret) return ERR_PTR(ret); From 81112eaac559ccd451b3dce3bbb64d6b69083961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:07 +0300 Subject: [PATCH 267/358] drm: Pass the format info to .fb_create() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format information from the top to .fb_create() so that we can avoid redundant (and somewhat expensive) lookups in the drivers. Done with cocci (with some manual fixups): @@ identifier func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... ( - const struct drm_format_info *info = drm_get_format_info(...); | - const struct drm_format_info *info; ... - info = drm_get_format_info(...); ) <... 
- if (!info) - return ...; ...> } @@ identifier func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... } @find@ identifier fb_create_func =~ ".*create.*"; identifier dev, file, mode_cmd; @@ struct drm_framebuffer *fb_create_func( struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); @@ identifier find.fb_create_func; expression dev, file, mode_cmd; @@ fb_create_func(dev, file + ,info ,mode_cmd) @@ expression dev, file, mode_cmd; @@ drm_gem_fb_create(dev, file + ,info ,mode_cmd) @@ expression dev, file, mode_cmd; @@ drm_gem_fb_create_with_dirty(dev, file + ,info ,mode_cmd) @@ expression dev, file_priv, mode_cmd; identifier info, fb; @@ info = drm_get_format_info(...); ... fb = dev->mode_config.funcs->fb_create(dev, file_priv + ,info ,mode_cmd); @@ identifier dev, file_priv, mode_cmd; @@ struct drm_mode_config_funcs { ... struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); ... 
}; v2: Fix kernel docs (Laurent) Fix commit msg (Geert) Cc: Alex Deucher Cc: Liviu Dudau Cc: Maxime Ripard Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Chun-Kuang Hu Cc: Philipp Zabel Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Marek Vasut Cc: Stefan Agner Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Kieran Bingham Cc: Biju Das Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Thierry Reding Cc: Mikko Perttunen Cc: Dave Stevenson Cc: "Maíra Canal" Cc: Raspberry Pi Kernel Maintenance Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: Oleksandr Andrushchenko Cc: amd-gfx@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: virtualization@lists.linux.dev Cc: spice-devel@lists.freedesktop.org Cc: linux-renesas-soc@vger.kernel.org Cc: linux-tegra@vger.kernel.org Cc: Laurent Pinchart Reviewed-by: Geert Uytterhoeven Reviewed-by: Thomas Zimmermann Reviewed-by: Dmitry Baryshkov Acked-by: Liviu Dudau Reviewed-by: Laurent Pinchart Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 1 + .../gpu/drm/arm/display/komeda/komeda_framebuffer.c | 1 + .../gpu/drm/arm/display/komeda/komeda_framebuffer.h | 1 + drivers/gpu/drm/arm/malidp_drv.c | 3 ++- drivers/gpu/drm/armada/armada_fb.c | 6 ++---- drivers/gpu/drm/armada/armada_fb.h | 3 ++- drivers/gpu/drm/drm_framebuffer.c | 2 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 4 ++++ drivers/gpu/drm/exynos/exynos_drm_fb.c | 4 +--- drivers/gpu/drm/gma500/framebuffer.c | 1 + drivers/gpu/drm/i915/display/intel_fb.c | 1 + 
drivers/gpu/drm/i915/display/intel_fb.h | 1 + drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 5 +++-- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 7 ++----- drivers/gpu/drm/msm/msm_drv.h | 3 ++- drivers/gpu/drm/msm/msm_fb.c | 6 ++---- drivers/gpu/drm/mxsfb/mxsfb_drv.c | 10 ++-------- drivers/gpu/drm/nouveau/nouveau_display.c | 1 + drivers/gpu/drm/nouveau/nouveau_display.h | 1 + drivers/gpu/drm/omapdrm/omap_fb.c | 6 ++---- drivers/gpu/drm/omapdrm/omap_fb.h | 3 ++- drivers/gpu/drm/qxl/qxl_display.c | 1 + drivers/gpu/drm/radeon/radeon_display.c | 1 + drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c | 3 ++- drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c | 3 ++- drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c | 3 ++- drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 7 +------ drivers/gpu/drm/tegra/drm.h | 1 + drivers/gpu/drm/tegra/fb.c | 4 +--- drivers/gpu/drm/tests/drm_framebuffer_test.c | 1 + drivers/gpu/drm/vc4/vc4_kms.c | 3 ++- drivers/gpu/drm/virtio/virtgpu_display.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 1 + drivers/gpu/drm/xen/xen_drm_front_kms.c | 1 + drivers/gpu/drm/xlnx/zynqmp_kms.c | 3 ++- include/drm/drm_gem_framebuffer_helper.h | 3 +++ include/drm/drm_mode_config.h | 1 + 38 files changed, 59 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 9e463d3ee927..76cf908cf976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1297,6 +1297,7 @@ static int amdgpu_display_framebuffer_init(struct drm_device *dev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct amdgpu_framebuffer *amdgpu_fb; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index dfa0d642ac16..930c171473b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h 
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,6 +44,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); const struct drm_format_info * amdgpu_lookup_format_info(u32 format, uint64_t modifier); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index df5da5a44755..29b05482f713 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -157,6 +157,7 @@ komeda_fb_none_afbc_size_check(struct komeda_dev *mdev, struct komeda_fb *kfb, struct drm_framebuffer * komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct komeda_dev *mdev = dev->dev_private; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h index c61ca98a3a63..02b2b8ae482a 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h @@ -37,6 +37,7 @@ struct komeda_fb { struct drm_framebuffer * komeda_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int komeda_fb_check_src_coords(const struct komeda_fb *kfb, u32 src_x, u32 src_y, u32 src_w, u32 src_h); diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 558e44a7e627..8b920566f2e8 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -377,6 +377,7 @@ malidp_verify_afbc_framebuffer(struct drm_device *dev, struct drm_file *file, static struct drm_framebuffer * malidp_fb_create(struct drm_device *dev, 
struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { if (mode_cmd->modifier[0]) { @@ -384,7 +385,7 @@ malidp_fb_create(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file, mode_cmd); + return drm_gem_fb_create(dev, file, info, mode_cmd); } static const struct drm_mode_config_funcs malidp_mode_config_funcs = { diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index 85fc2cb50544..597720e229c2 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -84,11 +84,9 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, } struct drm_framebuffer *armada_fb_create(struct drm_device *dev, - struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode) + struct drm_file *dfile, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode->pixel_format, - mode->modifier[0]); struct armada_gem_object *obj; struct armada_framebuffer *dfb; int ret; diff --git a/drivers/gpu/drm/armada/armada_fb.h b/drivers/gpu/drm/armada/armada_fb.h index c5bc53d7e0c4..41ba76dd80d6 100644 --- a/drivers/gpu/drm/armada/armada_fb.h +++ b/drivers/gpu/drm/armada/armada_fb.h @@ -19,5 +19,6 @@ struct armada_framebuffer { struct armada_framebuffer *armada_framebuffer_create(struct drm_device *, const struct drm_mode_fb_cmd2 *, struct armada_gem_object *); struct drm_framebuffer *armada_fb_create(struct drm_device *dev, - struct drm_file *dfile, const struct drm_mode_fb_cmd2 *mode); + struct drm_file *dfile, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode); #endif diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index ae09ef6977b2..61a7213f2389 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -302,7 +302,7 @@ 
drm_internal_framebuffer_create(struct drm_device *dev, if (ret) return ERR_PTR(ret); - fb = dev->mode_config.funcs->fb_create(dev, file_priv, r); + fb = dev->mode_config.funcs->fb_create(dev, file_priv, info, r); if (IS_ERR(fb)) { drm_dbg_kms(dev, "could not create framebuffer\n"); return fb; diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 62eec0fddc3e..7c0d2174dbc9 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -264,6 +264,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { * &drm_mode_config_funcs.fb_create callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * * This function creates a new framebuffer object described by @@ -283,6 +284,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs = { */ struct drm_framebuffer * drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, @@ -301,6 +303,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs_dirtyfb = { * &drm_mode_config_funcs.fb_create callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * * This function creates a new framebuffer object described by @@ -321,6 +324,7 @@ static const struct drm_framebuffer_funcs drm_gem_fb_funcs_dirtyfb = { */ struct drm_framebuffer * drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, diff --git 
a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index bcf7b534d1f7..9ae526825726 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -94,11 +94,9 @@ exynos_drm_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; struct drm_framebuffer *fb; int i; diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index c82e623a2071..a4a18ec2dd56 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -97,6 +97,7 @@ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, */ static struct drm_framebuffer *psb_user_framebuffer_create (struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { struct drm_gem_object *obj; diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index e221db072de2..96edc791c33b 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2324,6 +2324,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *user_mode_cmd) { struct drm_framebuffer *fb; diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 7d1267fbeee2..00181c4a67dc 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -109,6 +109,7 @@ intel_framebuffer_create(struct 
drm_gem_object *obj, struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *user_mode_cmd); bool intel_fb_modifier_uses_dpt(struct intel_display *display, u64 modifier); diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index f851e9ffdb28..9db1ceaed518 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -901,14 +901,15 @@ static void ingenic_drm_disable_vblank(struct drm_crtc *crtc) static struct drm_framebuffer * ingenic_drm_gem_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct ingenic_drm *priv = drm_device_get_priv(drm); if (priv->soc_info->map_noncoherent) - return drm_gem_fb_create_with_dirty(drm, file, mode_cmd); + return drm_gem_fb_create_with_dirty(drm, file, info, mode_cmd); - return drm_gem_fb_create(drm, file, mode_cmd); + return drm_gem_fb_create(drm, file, info, mode_cmd); } static struct drm_gem_object * diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 0ebcfcbc258b..d5e6bab36414 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -43,16 +43,13 @@ static const struct drm_mode_config_helper_funcs mtk_drm_mode_config_helpers = { static struct drm_framebuffer * mtk_drm_mode_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - cmd->pixel_format, - cmd->modifier[0]); - if (info->num_planes != 1) return ERR_PTR(-EINVAL); - return drm_gem_fb_create(dev, file, cmd); + return drm_gem_fb_create(dev, file, info, cmd); } static const struct drm_mode_config_funcs mtk_drm_mode_config_funcs = { diff --git 
a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 9875ca62e9ad..985db9febd98 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -260,7 +260,8 @@ uint32_t msm_framebuffer_iova(struct drm_framebuffer *fb, int plane); struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane); const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb); struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * msm_alloc_stolen_fb(struct drm_device *dev, int w, int h, int p, uint32_t format); diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index d8a7ac4595bc..f151244e8cfb 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -139,11 +139,9 @@ const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb) } struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); struct drm_gem_object *bos[4] = {0}; struct drm_framebuffer *fb; int ret, i, n = info->num_planes; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 09329af9b01e..0b756da2fec2 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -91,21 +91,15 @@ void mxsfb_disable_axi_clk(struct mxsfb_drm_private *mxsfb) static struct drm_framebuffer * mxsfb_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct 
drm_format_info *info; - - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (!info) - return ERR_PTR(-EINVAL); - if (mode_cmd->width * info->cpp[0] != mode_cmd->pitches[0]) { dev_dbg(dev->dev, "Invalid pitch: fb width must match pitch\n"); return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } static const struct drm_mode_config_funcs mxsfb_mode_config_funcs = { diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index bd9a85f4b4fc..1ddd92901526 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -333,6 +333,7 @@ nouveau_framebuffer_new(struct drm_device *dev, struct drm_framebuffer * nouveau_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_framebuffer *fb; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index 1f506f8b289c..e45f211501f6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -67,5 +67,6 @@ nouveau_framebuffer_get_layout(struct drm_framebuffer *fb, uint32_t *tile_mode, struct drm_framebuffer * nouveau_user_framebuffer_create(struct drm_device *, struct drm_file *, + const struct drm_format_info *, const struct drm_mode_fb_cmd2 *); #endif diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index e18878068c57..36afcd1c1fd7 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -335,11 +335,9 @@ void omap_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m) #endif struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) + struct drm_file *file, const 
struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); unsigned int num_planes = info->num_planes; struct drm_gem_object *bos[4]; struct drm_framebuffer *fb; diff --git a/drivers/gpu/drm/omapdrm/omap_fb.h b/drivers/gpu/drm/omapdrm/omap_fb.h index b75f0b5ef1d8..0873f953cf1d 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.h +++ b/drivers/gpu/drm/omapdrm/omap_fb.h @@ -20,7 +20,8 @@ struct omap_overlay_info; struct seq_file; struct drm_framebuffer *omap_framebuffer_create(struct drm_device *dev, - struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); + struct drm_file *file, const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); int omap_framebuffer_pin(struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 70aff64ced87..f7bc83f2d489 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1176,6 +1176,7 @@ static int qdev_output_init(struct drm_device *dev, int num_output) static struct drm_framebuffer * qxl_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { return drm_gem_fb_create_with_funcs(dev, file_priv, mode_cmd, diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 8f5f8abcb1b4..85b714ac9882 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1314,6 +1314,7 @@ radeon_framebuffer_init(struct drm_device *dev, static struct drm_framebuffer * radeon_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, 
const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c index 4c8fe83dd610..216219accfd9 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_kms.c @@ -426,6 +426,7 @@ int rcar_du_dumb_create(struct drm_file *file, struct drm_device *dev, static struct drm_framebuffer * rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct rcar_du_device *rcdu = to_rcar_du_device(dev); @@ -490,7 +491,7 @@ rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c index 55a97691e9b2..87f171145a23 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c @@ -191,6 +191,7 @@ int rzg2l_du_dumb_create(struct drm_file *file, struct drm_device *dev, static struct drm_framebuffer * rzg2l_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct rzg2l_du_format_info *format; @@ -214,7 +215,7 @@ rzg2l_du_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-EINVAL); } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* ----------------------------------------------------------------------------- diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c index 4202ab00fb0c..fd9460da1789 100644 --- 
a/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_kms.c @@ -117,6 +117,7 @@ const struct shmob_drm_format_info *shmob_drm_format_info(u32 fourcc) static struct drm_framebuffer * shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { const struct shmob_drm_format_info *format; @@ -144,7 +145,7 @@ shmob_drm_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } static const struct drm_mode_config_funcs shmob_drm_mode_config_funcs = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index 66762ca54a98..f19113e5ae8f 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -30,17 +30,12 @@ static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = static struct drm_framebuffer * rockchip_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_afbc_framebuffer *afbc_fb; - const struct drm_format_info *info; int ret; - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (!info) - return ERR_PTR(-ENOMEM); - afbc_fb = kzalloc(sizeof(*afbc_fb), GFP_KERNEL); if (!afbc_fb) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 0b65e69f3a8a..77e520c43f72 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -190,6 +190,7 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, unsigned int num_planes); struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd); #ifdef 
CONFIG_DRM_FBDEV_EMULATION diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 634c6346d947..24907573e758 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -132,11 +132,9 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *cmd) { - const struct drm_format_info *info = drm_get_format_info(drm, - cmd->pixel_format, - cmd->modifier[0]); struct tegra_bo *planes[4]; struct drm_gem_object *gem; struct drm_framebuffer *fb; diff --git a/drivers/gpu/drm/tests/drm_framebuffer_test.c b/drivers/gpu/drm/tests/drm_framebuffer_test.c index 6ea04cc8f324..9b8e01e8cd91 100644 --- a/drivers/gpu/drm/tests/drm_framebuffer_test.c +++ b/drivers/gpu/drm/tests/drm_framebuffer_test.c @@ -363,6 +363,7 @@ struct drm_framebuffer_test_priv { static struct drm_framebuffer *fb_create_mock(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_framebuffer_test_priv *priv = container_of(dev, typeof(*priv), dev); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f5b167417428..8f983edb81ff 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -530,6 +530,7 @@ static int vc4_atomic_commit_setup(struct drm_atomic_state *state) static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vc4_dev *vc4 = to_vc4_dev(dev); @@ -568,7 +569,7 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, mode_cmd = &mode_cmd_local; } - return drm_gem_fb_create(dev, file_priv, mode_cmd); + return drm_gem_fb_create(dev, file_priv, info, mode_cmd); } /* Our CTM has some peculiar limitations: we can only enable it for one 
CRTC diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 59a45e74a641..f9a98fbbabd1 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -293,6 +293,7 @@ static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) static struct drm_framebuffer * virtio_gpu_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj = NULL; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 05b1c54a070c..2d48a28cda9c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -712,6 +712,7 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_private *dev_priv = vmw_priv(dev); diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index dfa78a49a6d9..a360003bee47 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -54,6 +54,7 @@ static const struct drm_framebuffer_funcs fb_funcs = { static struct drm_framebuffer * fb_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct xen_drm_front_drm_info *drm_info = dev->dev_private; diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c b/drivers/gpu/drm/xlnx/zynqmp_kms.c index b47463473472..2bee0a2275ed 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_kms.c +++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c @@ -373,6 +373,7 @@ static int zynqmp_dpsub_dumb_create(struct drm_file *file_priv, static struct drm_framebuffer * zynqmp_dpsub_fb_create(struct drm_device *drm, struct drm_file 
*file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct zynqmp_dpsub *dpsub = to_zynqmp_dpsub(drm); @@ -383,7 +384,7 @@ zynqmp_dpsub_fb_create(struct drm_device *drm, struct drm_file *file_priv, for (i = 0; i < ARRAY_SIZE(cmd.pitches); ++i) cmd.pitches[i] = ALIGN(cmd.pitches[i], dpsub->dma_align); - return drm_gem_fb_create(drm, file_priv, &cmd); + return drm_gem_fb_create(drm, file_priv, info, &cmd); } static const struct drm_mode_config_funcs zynqmp_dpsub_mode_config_funcs = { diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index d302521f3dd4..4fdf9d3d1863 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -8,6 +8,7 @@ struct drm_afbc_framebuffer; struct drm_device; struct drm_fb_helper_surface_size; struct drm_file; +struct drm_format_info; struct drm_framebuffer; struct drm_framebuffer_funcs; struct drm_gem_object; @@ -32,9 +33,11 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_gem_fb_vmap(struct drm_framebuffer *fb, struct iosys_map *map, diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index e971e1b8a850..2e848b816218 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -82,6 +82,7 @@ struct drm_mode_config_funcs { */ struct drm_framebuffer *(*fb_create)(struct drm_device *dev, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); /** From a34cc7bf1034280904f9683e260f9d9e9fd4b84f Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:08 +0300 Subject: [PATCH 268/358] drm: Allow the caller to pass in the format info to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Soon all drivers should have the format info already available in the places where they call drm_helper_mode_fill_fb_struct(). Allow it to be passed along into drm_helper_mode_fill_fb_struct() instead of doing yet another redundant lookup. Start by always passing in NULL and still doing the extra lookup. The actual changes to avoid the lookup will follow. Done with cocci (with some manual fixups): @@ identifier dev, fb, mode_cmd; expression get_format_info; @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { ... - fb->format = get_format_info; + fb->format = info ?: get_format_info; ... 
} @@ identifier dev, fb, mode_cmd; @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); @@ expression dev, fb, mode_cmd; @@ drm_helper_mode_fill_fb_struct(dev, fb + ,NULL ,mode_cmd); Cc: Alex Deucher Cc: Liviu Dudau Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Patrik Jakobsson Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: Lyude Paul Cc: Danilo Krummrich Cc: Tomi Valkeinen Cc: Thierry Reding Cc: Mikko Perttunen Cc: Gerd Hoffmann Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Cc: Zack Rusin Cc: Broadcom internal kernel review list Cc: amd-gfx@lists.freedesktop.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Cc: virtualization@lists.linux.dev Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Reviewed-by: Dmitry Baryshkov Reviewed-by: Liviu Dudau Acked-by: Alex Deucher Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c | 2 +- drivers/gpu/drm/armada/armada_fb.c | 2 +- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 2 +- drivers/gpu/drm/drm_modeset_helper.c | 6 ++++-- drivers/gpu/drm/exynos/exynos_drm_fb.c | 2 +- drivers/gpu/drm/gma500/framebuffer.c | 2 +- drivers/gpu/drm/i915/display/intel_fb.c | 2 +- drivers/gpu/drm/msm/msm_fb.c | 2 +- drivers/gpu/drm/nouveau/nouveau_display.c | 2 +- drivers/gpu/drm/omapdrm/omap_fb.c | 2 +- drivers/gpu/drm/radeon/radeon_display.c | 2 +- drivers/gpu/drm/tegra/fb.c | 2 +- drivers/gpu/drm/virtio/virtgpu_display.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 4 ++-- include/drm/drm_modeset_helper.h | 2 ++ 16 files changed, 21 
insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 76cf908cf976..866e17fd76fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1202,7 +1202,7 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, int ret; rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &rfb->base, NULL, mode_cmd); /* Verify that the modifier is supported. */ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index 29b05482f713..acd8e505ebc7 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -178,7 +178,7 @@ komeda_fb_create(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); } - drm_helper_mode_fill_fb_struct(dev, &kfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &kfb->base, NULL, mode_cmd); if (kfb->base.modifier) ret = komeda_fb_afbc_size_check(kfb, file, mode_cmd); diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index 597720e229c2..7e94ec5bd4f4 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -64,7 +64,7 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, dfb->mod = config; dfb->fb.obj[0] = &obj->obj; - drm_helper_mode_fill_fb_struct(dev, &dfb->fb, mode); + drm_helper_mode_fill_fb_struct(dev, &dfb->fb, NULL, mode); ret = drm_framebuffer_init(dev, &dfb->fb, &armada_fb_funcs); if (ret) { diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 7c0d2174dbc9..6fe4094bd0fe 100644 --- 
a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -75,7 +75,7 @@ drm_gem_fb_init(struct drm_device *dev, unsigned int i; int ret; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); for (i = 0; i < num_planes; i++) fb->obj[i] = obj[i]; diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index 3fed2d5ab1d6..89ba99970735 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -74,6 +74,7 @@ EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head); * drm_helper_mode_fill_fb_struct - fill out framebuffer metadata * @dev: DRM device * @fb: drm_framebuffer object to fill out + * @info: pixel format information * @mode_cmd: metadata from the userspace fb creation request * * This helper can be used in a drivers fb_create callback to pre-fill the fb's @@ -81,13 +82,14 @@ EXPORT_SYMBOL(drm_helper_move_panel_connectors_to_head); */ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { int i; fb->dev = dev; - fb->format = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); + fb->format = info ? 
: drm_get_format_info(dev, mode_cmd->pixel_format, + mode_cmd->modifier[0]); fb->width = mode_cmd->width; fb->height = mode_cmd->height; for (i = 0; i < 4; i++) { diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 9ae526825726..7091d31835ec 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -76,7 +76,7 @@ exynos_drm_framebuffer_init(struct drm_device *dev, fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index a4a18ec2dd56..f9ade8361354 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -47,7 +47,7 @@ static int psb_framebuffer_init(struct drm_device *dev, if (mode_cmd->pitches[0] & 63) return -EINVAL; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); fb->obj[0] = obj; ret = drm_framebuffer_init(dev, fb, &psb_fb_funcs); if (ret) { diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 96edc791c33b..1f5f8c2e9d31 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2254,7 +2254,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err_frontbuffer_put; } - drm_helper_mode_fill_fb_struct(display->drm, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(display->drm, fb, NULL, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) { unsigned int stride_alignment; diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index f151244e8cfb..a82a65716975 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -227,7 +227,7 @@ static struct drm_framebuffer 
*msm_framebuffer_init(struct drm_device *dev, msm_fb->base.obj[i] = bos[i]; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); ret = drm_framebuffer_init(dev, fb, &msm_framebuffer_funcs); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 1ddd92901526..e1e542126310 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -321,7 +321,7 @@ nouveau_framebuffer_new(struct drm_device *dev, if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL))) return -ENOMEM; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); fb->obj[0] = gem; ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs); diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 36afcd1c1fd7..30c81e2e5d6b 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -440,7 +440,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, plane->dma_addr = 0; } - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); ret = drm_framebuffer_init(dev, fb, &omap_framebuffer_funcs); if (ret) { diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 85b714ac9882..b4bf5dfeea2d 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1302,7 +1302,7 @@ radeon_framebuffer_init(struct drm_device *dev, { int ret; fb->obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); ret = drm_framebuffer_init(dev, fb, &radeon_fb_funcs); if (ret) { fb->obj[0] = NULL; diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index 24907573e758..d359683f5ce6 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ 
b/drivers/gpu/drm/tegra/fb.c @@ -114,7 +114,7 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, if (!fb) return ERR_PTR(-ENOMEM); - drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd); + drm_helper_mode_fill_fb_struct(drm, fb, NULL, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) fb->obj[i] = &planes[i]->gem; diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index f9a98fbbabd1..93763b91bab5 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -73,7 +73,7 @@ virtio_gpu_framebuffer_init(struct drm_device *dev, vgfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &vgfb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vgfb->base, NULL, mode_cmd); ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs); if (ret) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 2d48a28cda9c..35965e29e408 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -548,7 +548,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, goto out_err1; } - drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, NULL, mode_cmd); memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); vmw_user_object_ref(&vfbs->uo); @@ -634,7 +634,7 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, } vfbd->base.base.obj[0] = &bo->tbo.base; - drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, NULL, mode_cmd); vfbd->base.bo = true; vfbd->buffer = vmw_bo_reference(bo); *out = &vfbd->base; diff --git a/include/drm/drm_modeset_helper.h b/include/drm/drm_modeset_helper.h index 995fd981cab0..7e3d4c5a7f66 100644 --- a/include/drm/drm_modeset_helper.h +++ b/include/drm/drm_modeset_helper.h @@ -26,6 +26,7 @@ struct drm_crtc; struct 
drm_crtc_funcs; struct drm_device; +struct drm_format_info; struct drm_framebuffer; struct drm_mode_fb_cmd2; @@ -33,6 +34,7 @@ void drm_helper_move_panel_connectors_to_head(struct drm_device *); void drm_helper_mode_fill_fb_struct(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); int drm_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, From e3c5074b1fc2b49c456d8dc567d59b800b45e267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:09 +0300 Subject: [PATCH 269/358] drm/malidp: Pass along the format info from .fb_create() malidp_verify_afbc_framebuffer_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to malidp_verify_afbc_framebuffer_size() to avoid the redundant lookup. Cc: Liviu Dudau Reviewed-by: Thomas Zimmermann Reviewed-by: Liviu Dudau Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/arm/malidp_drv.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 8b920566f2e8..bc5f5e9798c3 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -306,10 +306,10 @@ malidp_verify_afbc_framebuffer_caps(struct drm_device *dev, static bool malidp_verify_afbc_framebuffer_size(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { int n_superblocks = 0; - const struct drm_format_info *info; struct drm_gem_object *objs = NULL; u32 afbc_superblock_size = 0, afbc_superblock_height = 0; u32 afbc_superblock_width = 0, afbc_size = 0; @@ -325,9 +325,6 @@ malidp_verify_afbc_framebuffer_size(struct drm_device *dev, return false; } - info = 
drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - n_superblocks = (mode_cmd->width / afbc_superblock_width) * (mode_cmd->height / afbc_superblock_height); @@ -367,10 +364,11 @@ malidp_verify_afbc_framebuffer_size(struct drm_device *dev, static bool malidp_verify_afbc_framebuffer(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { if (malidp_verify_afbc_framebuffer_caps(dev, mode_cmd)) - return malidp_verify_afbc_framebuffer_size(dev, file, mode_cmd); + return malidp_verify_afbc_framebuffer_size(dev, file, info, mode_cmd); return false; } @@ -381,7 +379,7 @@ malidp_fb_create(struct drm_device *dev, struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd) { if (mode_cmd->modifier[0]) { - if (!malidp_verify_afbc_framebuffer(dev, file, mode_cmd)) + if (!malidp_verify_afbc_framebuffer(dev, file, info, mode_cmd)) return ERR_PTR(-EINVAL); } From 04a5889cf75aa5b59bd1e13c33eccaf49f3f9d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:10 +0300 Subject: [PATCH 270/358] drm/gem: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format info from .fb_create() to eliminate the redundant drm_get_format_info() calls from the gem fb code. 
v2: Fix kernel docs (Laurent) Cc: Dave Airlie Cc: Gerd Hoffmann Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Cc: Oleksandr Andrushchenko Cc: virtualization@lists.linux.dev Cc: spice-devel@lists.freedesktop.org Cc: Laurent Pinchart Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 23 +++++++++----------- drivers/gpu/drm/qxl/qxl_display.c | 2 +- drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 3 ++- drivers/gpu/drm/xen/xen_drm_front_kms.c | 2 +- include/drm/drm_gem_framebuffer_helper.h | 2 ++ 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 6fe4094bd0fe..1d58b0259df4 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -68,6 +68,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_get_obj); static int drm_gem_fb_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **obj, unsigned int num_planes, const struct drm_framebuffer_funcs *funcs) @@ -75,7 +76,7 @@ drm_gem_fb_init(struct drm_device *dev, unsigned int i; int ret; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); for (i = 0; i < num_planes; i++) fb->obj[i] = obj[i]; @@ -136,6 +137,7 @@ EXPORT_SYMBOL(drm_gem_fb_create_handle); * @dev: DRM device * @fb: framebuffer object * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * @funcs: vtable to be used for the new framebuffer object * @@ -152,21 +154,14 @@ EXPORT_SYMBOL(drm_gem_fb_create_handle); int 
drm_gem_fb_init_with_funcs(struct drm_device *dev, struct drm_framebuffer *fb, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs) { - const struct drm_format_info *info; struct drm_gem_object *objs[DRM_FORMAT_MAX_PLANES]; unsigned int i; int ret; - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (!info) { - drm_dbg_kms(dev, "Failed to get FB format info\n"); - return -EINVAL; - } - if (drm_drv_uses_atomic_modeset(dev) && !drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -201,7 +196,7 @@ int drm_gem_fb_init_with_funcs(struct drm_device *dev, } } - ret = drm_gem_fb_init(dev, fb, mode_cmd, objs, i, funcs); + ret = drm_gem_fb_init(dev, fb, info, mode_cmd, objs, i, funcs); if (ret) goto err_gem_object_put; @@ -222,6 +217,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs); * callback * @dev: DRM device * @file: DRM file that holds the GEM handle(s) backing the framebuffer + * @info: pixel format information * @mode_cmd: Metadata from the userspace framebuffer creation request * @funcs: vtable to be used for the new framebuffer object * @@ -234,6 +230,7 @@ EXPORT_SYMBOL_GPL(drm_gem_fb_init_with_funcs); */ struct drm_framebuffer * drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs) { @@ -244,7 +241,7 @@ drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, if (!fb) return ERR_PTR(-ENOMEM); - ret = drm_gem_fb_init_with_funcs(dev, fb, file, mode_cmd, funcs); + ret = drm_gem_fb_init_with_funcs(dev, fb, file, info, mode_cmd, funcs); if (ret) { kfree(fb); return ERR_PTR(ret); @@ -287,7 +284,7 @@ drm_gem_fb_create(struct drm_device *dev, struct drm_file *file, const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return 
drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file, info, mode_cmd, &drm_gem_fb_funcs); } EXPORT_SYMBOL_GPL(drm_gem_fb_create); @@ -327,7 +324,7 @@ drm_gem_fb_create_with_dirty(struct drm_device *dev, struct drm_file *file, const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return drm_gem_fb_create_with_funcs(dev, file, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file, info, mode_cmd, &drm_gem_fb_funcs_dirtyfb); } EXPORT_SYMBOL_GPL(drm_gem_fb_create_with_dirty); diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index f7bc83f2d489..ae7e572b1b4a 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -1179,7 +1179,7 @@ qxl_user_framebuffer_create(struct drm_device *dev, const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - return drm_gem_fb_create_with_funcs(dev, file_priv, mode_cmd, + return drm_gem_fb_create_with_funcs(dev, file_priv, info, mode_cmd, &qxl_fb_funcs); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index f19113e5ae8f..d496ac0feb08 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -40,7 +40,8 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file, if (!afbc_fb) return ERR_PTR(-ENOMEM); - ret = drm_gem_fb_init_with_funcs(dev, &afbc_fb->base, file, mode_cmd, + ret = drm_gem_fb_init_with_funcs(dev, &afbc_fb->base, + file, info, mode_cmd, &rockchip_drm_fb_funcs); if (ret) { kfree(afbc_fb); diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c index a360003bee47..806ec66ee7f7 100644 --- a/drivers/gpu/drm/xen/xen_drm_front_kms.c +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -62,7 +62,7 @@ fb_create(struct drm_device *dev, struct drm_file *filp, struct drm_gem_object *gem_obj; int ret; - fb = 
drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); + fb = drm_gem_fb_create_with_funcs(dev, filp, info, mode_cmd, &fb_funcs); if (IS_ERR(fb)) return fb; diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 4fdf9d3d1863..971d266ab1ba 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -25,10 +25,12 @@ int drm_gem_fb_create_handle(struct drm_framebuffer *fb, struct drm_file *file, int drm_gem_fb_init_with_funcs(struct drm_device *dev, struct drm_framebuffer *fb, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * drm_gem_fb_create_with_funcs(struct drm_device *dev, struct drm_file *file, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, const struct drm_framebuffer_funcs *funcs); struct drm_framebuffer * From 283da9e3a9a43e07188f038fc278140a73e781cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:11 +0300 Subject: [PATCH 271/358] drm/gem/afbc: Eliminate redundant drm_get_format_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass along the format info from .fb_create() to eliminate the redundant drm_get_format_info() calls from the afbc code. 
Cc: Sandy Huang Cc: "Heiko Stübner" Cc: Andy Yan Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_gem_framebuffer_helper.c | 18 ++++++------------ drivers/gpu/drm/rockchip/rockchip_drm_fb.c | 2 +- include/drm/drm_gem_framebuffer_helper.h | 1 + 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 1d58b0259df4..0477d594fab3 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -500,13 +500,9 @@ EXPORT_SYMBOL(drm_gem_fb_end_cpu_access); // TODO Drop this function and replace by drm_format_info_bpp() once all // DRM_FORMAT_* provide proper block info in drivers/gpu/drm/drm_fourcc.c static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { - const struct drm_format_info *info; - - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - switch (info->format) { case DRM_FORMAT_YUV420_8BIT: return 12; @@ -520,6 +516,7 @@ static __u32 drm_gem_afbc_get_bpp(struct drm_device *dev, } static int drm_gem_afbc_min_size(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb) { @@ -560,7 +557,7 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, afbc_fb->aligned_height = ALIGN(mode_cmd->height, h_alignment); afbc_fb->offset = mode_cmd->offsets[0]; - bpp = drm_gem_afbc_get_bpp(dev, mode_cmd); + bpp = drm_gem_afbc_get_bpp(dev, info, mode_cmd); if (!bpp) { drm_dbg_kms(dev, "Invalid AFBC bpp value: %d\n", bpp); return -EINVAL; @@ -582,6 +579,7 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, * * @dev: DRM device * @afbc_fb: afbc-specific framebuffer + * @info: pixel format 
information * @mode_cmd: Metadata from the userspace framebuffer creation request * @afbc_fb: afbc framebuffer * @@ -595,20 +593,16 @@ static int drm_gem_afbc_min_size(struct drm_device *dev, * Zero on success or a negative error value on failure. */ int drm_gem_fb_afbc_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb) { - const struct drm_format_info *info; struct drm_gem_object **objs; int ret; objs = afbc_fb->base.obj; - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (!info) - return -EINVAL; - ret = drm_gem_afbc_min_size(dev, mode_cmd, afbc_fb); + ret = drm_gem_afbc_min_size(dev, info, mode_cmd, afbc_fb); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index d496ac0feb08..2f469d370021 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -49,7 +49,7 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file, } if (drm_is_afbc(mode_cmd->modifier[0])) { - ret = drm_gem_fb_afbc_init(dev, mode_cmd, afbc_fb); + ret = drm_gem_fb_afbc_init(dev, info, mode_cmd, afbc_fb); if (ret) { drm_framebuffer_put(&afbc_fb->base); return ERR_PTR(ret); diff --git a/include/drm/drm_gem_framebuffer_helper.h b/include/drm/drm_gem_framebuffer_helper.h index 971d266ab1ba..24f1fd40d553 100644 --- a/include/drm/drm_gem_framebuffer_helper.h +++ b/include/drm/drm_gem_framebuffer_helper.h @@ -52,6 +52,7 @@ void drm_gem_fb_end_cpu_access(struct drm_framebuffer *fb, enum dma_data_directi (((modifier) & AFBC_VENDOR_AND_TYPE_MASK) == DRM_FORMAT_MOD_ARM_AFBC(0)) int drm_gem_fb_afbc_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_afbc_framebuffer *afbc_fb); From b4d360701b76b8f2505b2e349b89f54fc87c678e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:12 +0300 Subject: [PATCH 272/358] drm/amdgpu: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. Cc: Alex Deucher Cc: amd-gfx@lists.freedesktop.org Reviewed-by: Thomas Zimmermann Acked-by: Alex Deucher Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 866e17fd76fd..51bab32fd8c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1196,13 +1196,14 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, struct amdgpu_framebuffer *rfb, struct drm_file *file_priv, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &rfb->base, info, mode_cmd); /* Verify that the modifier is supported. 
*/ if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, mode_cmd->modifier[0])) { @@ -1331,7 +1332,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, } ret = amdgpu_display_gem_fb_verify_and_init(dev, amdgpu_fb, file_priv, - mode_cmd, obj); + info, mode_cmd, obj); if (ret) { kfree(amdgpu_fb); drm_gem_object_put(obj); From 797f8fc4cc839bc7023ca752285f1ec5993aeaeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:13 +0300 Subject: [PATCH 273/358] drm/armada: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. Cc: Russell King Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/armada/armada_fb.c | 8 +++++--- drivers/gpu/drm/armada/armada_fb.h | 1 + drivers/gpu/drm/armada/armada_fbdev.c | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_fb.c b/drivers/gpu/drm/armada/armada_fb.c index 7e94ec5bd4f4..aa4289127086 100644 --- a/drivers/gpu/drm/armada/armada_fb.c +++ b/drivers/gpu/drm/armada/armada_fb.c @@ -18,7 +18,9 @@ static const struct drm_framebuffer_funcs armada_fb_funcs = { }; struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode, struct armada_gem_object *obj) + const struct drm_format_info *info, + const struct drm_mode_fb_cmd2 *mode, + struct armada_gem_object *obj) { struct armada_framebuffer *dfb; uint8_t format, config; @@ -64,7 +66,7 @@ struct armada_framebuffer *armada_framebuffer_create(struct drm_device *dev, dfb->mod = config; dfb->fb.obj[0] = 
&obj->obj; - drm_helper_mode_fill_fb_struct(dev, &dfb->fb, NULL, mode); + drm_helper_mode_fill_fb_struct(dev, &dfb->fb, info, mode); ret = drm_framebuffer_init(dev, &dfb->fb, &armada_fb_funcs); if (ret) { @@ -122,7 +124,7 @@ struct drm_framebuffer *armada_fb_create(struct drm_device *dev, goto err_unref; } - dfb = armada_framebuffer_create(dev, mode, obj); + dfb = armada_framebuffer_create(dev, info, mode, obj); if (IS_ERR(dfb)) { ret = PTR_ERR(dfb); goto err; diff --git a/drivers/gpu/drm/armada/armada_fb.h b/drivers/gpu/drm/armada/armada_fb.h index 41ba76dd80d6..f2b990f055a2 100644 --- a/drivers/gpu/drm/armada/armada_fb.h +++ b/drivers/gpu/drm/armada/armada_fb.h @@ -17,6 +17,7 @@ struct armada_framebuffer { #define drm_fb_obj(fb) drm_to_armada_gem((fb)->obj[0]) struct armada_framebuffer *armada_framebuffer_create(struct drm_device *, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *, struct armada_gem_object *); struct drm_framebuffer *armada_fb_create(struct drm_device *dev, struct drm_file *dfile, const struct drm_format_info *info, diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c index 6ee7ce04ee71..cb53cc91bafb 100644 --- a/drivers/gpu/drm/armada/armada_fbdev.c +++ b/drivers/gpu/drm/armada/armada_fbdev.c @@ -78,7 +78,10 @@ int armada_fbdev_driver_fbdev_probe(struct drm_fb_helper *fbh, return -ENOMEM; } - dfb = armada_framebuffer_create(dev, &mode, obj); + dfb = armada_framebuffer_create(dev, + drm_get_format_info(dev, mode.pixel_format, + mode.modifier[0]), + &mode, obj); /* * A reference is now held by the framebuffer object if From d26e853410fd82d174c83e267d9f809ddd1672e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:14 +0300 Subject: [PATCH 274/358] drm/exynos: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format 
info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-12-ville.syrjala@linux.intel.com --- drivers/gpu/drm/exynos/exynos_drm_fb.c | 5 +++-- drivers/gpu/drm/exynos/exynos_drm_fb.h | 1 + drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 7091d31835ec..ddd73e7f26a3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -56,6 +56,7 @@ static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { struct drm_framebuffer * exynos_drm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct exynos_drm_gem **exynos_gem, int count) @@ -76,7 +77,7 @@ exynos_drm_framebuffer_init(struct drm_device *dev, fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { @@ -124,7 +125,7 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, } } - fb = exynos_drm_framebuffer_init(dev, mode_cmd, exynos_gem, i); + fb = exynos_drm_framebuffer_init(dev, info, mode_cmd, exynos_gem, i); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto err; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h index 2f841bbdddc5..fdc6cb40cc9c 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h @@ -14,6 +14,7 @@ struct drm_framebuffer * exynos_drm_framebuffer_init(struct drm_device *dev, + 
const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct exynos_drm_gem **exynos_gem, int count); diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c index 9526a25e90ac..93de25b77e68 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c @@ -116,7 +116,10 @@ int exynos_drm_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, return PTR_ERR(exynos_gem); helper->fb = - exynos_drm_framebuffer_init(dev, &mode_cmd, &exynos_gem, 1); + exynos_drm_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &exynos_gem, 1); if (IS_ERR(helper->fb)) { DRM_DEV_ERROR(dev->dev, "failed to create drm framebuffer.\n"); ret = PTR_ERR(helper->fb); From 4a792c59203b650b90ddbc36d055591a1547ac5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:15 +0300 Subject: [PATCH 275/358] drm/gma500: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. 
Cc: Patrik Jakobsson Reviewed-by: Thomas Zimmermann Acked-by: Patrik Jakobsson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/gma500/fbdev.c | 5 ++++- drivers/gpu/drm/gma500/framebuffer.c | 14 +++++++------- drivers/gpu/drm/gma500/psb_drv.h | 1 + 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/gma500/fbdev.c b/drivers/gpu/drm/gma500/fbdev.c index 8edefea2ef59..d32689cb0e23 100644 --- a/drivers/gpu/drm/gma500/fbdev.c +++ b/drivers/gpu/drm/gma500/fbdev.c @@ -203,7 +203,10 @@ int psb_fbdev_driver_fbdev_probe(struct drm_fb_helper *fb_helper, return PTR_ERR(backing); obj = &backing->base; - fb = psb_framebuffer_create(dev, &mode_cmd, obj); + fb = psb_framebuffer_create(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, obj); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto err_drm_gem_object_put; diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index f9ade8361354..e69b537ded6b 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -29,25 +29,23 @@ static const struct drm_framebuffer_funcs psb_fb_funcs = { */ static int psb_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { - const struct drm_format_info *info; int ret; /* * Reject unknown formats, YUV formats, and formats with more than * 4 bytes per pixel. 
*/ - info = drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); - if (!info || !info->depth || info->cpp[0] > 4) + if (!info->depth || info->cpp[0] > 4) return -EINVAL; if (mode_cmd->pitches[0] & 63) return -EINVAL; - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, info, mode_cmd); fb->obj[0] = obj; ret = drm_framebuffer_init(dev, fb, &psb_fb_funcs); if (ret) { @@ -60,6 +58,7 @@ static int psb_framebuffer_init(struct drm_device *dev, /** * psb_framebuffer_create - create a framebuffer backed by gt * @dev: our DRM device + * @info: pixel format information * @mode_cmd: the description of the requested mode * @obj: the backing object * @@ -69,6 +68,7 @@ static int psb_framebuffer_init(struct drm_device *dev, * TODO: review object references */ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { @@ -79,7 +79,7 @@ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, if (!fb) return ERR_PTR(-ENOMEM); - ret = psb_framebuffer_init(dev, fb, mode_cmd, obj); + ret = psb_framebuffer_init(dev, fb, info, mode_cmd, obj); if (ret) { kfree(fb); return ERR_PTR(ret); @@ -112,7 +112,7 @@ static struct drm_framebuffer *psb_user_framebuffer_create return ERR_PTR(-ENOENT); /* Let the core code do all the work */ - fb = psb_framebuffer_create(dev, cmd, obj); + fb = psb_framebuffer_create(dev, info, cmd, obj); if (IS_ERR(fb)) drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index 7f77cb2b2751..0b27112ec46f 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -594,6 +594,7 @@ extern void psb_modeset_cleanup(struct drm_device *dev); /* framebuffer */ struct drm_framebuffer *psb_framebuffer_create(struct drm_device *dev, + const struct drm_format_info *info, const struct 
drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); From 800df9e50ca2c87675f23783d18d9e60d0801525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:16 +0300 Subject: [PATCH 276/358] drm/i915: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. Reviewed-by: Thomas Zimmermann Acked-by: Rodrigo Vivi Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-14-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fb.c | 8 +++++--- drivers/gpu/drm/i915/display/intel_fb.h | 2 ++ drivers/gpu/drm/i915/display/intel_fbdev_fb.c | 6 +++++- drivers/gpu/drm/i915/display/intel_plane_initial.c | 3 ++- drivers/gpu/drm/xe/display/intel_fbdev_fb.c | 6 +++++- drivers/gpu/drm/xe/display/xe_plane_initial.c | 2 +- 6 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 1f5f8c2e9d31..0da842bd2f2f 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2207,6 +2207,7 @@ static const struct drm_framebuffer_funcs intel_fb_funcs = { int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_display *display = to_intel_display(obj->dev); @@ -2254,7 +2255,7 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err_frontbuffer_put; } - drm_helper_mode_fill_fb_struct(display->drm, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(display->drm, fb, info, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) 
{ unsigned int stride_alignment; @@ -2335,7 +2336,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (IS_ERR(obj)) return ERR_CAST(obj); - fb = intel_framebuffer_create(obj, &mode_cmd); + fb = intel_framebuffer_create(obj, info, &mode_cmd); drm_gem_object_put(obj); return fb; @@ -2343,6 +2344,7 @@ intel_user_framebuffer_create(struct drm_device *dev, struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; @@ -2352,7 +2354,7 @@ intel_framebuffer_create(struct drm_gem_object *obj, if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); + ret = intel_framebuffer_init(intel_fb, obj, info, mode_cmd); if (ret) goto err; diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 00181c4a67dc..403b8b63721a 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -102,9 +102,11 @@ void intel_add_fb_offsets(int *x, int *y, int intel_framebuffer_init(struct intel_framebuffer *ifb, struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * intel_framebuffer_create(struct drm_gem_object *obj, + const struct drm_format_info *info, struct drm_mode_fb_cmd2 *mode_cmd); struct drm_framebuffer * intel_user_framebuffer_create(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c index 8db3af36b2f2..210aee9ae88b 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev_fb.c @@ -62,7 +62,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, return ERR_PTR(-ENOMEM); } - fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), &mode_cmd); + fb = intel_framebuffer_create(intel_bo_to_drm_bo(obj), + 
drm_get_format_info(display->drm, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); i915_gem_object_put(obj); return to_intel_framebuffer(fb); diff --git a/drivers/gpu/drm/i915/display/intel_plane_initial.c b/drivers/gpu/drm/i915/display/intel_plane_initial.c index 2194d39a5c98..4246173ed311 100644 --- a/drivers/gpu/drm/i915/display/intel_plane_initial.c +++ b/drivers/gpu/drm/i915/display/intel_plane_initial.c @@ -289,7 +289,8 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.flags = DRM_MODE_FB_MODIFIERS; if (intel_framebuffer_init(to_intel_framebuffer(fb), - intel_bo_to_drm_bo(vma->obj), &mode_cmd)) { + intel_bo_to_drm_bo(vma->obj), + fb->format, &mode_cmd)) { drm_dbg_kms(display->drm, "intel fb init failed\n"); goto err_vma; } diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c index b28a94df824f..fba9617a75a5 100644 --- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c +++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c @@ -66,7 +66,11 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper, goto err; } - fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd); + fb = intel_framebuffer_create(&obj->ttm.base, + drm_get_format_info(dev, + mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd); if (IS_ERR(fb)) { xe_bo_unpin_map_no_vm(obj); goto err; diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c index b2ede3af9345..dcbc4b2d3fd9 100644 --- a/drivers/gpu/drm/xe/display/xe_plane_initial.c +++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c @@ -184,7 +184,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; if (intel_framebuffer_init(to_intel_framebuffer(fb), - &bo->ttm.base, &mode_cmd)) { + &bo->ttm.base, fb->format, &mode_cmd)) { drm_dbg_kms(&xe->drm, "intel fb init failed\n"); goto err_bo; } From 7a46d03936727f2342686cbc90e073271b1827db Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:17 +0300 Subject: [PATCH 277/358] drm/komeda: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. Cc: Liviu Dudau Reviewed-by: Thomas Zimmermann Reviewed-by: Liviu Dudau Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-15-ville.syrjala@linux.intel.com --- drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index acd8e505ebc7..901f938aefe0 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -178,7 +178,7 @@ komeda_fb_create(struct drm_device *dev, struct drm_file *file, return ERR_PTR(-EINVAL); } - drm_helper_mode_fill_fb_struct(dev, &kfb->base, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &kfb->base, info, mode_cmd); if (kfb->base.modifier) ret = komeda_fb_afbc_size_check(kfb, file, mode_cmd); From 1506b103105e7f1608da41f8b33e5727088d0211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:18 +0300 Subject: [PATCH 278/358] drm/msm: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookups. For the fbdev case a manual drm_get_format_info() lookup is needed. 
Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Sean Paul Cc: Marijn Suijten Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Reviewed-by: Thomas Zimmermann Acked-by: Dmitry Baryshkov Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-16-ville.syrjala@linux.intel.com --- drivers/gpu/drm/msm/msm_fb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index a82a65716975..1eff615ff9bf 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -30,6 +30,7 @@ struct msm_framebuffer { #define to_msm_framebuffer(x) container_of(x, struct msm_framebuffer, base) static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); static int msm_framebuffer_dirtyfb(struct drm_framebuffer *fb, @@ -154,7 +155,7 @@ struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, } } - fb = msm_framebuffer_init(dev, mode_cmd, bos); + fb = msm_framebuffer_init(dev, info, mode_cmd, bos); if (IS_ERR(fb)) { ret = PTR_ERR(fb); goto out_unref; @@ -169,11 +170,9 @@ struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev, } static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos) { - const struct drm_format_info *info = drm_get_format_info(dev, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); struct msm_drm_private *priv = dev->dev_private; struct msm_kms *kms = priv->kms; struct msm_framebuffer *msm_fb = NULL; @@ -227,7 +226,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, msm_fb->base.obj[i] = bos[i]; } - drm_helper_mode_fill_fb_struct(dev, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, 
info, mode_cmd); ret = drm_framebuffer_init(dev, fb, &msm_framebuffer_funcs); if (ret) { @@ -276,7 +275,10 @@ msm_alloc_stolen_fb(struct drm_device *dev, int w, int h, int p, uint32_t format msm_gem_object_set_name(bo, "stolenfb"); - fb = msm_framebuffer_init(dev, &mode_cmd, &bo); + fb = msm_framebuffer_init(dev, + drm_get_format_info(dev, mode_cmd.pixel_format, + mode_cmd.modifier[0]), + &mode_cmd, &bo); if (IS_ERR(fb)) { DRM_DEV_ERROR(dev->dev, "failed to allocate fb\n"); /* note: if fb creation failed, we can't rely on fb destroy From b146e3e03b628bee694aaa95b4885d96834c1b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:19 +0300 Subject: [PATCH 279/358] drm/tegra: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. For the fbdev case a manual drm_get_format_info() lookup is needed. 
Cc: Thierry Reding Cc: Mikko Perttunen Cc: linux-tegra@vger.kernel.org Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-17-ville.syrjala@linux.intel.com --- drivers/gpu/drm/tegra/drm.h | 1 + drivers/gpu/drm/tegra/fb.c | 5 +++-- drivers/gpu/drm/tegra/fbdev.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 77e520c43f72..1dd3670f37db 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -185,6 +185,7 @@ bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer); int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer, struct tegra_bo_tiling *tiling); struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct tegra_bo **planes, unsigned int num_planes); diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c index d359683f5ce6..dd041089f797 100644 --- a/drivers/gpu/drm/tegra/fb.c +++ b/drivers/gpu/drm/tegra/fb.c @@ -102,6 +102,7 @@ static const struct drm_framebuffer_funcs tegra_fb_funcs = { }; struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct tegra_bo **planes, unsigned int num_planes) @@ -114,7 +115,7 @@ struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm, if (!fb) return ERR_PTR(-ENOMEM); - drm_helper_mode_fill_fb_struct(drm, fb, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(drm, fb, info, mode_cmd); for (i = 0; i < fb->format->num_planes; i++) fb->obj[i] = &planes[i]->gem; @@ -166,7 +167,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm, planes[i] = to_tegra_bo(gem); } - fb = tegra_fb_alloc(drm, cmd, planes, i); + fb = tegra_fb_alloc(drm, info, cmd, planes, i); if (IS_ERR(fb)) { err = PTR_ERR(fb); goto unreference; diff --git 
a/drivers/gpu/drm/tegra/fbdev.c b/drivers/gpu/drm/tegra/fbdev.c index cd9d798f8870..1b70f5e164af 100644 --- a/drivers/gpu/drm/tegra/fbdev.c +++ b/drivers/gpu/drm/tegra/fbdev.c @@ -106,7 +106,9 @@ int tegra_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper, return PTR_ERR(info); } - fb = tegra_fb_alloc(drm, &cmd, &bo, 1); + fb = tegra_fb_alloc(drm, + drm_get_format_info(drm, cmd.pixel_format, cmd.modifier[0]), + &cmd, &bo, 1); if (IS_ERR(fb)) { err = PTR_ERR(fb); dev_err(drm->dev, "failed to allocate DRM framebuffer: %d\n", From e7e9cde252c9b3a5315c0a993fe3643719a4c52d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:20 +0300 Subject: [PATCH 280/358] drm/virtio: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. 
Cc: David Airlie Cc: Gerd Hoffmann Cc: Dmitry Osipenko Cc: Gurchetan Singh Cc: Chia-I Wu Cc: virtualization@lists.linux.dev Reviewed-by: Thomas Zimmermann Acked-by: Dmitry Osipenko Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-18-ville.syrjala@linux.intel.com --- drivers/gpu/drm/virtio/virtgpu_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 93763b91bab5..e5805ca646c7 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -66,6 +66,7 @@ static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = { static int virtio_gpu_framebuffer_init(struct drm_device *dev, struct virtio_gpu_framebuffer *vgfb, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { @@ -73,7 +74,7 @@ virtio_gpu_framebuffer_init(struct drm_device *dev, vgfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &vgfb->base, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vgfb->base, info, mode_cmd); ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs); if (ret) { @@ -315,7 +316,7 @@ virtio_gpu_user_framebuffer_create(struct drm_device *dev, return ERR_PTR(-ENOMEM); } - ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj); + ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, info, mode_cmd, obj); if (ret) { kfree(virtio_gpu_fb); drm_gem_object_put(obj); From 3f019d749671b21c31cf1290e6c6a9f107e78cb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:21 +0300 Subject: [PATCH 281/358] drm/vmwgfx: Pass along the format info from .fb_create() to drm_helper_mode_fill_fb_struct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plumb the format info from .fb_create() all the way to 
drm_helper_mode_fill_fb_struct() to avoid the redundant lookup. Cc: Zack Rusin Cc: Broadcom internal kernel review list Reviewed-by: Thomas Zimmermann Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-19-ville.syrjala@linux.intel.com --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 14 +++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_kms.h | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 35965e29e408..54ea1b513950 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -500,6 +500,7 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, struct vmw_user_object *uo, struct vmw_framebuffer **out, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) @@ -548,7 +549,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, goto out_err1; } - drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, info, mode_cmd); memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); vmw_user_object_ref(&vfbs->uo); @@ -602,6 +603,7 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) @@ -634,7 +636,7 @@ static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, } vfbd->base.base.obj[0] = &bo->tbo.base; - drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, NULL, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, &vfbd->base.base, info, mode_cmd); vfbd->base.bo = true; vfbd->buffer = vmw_bo_reference(bo); *out = &vfbd->base; @@ -679,11 +681,13 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, 
uint32_t width, uint32_t height) * @dev_priv: Pointer to device private struct. * @uo: Pointer to user object to wrap the kms framebuffer around. * Either the buffer or surface inside the user object must be NULL. + * @info: pixel format information. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, struct vmw_user_object *uo, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; @@ -692,10 +696,10 @@ vmw_kms_new_framebuffer(struct vmw_private *dev_priv, /* Create the new framebuffer depending one what we have */ if (vmw_user_object_surface(uo)) { ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, - mode_cmd); + info, mode_cmd); } else if (uo->buffer) { ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, - mode_cmd); + info, mode_cmd); } else { BUG(); } @@ -742,7 +746,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, info, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 511e29cdb987..445471fe9be6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -399,6 +399,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, struct vmw_user_object *uo, + const struct drm_format_info *info, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); From 41ab92d35ccd2d66bfb049bd34cd95f0304b0240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Jul 2025 12:07:22 +0300 Subject: [PATCH 282/358] drm: Make passing of format info to 
drm_helper_mode_fill_fb_struct() mandatory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that everyone passes along the format info to drm_helper_mode_fill_fb_struct() we can make this behaviour mandatory and drop the extra lookup. Reviewed-by: Thomas Zimmermann Reviewed-by: Laurent Pinchart Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20250701090722.13645-20-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_modeset_helper.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index 89ba99970735..988735560570 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -88,8 +88,7 @@ void drm_helper_mode_fill_fb_struct(struct drm_device *dev, int i; fb->dev = dev; - fb->format = info ? : drm_get_format_info(dev, mode_cmd->pixel_format, - mode_cmd->modifier[0]); + fb->format = info; fb->width = mode_cmd->width; fb->height = mode_cmd->height; for (i = 0; i < 4; i++) { From 28c5c486380cc29e82b7747e999b3238f2887539 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 9 Jul 2025 21:02:51 +0200 Subject: [PATCH 283/358] drm/amdgpu: Fix missing unlocking in an error path in amdgpu_userq_create() If kasprintf() fails, some mutexes still need to be released to avoid locking issues, as already done in all other error handling paths.
Fixes: c03ea34cbf88 ("drm/amdgpu: add support of debugfs for mqd information") Reviewed-by: Alex Deucher Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/all/366557fa7ca8173fd78c58336986ca56953369b9.1752087753.git.christophe.jaillet@wanadoo.fr/ Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c index b1b80efc7993..941f61f92005 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c @@ -521,8 +521,10 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args) } queue_name = kasprintf(GFP_KERNEL, "queue-%d", qid); - if (!queue_name) - return -ENOMEM; + if (!queue_name) { + r = -ENOMEM; + goto unlock; + } #if defined(CONFIG_DEBUG_FS) /* Queue dentry per client to hold MQD information */ From d18e1faef6baab417cff8f6704c6279ba8f4922f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 10:22:59 -0400 Subject: [PATCH 284/358] drm/amdgpu: clean up sdma reset functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make them consistent and drop unneeded extra variables. 
Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++--- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 17 +++++++++++++---- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 20 ++++++++------------ drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 20 ++++++++------------ 4 files changed, 40 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 5a1098bdd825..999705e7b264 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1428,7 +1428,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 2): case IP_VERSION(5, 0, 5): - if (adev->sdma.instance[0].fw_version >= 35) + if ((adev->sdma.instance[0].fw_version >= 35) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1544,11 +1545,18 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - u32 inst_id = ring->me; int r; + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); + return -EINVAL; + } + amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, inst_id, false); + r = amdgpu_sdma_reset_engine(adev, ring->me, false); amdgpu_amdkfd_resume(adev, true); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 6843c2c3d71f..e542195972dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1347,11 +1347,13 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(5, 2, 2): case IP_VERSION(5, 2, 3): case IP_VERSION(5, 2, 4): - if (adev->sdma.instance[0].fw_version >= 76) + if 
((adev->sdma.instance[0].fw_version >= 76) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; case IP_VERSION(5, 2, 5): - if (adev->sdma.instance[0].fw_version >= 34) + if ((adev->sdma.instance[0].fw_version >= 34) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1457,11 +1459,18 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - u32 inst_id = ring->me; int r; + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); + return -EINVAL; + } + amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, inst_id, false); + r = amdgpu_sdma_reset_engine(adev, ring->me, false); amdgpu_amdkfd_resume(adev, true); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d2effa531817..c08e9a6cf682 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1355,7 +1355,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): - if (adev->sdma.instance[0].fw_version >= 21) + if ((adev->sdma.instance[0].fw_version >= 21) && + !amdgpu_sriov_vf(adev)) adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; break; default: @@ -1575,18 +1576,13 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == 
adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; } @@ -1596,7 +1592,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - r = sdma_v6_0_gfx_resume_instance(adev, i, true); + r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true); if (r) return r; amdgpu_fence_driver_force_completion(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 99a080bad2a3..ba1f3e3b6eb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -807,18 +807,13 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; - int i, r; + int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; - for (i = 0; i < adev->sdma.num_instances; i++) { - if (ring == &adev->sdma.instance[i].ring) - break; - } - - if (i == adev->sdma.num_instances) { - DRM_ERROR("sdma instance not found\n"); + if (ring->me >= adev->sdma.num_instances) { + dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; } @@ -828,7 +823,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, if (r) return r; - r = sdma_v7_0_gfx_resume_instance(adev, i, true); + r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true); if (r) return r; amdgpu_fence_driver_force_completion(ring); @@ -1346,7 +1341,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) adev->sdma.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); - adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_sdma_sysfs_reset_mask_init(adev); if (r) From c9bfafc1a672978b7608fcfb5f498ea1acf4dd24 
Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 10:52:49 -0400 Subject: [PATCH 285/358] drm/amdgpu/jpeg2: add additional ring reset error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start and stop can fail, so add checks. Fixes: 500c04d2a708 ("drm/amdgpu: Add ring reset callback for JPEG2_0_0") Reviewed-by: Sathishkumar S Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Sathishkumar S --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 781a5a8a8361..5ba1c5ad9a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -771,8 +771,12 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, int r; drm_sched_wqueue_stop(&ring->sched); - jpeg_v2_0_stop(ring->adev); - jpeg_v2_0_start(ring->adev); + r = jpeg_v2_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v2_0_start(ring->adev); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) return r; From 29184874556adff0399580280a077f3a2833396e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 10:56:07 -0400 Subject: [PATCH 286/358] drm/amdgpu/jpeg3: add additional ring reset error checking Start and stop can fail, so add checks. 
Fixes: 03399d0bff25 ("drm/amdgpu: Add ring reset callback for JPEG3_0_0") Reviewed-by: Sathishkumar S Signed-off-by: Alex Deucher Cc: Sathishkumar S --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a24bd833d644..5428930eafa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -562,8 +562,12 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, int r; drm_sched_wqueue_stop(&ring->sched); - jpeg_v3_0_stop(ring->adev); - jpeg_v3_0_start(ring->adev); + r = jpeg_v3_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v3_0_start(ring->adev); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) return r; From 94ee19ea149f3d9ffc4baa8a7977c1aa8b878f8e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Jul 2025 11:00:24 -0400 Subject: [PATCH 287/358] drm/amdgpu/jpeg4: add additional ring reset error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start and stop can fail, so add checks. 
Fixes: 74894ffc7d0c ("drm/amdgpu: Add ring reset callback for JPEG4_0_0") Reviewed-by: Sathishkumar S Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: Sathishkumar S --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 1d4edd77837d..78fe1924f3cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -730,8 +730,12 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, return -EINVAL; drm_sched_wqueue_stop(&ring->sched); - jpeg_v4_0_stop(ring->adev); - jpeg_v4_0_start(ring->adev); + r = jpeg_v4_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_start(ring->adev); + if (r) + return r; r = amdgpu_ring_test_helper(ring); if (r) return r; From 290ccae52dcff890c2b9fd9a9862c08598f5ed8f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Jul 2025 13:21:46 -0400 Subject: [PATCH 288/358] drm/amdgpu/vcn: don't enable per queue resets on SR-IOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Power control is only available in bare metal. SR-IOV will need a different method. 
Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 3 ++- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 5bbce8544fef..e6613246d8b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -241,7 +241,8 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 6000c528ad6a..732e9a9293d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -220,7 +220,8 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) } adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index 3d3b4254bd72..a137bef918ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -198,7 +198,8 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= 
AMDGPU_RESET_TYPE_PER_QUEUE; vcn_v5_0_0_alloc_ip_dump(adev); From e3f15cfd8b5dce72d3517c85b7eb6f6633905f6e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Jul 2025 13:31:14 -0400 Subject: [PATCH 289/358] drm/amdgpu: clean up jpeg reset functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make them consistent and use the reset flags. Reviewed-by: Sathishkumar S Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 7 ++++--- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 5ba1c5ad9a5e..0b56cb67e8f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -118,7 +118,8 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -770,6 +771,9 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, { int r; + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + drm_sched_wqueue_stop(&ring->sched); r = jpeg_v2_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 5be9cdcae32c..3d0e61f9f285 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -167,7 +167,8 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = 
amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -649,6 +650,9 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, { int r; + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + drm_sched_wqueue_stop(&ring->sched); jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 5428930eafa3..5d54c882d889 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -132,7 +132,8 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -561,6 +562,9 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, { int r; + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + drm_sched_wqueue_stop(&ring->sched); r = jpeg_v3_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 78fe1924f3cb..5485e983a089 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -143,7 +143,8 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; @@ -726,8 +727,8 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, { int r; - if (amdgpu_sriov_vf(ring->adev)) - return -EINVAL; + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); r = jpeg_v4_0_stop(ring->adev); From 
bc29c03b28159bac846ea61033dad09e33f4a338 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Jul 2025 14:01:42 -0400 Subject: [PATCH 290/358] drm/amdgpu: clean up GC reset functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make them consistent and use the reset flags. Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +++++++++----- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 19 ++++++++++++------- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 18 +++++++++++------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 14 +++++++++----- 5 files changed, 45 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d739bfb20383..506454ed27bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4952,11 +4952,15 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) } } } - /* TODO: Add queue reset mask when FW fully supports it */ + adev->gfx.gfx_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0); if (r) { @@ -9534,8 +9538,8 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, u64 addr; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -9607,8 +9611,8 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if 
(!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 37dcec2d0784..372dceceff35 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1806,12 +1806,17 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): if ((adev->gfx.me_fw_version >= 2280) && - (adev->gfx.mec_fw_version >= 2410)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2410) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } break; default: + if (!amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } break; } @@ -6818,8 +6823,8 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); @@ -6989,8 +6994,8 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r = 0; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index e4fc42470cf3..7220ed2fa2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -1542,10 +1542,14 @@ static int 
gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): if ((adev->gfx.me_fw_version >= 2660) && - (adev->gfx.mec_fw_version >= 2920)) { - adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; - adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + (adev->gfx.mec_fw_version >= 2920) && + !amdgpu_sriov_vf(adev)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; } + break; + default: + break; } if (!adev->enable_mes_kiq) { @@ -5314,8 +5318,8 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); @@ -5437,8 +5441,8 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; drm_sched_wqueue_stop(&ring->sched); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4c61157405b0..ac058697054f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2410,6 +2410,8 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { @@ -7181,8 +7183,8 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if 
(!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 5f92975cc305..3c10595125e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1148,13 +1148,15 @@ static int gfx_v9_4_3_sw_init(struct amdgpu_ip_block *ip_block) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): - if (adev->gfx.mec_fw_version >= 155) { + if ((adev->gfx.mec_fw_version >= 155) && + !amdgpu_sriov_vf(adev)) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } break; case IP_VERSION(9, 5, 0): - if (adev->gfx.mec_fw_version >= 21) { + if ((adev->gfx.mec_fw_version >= 21) && + !amdgpu_sriov_vf(adev)) { adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_PIPE; } @@ -3561,8 +3563,8 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -3594,7 +3596,9 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, dev_err(adev->dev, "fail to wait on hqd deactive and will try pipe reset\n"); pipe_reset: - if(r) { + if (r) { + if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)) + return -EOPNOTSUPP; r = gfx_v9_4_3_reset_hw_pipe(ring); dev_info(adev->dev, "ring: %s pipe reset :%s\n", ring->name, r ? 
"failed" : "successfully"); From 77cc0da39c7ce203cd3ce6bc5696421947a979d7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 21:35:00 -0400 Subject: [PATCH 291/358] drm/amdgpu: track ring state associated with a fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to know the wptr and sequence number associated with a fence so that we can re-emit the unprocessed state after a ring reset. Pre-allocate storage space for the ring buffer contents and add helpers to save off and re-emit the unprocessed state so that it can be re-emitted after the queue is reset. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 90 +++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 15 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 67 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 18 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 + 6 files changed, 195 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 2c3547f4cea4..9e7506965cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -120,6 +120,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence = kzalloc(sizeof(*am_fence), GFP_KERNEL); if (!am_fence) return -ENOMEM; + am_fence->context = 0; } else { am_fence = af; } @@ -127,6 +128,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, am_fence->ring = ring; seq = ++ring->fence_drv.sync_seq; + am_fence->seq = seq; if (af) { dma_fence_init(fence, &amdgpu_job_fence_ops, &ring->fence_drv.lock, @@ -141,6 +143,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); + amdgpu_fence_save_wptr(fence); 
pm_runtime_get_noresume(adev_to_drm(adev)->dev); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -253,6 +256,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) do { struct dma_fence *fence, **ptr; + struct amdgpu_fence *am_fence; ++last_seq; last_seq &= drv->num_fences_mask; @@ -265,6 +269,12 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) if (!fence) continue; + /* Save the wptr in the fence driver so we know what the last processed + * wptr was. This is required for re-emitting the ring state for + * queues that are reset but are not guilty and thus have no guilty fence. + */ + am_fence = container_of(fence, struct amdgpu_fence, base); + drv->signalled_wptr = am_fence->wptr; dma_fence_signal(fence); dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -727,6 +737,86 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring) amdgpu_fence_process(ring); } + +/** + * Kernel queue reset handling + * + * The driver can reset individual queues for most engines, but those queues + * may contain work from multiple contexts. Resetting the queue will + * lose all of that state. In order to minimize the collateral damage, the + * driver will save the ring contents which are not associated with the guilty + * context prior to resetting the queue. After resetting the queue the queue + * contents from the other contexts are re-emitted to the rings so that they can + * be processed by the engine. To handle this, we save the queue's write + * pointer (wptr) in the fences associated with each context. If we get a + * queue timeout, we can then use the wptrs from the fences to determine + * which data needs to be saved out of the queue's ring buffer.
+ */ + +/** + * amdgpu_fence_driver_guilty_force_completion - force signal of specified sequence + * + * @fence: fence of the ring to signal + * + */ +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence) +{ + dma_fence_set_error(&fence->base, -ETIME); + amdgpu_fence_write(fence->ring, fence->seq); + amdgpu_fence_process(fence->ring); +} + +void amdgpu_fence_save_wptr(struct dma_fence *fence) +{ + struct amdgpu_fence *am_fence = container_of(fence, struct amdgpu_fence, base); + + am_fence->wptr = am_fence->ring->wptr; +} + +static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring, + u64 start_wptr, u32 end_wptr) +{ + unsigned int first_idx = start_wptr & ring->buf_mask; + unsigned int last_idx = end_wptr & ring->buf_mask; + unsigned int i; + + /* Backup the contents of the ring buffer. */ + for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask) + ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i]; +} + +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + struct dma_fence *unprocessed; + struct dma_fence __rcu **ptr; + struct amdgpu_fence *fence; + u64 wptr, i, seqno; + + seqno = amdgpu_fence_read(ring); + wptr = ring->fence_drv.signalled_wptr; + ring->ring_backup_entries_to_copy = 0; + + for (i = seqno + 1; i <= ring->fence_drv.sync_seq; ++i) { + ptr = &ring->fence_drv.fences[i & ring->fence_drv.num_fences_mask]; + rcu_read_lock(); + unprocessed = rcu_dereference(*ptr); + + if (unprocessed && !dma_fence_is_signaled(unprocessed)) { + fence = container_of(unprocessed, struct amdgpu_fence, base); + + /* save everything if the ring is not guilty, otherwise + * just save the content from other contexts. 
+ */ + if (!guilty_fence || (fence->context != guilty_fence->context)) + amdgpu_ring_backup_unprocessed_command(ring, wptr, + fence->wptr); + wptr = fence->wptr; + } + rcu_read_unlock(); + } +} + /* * Common fence implementation */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 206b70acb29a..7d9bcb72e8dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -139,7 +139,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, int vmid = AMDGPU_JOB_GET_VMID(job); bool need_pipe_sync = false; unsigned int cond_exec; - unsigned int i; int r = 0; @@ -156,6 +155,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, gds_va = job->gds_va; init_shadow = job->init_shadow; af = &job->hw_fence; + /* Save the context of the job for reset handling. + * The driver needs this so it can skip the ring + * contents for guilty contexts. + */ + af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; } else { vm = NULL; fence_ctx = 0; @@ -307,8 +311,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH) ring->funcs->emit_wave_limit(ring, false); + /* Save the wptr associated with this fence. + * This must be last for resets to work properly + * as we need to save the wptr associated with this + * fence so we know what ring contents to back up + * after we reset the queue.
+ */ + amdgpu_fence_save_wptr(*f); + amdgpu_ring_ib_end(ring); amdgpu_ring_commit(ring); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 2b58e353cca1..5cb7bf9420f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -90,8 +90,8 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); struct drm_wedge_task_info *info = NULL; - struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; + struct amdgpu_task_info *ti; int idx, r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -134,7 +134,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } else if (amdgpu_gpu_recovery && ring->funcs->reset) { dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); - r = amdgpu_ring_reset(ring, job->vmid, NULL); + r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); if (!r) { atomic_inc(&ring->adev->gpu_reset_counter); dev_err(adev->dev, "Ring %s reset succeeded\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 426834806fbf..09d7681b7a75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -99,6 +99,29 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw) return 0; } +/** + * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit + * + * @ring: amdgpu_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Allocate @ndw dwords in the ring buffer (all asics). + * doesn't check the max_dw limit as we may be reemitting + * several submissions. 
+ */ +static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw) +{ + /* Align requested size with padding so unlock_commit can + * pad safely */ + ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask; + + ring->count_dw = ndw; + ring->wptr_old = ring->wptr; + + if (ring->funcs->begin_use) + ring->funcs->begin_use(ring); +} + /** amdgpu_ring_insert_nop - insert NOP packets * * @ring: amdgpu_ring structure holding ring information @@ -333,6 +356,12 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* Initialize cached_rptr to 0 */ ring->cached_rptr = 0; + if (!ring->ring_backup) { + ring->ring_backup = kvzalloc(ring->ring_size, GFP_KERNEL); + if (!ring->ring_backup) + return -ENOMEM; + } + /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE, @@ -342,6 +371,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, (void **)&ring->ring); if (r) { dev_err(adev->dev, "(%d) ring create failed\n", r); + kvfree(ring->ring_backup); return r; } amdgpu_ring_clear_ring(ring); @@ -385,6 +415,8 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) amdgpu_bo_free_kernel(&ring->ring_obj, &ring->gpu_addr, (void **)&ring->ring); + kvfree(ring->ring_backup); + ring->ring_backup = NULL; dma_fence_put(ring->vmid_wait); ring->vmid_wait = NULL; @@ -753,3 +785,38 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) return true; } + +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + /* Stop the scheduler to prevent anybody else from touching the ring buffer. 
*/ + drm_sched_wqueue_stop(&ring->sched); + /* back up the non-guilty commands */ + amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence); +} + +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence) +{ + unsigned int i; + int r; + + /* verify that the ring is functional */ + r = amdgpu_ring_test_ring(ring); + if (r) + return r; + + /* signal the fence of the bad job */ + if (guilty_fence) + amdgpu_fence_driver_guilty_force_completion(guilty_fence); + /* Re-emit the non-guilty commands */ + if (ring->ring_backup_entries_to_copy) { + amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy); + for (i = 0; i < ring->ring_backup_entries_to_copy; i++) + amdgpu_ring_write(ring, ring->ring_backup[i]); + amdgpu_ring_commit(ring); + } + /* Start the scheduler again */ + drm_sched_wqueue_start(&ring->sched); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 784ba2ec354c..309e7bb6001b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -118,6 +118,7 @@ struct amdgpu_fence_driver { /* sync_seq is protected by ring emission lock */ uint32_t sync_seq; atomic_t last_seq; + u64 signalled_wptr; bool initialized; struct amdgpu_irq_src *irq_src; unsigned irq_type; @@ -141,6 +142,12 @@ struct amdgpu_fence { /* RB, DMA, etc. 
*/ struct amdgpu_ring *ring; ktime_t start_timestamp; + + /* wptr for the fence for resets */ + u64 wptr; + /* fence context for resets */ + u64 context; + uint32_t seq; }; extern const struct drm_sched_backend_ops amdgpu_sched_ops; @@ -148,6 +155,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops; void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring); void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error); void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring); +void amdgpu_fence_driver_guilty_force_completion(struct amdgpu_fence *fence); +void amdgpu_fence_save_wptr(struct dma_fence *fence); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -284,6 +293,9 @@ struct amdgpu_ring { struct amdgpu_bo *ring_obj; uint32_t *ring; + /* backups for resets */ + uint32_t *ring_backup; + unsigned int ring_backup_entries_to_copy; unsigned rptr_offs; u64 rptr_gpu_addr; volatile u32 *rptr_cpu_addr; @@ -550,4 +562,10 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); +void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); +int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, + struct amdgpu_fence *guilty_fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index af0f655dfd5b..d5c0637d7392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -765,6 +765,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; struct dma_fence *fence = NULL; 
+ struct amdgpu_fence *af; unsigned int patch; int r; @@ -830,6 +831,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; + /* this is part of the job's context */ + af = container_of(fence, struct amdgpu_fence, base); + af->context = job->base.s_fence ? job->base.s_fence->finished.context : 0; } if (vm_flush_needed) { From 991f2e0c63a7513202faab90a470ebb46e227541 Mon Sep 17 00:00:00 2001 From: Tony Yi Date: Mon, 9 Jun 2025 14:09:28 -0500 Subject: [PATCH 292/358] drm/amdgpu: Check SQ_CONFIG register support on SRIOV On SRIOV environments, check if RLCG supports SQ_CONFIG register programming. Signed-off-by: Tony Yi Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 8 +++++++- drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 9320461bb486..3da3ebb1d9a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -152,8 +152,10 @@ enum AMDGIM_REG_ACCESS_FLAG { AMDGIM_FEATURE_MMHUB_REG_RLC_EN = (1 << 1), /* Use RLC to program GC regs */ AMDGIM_FEATURE_GC_REG_RLC_EN = (1 << 2), - /* Use PSP to program L1_TLB_CNTL*/ + /* Use PSP to program L1_TLB_CNTL */ AMDGIM_FEATURE_L1_TLB_CNTL_PSP_EN = (1 << 3), + /* Use RLCG to program SQ_CONFIG1 */ + AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG = (1 << 4), }; struct amdgim_pf2vf_info_v1 { @@ -346,6 +348,10 @@ struct amdgpu_video_codec_info; #define amdgpu_sriov_rlcg_error_report_enabled(adev) \ (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) +#define amdgpu_sriov_reg_access_sq_config(adev) \ +(amdgpu_sriov_vf((adev)) && \ + ((adev)->virt.reg_access & (AMDGIM_FEATURE_REG_ACCESS_SQ_CONFIG))) + #define amdgpu_passthrough(adev) \ 
((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 92ca13097aaa..33edad1f9dcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -113,7 +113,8 @@ union amd_sriov_reg_access_flags { uint32_t vf_reg_access_mmhub : 1; uint32_t vf_reg_access_gc : 1; uint32_t vf_reg_access_l1_tlb_cntl : 1; - uint32_t reserved : 28; + uint32_t vf_reg_access_sq_config : 1; + uint32_t reserved : 27; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 3c10595125e0..7314ad08fde3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -1351,7 +1351,9 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { /* ToDo: GC 9.4.4 */ case IP_VERSION(9, 4, 3): - if (adev->gfx.mec_fw_version >= 184) + if (adev->gfx.mec_fw_version >= 184 && + (amdgpu_sriov_reg_access_sq_config(adev) || + !amdgpu_sriov_vf(adev))) adev->gmc.xnack_flags |= AMDGPU_GMC_XNACK_FLAG_CHAIN; break; case IP_VERSION(9, 5, 0): From ec8fbb44b51ef5cf82ef09043387879276e9ba6b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 18 Jun 2025 09:49:05 -0400 Subject: [PATCH 293/358] drm/amdgpu: make compute timeouts consistent For kernel compute queues, align the timeout with other kernel queues (10 sec). This had previously been set higher for OpenCL when it used kernel queues, but now OpenCL uses KFD user queues which don't have a timeout limitation. This also aligns with SR-IOV which already used a shorter timeout. Additionally the longer timeout negatively impacts the user experience with kernel queues for interactive applications. 
Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 ++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++----- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 45b44dec0d7f..79d0ff0bda29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4220,18 +4220,10 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0; /* - * By default timeout for non compute jobs is 10000 - * and 60000 for compute jobs. - * In SR-IOV or passthrough mode, timeout for compute - * jobs are 60000 by default. + * By default timeout for jobs is 10 sec */ - adev->gfx_timeout = msecs_to_jiffies(10000); + adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000); adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; - if (amdgpu_sriov_vf(adev)) - adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? - msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else - adev->compute_timeout = msecs_to_jiffies(60000); if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 648a829559e1..3bb9b25cd121 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -362,12 +362,12 @@ module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint * The second one is for Compute. The third and fourth ones are * for SDMA and Video. * - * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. The timeout for compute is 60000. + * By default(with no lockup_timeout settings), the timeout for all jobs is 10000. 
*/ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " - "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " - "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); +MODULE_PARM_DESC(lockup_timeout, + "GPU lockup timeout in ms (default: 10000 for all jobs. " + "0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); /** From 9ad73536f8758e53e266c81f6f0fcbb90b349b6b Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Sat, 12 Jul 2025 12:10:48 +0530 Subject: [PATCH 294/358] drm/amd/pm: Get max/min frequency on aldebaran VF PMFW interface to get max/min frequencies is not available on aldebaran VFs. Use data, if available, in DPM tables to get the max/min frequencies. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/aldebaran_ppt.c | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 6de653d2ed62..c63d2e28954d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -342,6 +342,61 @@ static int aldebaran_get_allowed_feature_mask(struct smu_context *smu, return 0; } +static int aldebaran_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, uint32_t *max) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_13_0_dpm_table *dpm_table; + uint32_t min_clk, max_clk; + + if (amdgpu_sriov_vf(smu->adev)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + dpm_table = &dpm_context->dpm_tables.uclk_table; + break; + case SMU_GFXCLK: + case SMU_SCLK: + dpm_table = &dpm_context->dpm_tables.gfx_table; + break; + case SMU_SOCCLK: + dpm_table = &dpm_context->dpm_tables.soc_table; + break; + case SMU_FCLK: + dpm_table = &dpm_context->dpm_tables.fclk_table; + break; + case SMU_VCLK: + dpm_table = &dpm_context->dpm_tables.vclk_table; + break; + case SMU_DCLK: + dpm_table = &dpm_context->dpm_tables.dclk_table; + break; + default: + return -EINVAL; + } + + min_clk = dpm_table->min; + max_clk = dpm_table->max; + + if (min) { + if (!min_clk) + return -ENODATA; + *min = min_clk; + } + if (max) { + if (!max_clk) + return -ENODATA; + *max = max_clk; + } + + } else { + return smu_v13_0_get_dpm_ultimate_freq(smu, clk_type, min, max); + } + + return 0; +} + static int aldebaran_set_default_dpm_table(struct smu_context *smu) { struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; @@ -2081,7 +2136,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = { .set_azalia_d3_pme = 
smu_v13_0_set_azalia_d3_pme, .get_max_sustainable_clocks_by_dc = smu_v13_0_get_max_sustainable_clocks_by_dc, .get_bamaco_support = aldebaran_get_bamaco_support, - .get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq, + .get_dpm_ultimate_freq = aldebaran_get_dpm_ultimate_freq, .set_soft_freq_limited_range = aldebaran_set_soft_freq_limited_range, .od_edit_dpm_table = aldebaran_usr_edit_dpm_table, .set_df_cstate = aldebaran_set_df_cstate, From 25c314aa3ec3d30e4ee282540e2096b5c66a2437 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 14 Jul 2025 10:37:00 +0530 Subject: [PATCH 295/358] drm/amdgpu: Increase reset counter only on success Increment the reset counter only if soft recovery succeeded. This is consistent with a ring hard reset behaviour where counter gets incremented only if hard reset succeeded. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 09d7681b7a75..acac646a4e4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -459,6 +459,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { unsigned long flags; ktime_t deadline; + bool ret; if (unlikely(ring->adev->debug_disable_soft_recovery)) return false; @@ -473,12 +474,16 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, dma_fence_set_error(fence, -ENODATA); spin_unlock_irqrestore(fence->lock, flags); - atomic_inc(&ring->adev->gpu_reset_counter); while (!dma_fence_is_signaled(fence) && ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) ring->funcs->soft_recovery(ring, vmid); - return dma_fence_is_signaled(fence); + ret = dma_fence_is_signaled(fence); + /* increment the counter only if soft reset worked */ + if (ret) + 
atomic_inc(&ring->adev->gpu_reset_counter); + + return ret; } /* From 461f43b9b65ed10d9bdebcca87312713f4adcbfd Mon Sep 17 00:00:00 2001 From: Asad Kamal Date: Tue, 15 Jul 2025 13:40:43 +0800 Subject: [PATCH 296/358] drm/amd/pm: Remove unnecessary variable Remove unnecessary variable ret from smu_v13_0_12_get_smu_metrics_data Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202507150618.WOfvWsQF-lkp@intel.com Signed-off-by: Asad Kamal Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c index b3adeb6e43a8..02a455a31c25 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c @@ -301,7 +301,6 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, struct smu_table_context *smu_table = &smu->smu_table; MetricsTable_t *metrics = (MetricsTable_t *)smu_table->metrics_table; struct amdgpu_device *adev = smu->adev; - int ret = 0; int xcc_id; /* For clocks with multiple instances, only report the first one */ @@ -357,7 +356,7 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu, break; } - return ret; + return 0; } ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics) From 3c9e205f325ab2eba11c1ce5e6fb63fa9613f60f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:21:46 -0400 Subject: [PATCH 297/358] drm/amdgpu/jpeg2: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 0b56cb67e8f1..554af4b9930e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -774,19 +774,14 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v2_0_stop(ring->adev); if (r) return r; r = jpeg_v2_0_start(ring->adev); if (r) return r; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { From bb7928f9fc697294a0026b1a2e28386aa762d001 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:22:01 -0400 Subject: [PATCH 298/358] drm/amdgpu/jpeg2.5: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 3d0e61f9f285..4e489a7e2115 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -648,20 +648,13 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { - int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { From b81891589be1fb6c963d3b6752a50d56fef6a030 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:22:20 -0400 Subject: [PATCH 299/358] drm/amdgpu/jpeg3: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 5d54c882d889..d4bc4fca460c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -565,19 +565,14 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v3_0_stop(ring->adev); if (r) return r; r = jpeg_v3_0_start(ring->adev); if (r) return r; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { From 429ccbf6f4418e0e823d3470591fb4a99aadb09e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:22:36 -0400 Subject: [PATCH 300/358] drm/amdgpu/jpeg4: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 5485e983a089..ca3debe371c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -730,19 +730,14 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v4_0_stop(ring->adev); if (r) return r; r = jpeg_v4_0_start(ring->adev); if (r) return r; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { From 98f16636a2fcebff67d6c488ed393287d3321c07 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:22:52 -0400 Subject: [PATCH 301/358] drm/amdgpu/jpeg4.0.3: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 78441f8fce97..c3f73a2a911b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1147,20 +1147,13 @@ static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { - int r; - if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v4_0_3_core_stall_reset(ring); jpeg_v4_0_3_start_jrbc(ring); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { From cf07ece3a81f9696548b785dc481f18b4ea59dd4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Jun 2025 18:11:11 -0400 Subject: [PATCH 302/358] drm/amdgpu/jpeg4.0.5: add queue reset Add queue reset support for jpeg 4.0.5. Use the new helpers to re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 974030a5c03c..5a69a44e0f85 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -174,9 +174,10 @@ static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); if (r) return r; @@ -767,6 +768,25 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v4_0_5_stop(ring->adev); + if (r) + return r; + r = jpeg_v4_0_5_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .name = "jpeg_v4_0_5", .early_init = jpeg_v4_0_5_early_init, @@ -812,6 +832,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_5_ring_reset, }; static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) From e708f2cb56c08b8dd535202e7bf007fc1a2aeb64 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: 
Thu, 29 May 2025 13:05:35 -0400 Subject: [PATCH 303/358] drm/amdgpu/jpeg5: add queue reset Add queue reset support for jpeg 5.0.0. Use the new helpers to re-emit the unprocessed state after resetting the queue. Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 31d213ccbe0a..d6b565c29b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -120,9 +120,10 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); if (r) return r; @@ -644,6 +645,25 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + int r; + + if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + r = jpeg_v5_0_0_stop(ring->adev); + if (r) + return r; + r = jpeg_v5_0_0_start(ring->adev); + if (r) + return r; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); +} + static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .name = "jpeg_v5_0_0", .early_init = jpeg_v5_0_0_early_init, @@ -689,6 +709,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = 
amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_0_0_ring_reset, }; static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) From 8bea669e67aab1147d674d989202956b7e79ec36 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:24:11 -0400 Subject: [PATCH 304/358] drm/amdgpu/jpeg5.0.1: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. Reviewed-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 6f8a16da9d60..5473cbaa5c0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -838,20 +838,13 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { - int r; - if (amdgpu_sriov_vf(ring->adev)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { From d156ba39704ed610f47dea901dca281900e33c6f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:25:05 -0400 Subject: [PATCH 305/358] drm/amdgpu/vcn4: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index e6613246d8b8..f642a06a77b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1979,20 +1979,14 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v4_0_stop(vinst); if (r) return r; r = vcn_v4_0_start(vinst); if (r) return r; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { From 64c54f0aa207580f55adabf18afc44a97fd4c91e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:25:52 -0400 Subject: [PATCH 306/358] drm/amdgpu/vcn4.0.3: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index d8fd32c1e38e..3bda19b92cde 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1609,7 +1609,7 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); vcn_inst = GET_INST(VCN, ring->me); r = amdgpu_dpm_reset_vcn(adev, 1 << vcn_inst); @@ -1624,12 +1624,8 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); vcn_v4_0_3_hw_init_inst(vinst); vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[ring->me].indirect_sram); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { From 6166e37afd1723866e1e45c13338107b4fc868fa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:26:08 -0400 Subject: [PATCH 307/358] drm/amdgpu/vcn4.0.5: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 732e9a9293d2..039989ab452a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1477,20 +1477,14 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v4_0_5_stop(vinst); if (r) return r; r = vcn_v4_0_5_start(vinst); if (r) return r; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { From 3871149081b0d800d336791b208f541bdb8b457c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 15:26:37 -0400 Subject: [PATCH 308/358] drm/amdgpu/vcn5: re-emit unprocessed state on ring reset Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index a137bef918ed..ec0268b5666c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1204,20 +1204,14 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v5_0_0_stop(vinst); if (r) return r; r = vcn_v5_0_0_start(vinst); if (r) return r; - - r = amdgpu_ring_test_helper(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { From 7b6cde7f4e85d29f39779ff9cb7ed0203ad7e153 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 16:01:25 -0400 Subject: [PATCH 309/358] drm/amdgpu/vcn: add a helper framework for engine resets With engine resets we reset all queues on the engine rather than just a single queue. Add a framework to handle this similar to SDMA. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 79 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 6 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index c8885c3d54b3..d799bc74936c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -134,6 +134,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + mutex_init(&adev->vcn.inst[i].engine_reset_mutex); atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); @@ -1451,3 +1452,81 @@ int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, return ret; } + +/** + * amdgpu_vcn_reset_engine - Reset a specific VCN engine + * @adev: Pointer to the AMDGPU device + * @instance_id: VCN engine instance to reset + * + * Returns: 0 on success, or a negative error code on failure. + */ +static int amdgpu_vcn_reset_engine(struct amdgpu_device *adev, + uint32_t instance_id) +{ + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[instance_id]; + int r, i; + + mutex_lock(&vinst->engine_reset_mutex); + /* Stop the scheduler's work queue for the dec and enc rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. 
+ */ + drm_sched_wqueue_stop(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_stop(&vinst->ring_enc[i].sched); + + /* Perform the VCN reset for the specified instance */ + r = vinst->reset(vinst); + if (r) + goto unlock; + r = amdgpu_ring_test_ring(&vinst->ring_dec); + if (r) + goto unlock; + for (i = 0; i < vinst->num_enc_rings; i++) { + r = amdgpu_ring_test_ring(&vinst->ring_enc[i]); + if (r) + goto unlock; + } + amdgpu_fence_driver_force_completion(&vinst->ring_dec); + for (i = 0; i < vinst->num_enc_rings; i++) + amdgpu_fence_driver_force_completion(&vinst->ring_enc[i]); + + /* Restart the scheduler's work queue for the dec and enc rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + drm_sched_wqueue_start(&vinst->ring_dec.sched); + for (i = 0; i < vinst->num_enc_rings; i++) + drm_sched_wqueue_start(&vinst->ring_enc[i].sched); + +unlock: + mutex_unlock(&vinst->engine_reset_mutex); + + return r; +} + +/** + * amdgpu_vcn_ring_reset - Reset a VCN ring + * @ring: ring to reset + * @vmid: vmid of guilty job + * @timedout_fence: fence of timed out job + * + * This helper is for VCN blocks without unified queues because + * resetting the engine resets all queues in that case. With + * unified queues we have one queue per engine. + * Returns: 0 on success, or a negative error code on failure. 
+ */ +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *timedout_fence) +{ + struct amdgpu_device *adev = ring->adev; + + if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + if (adev->vcn.inst[ring->me].using_unified_queue) + return -EINVAL; + + return amdgpu_vcn_reset_engine(adev, ring->me); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 83adf81defc7..0bc0a94d7cf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -330,7 +330,9 @@ struct amdgpu_vcn_inst { struct dpg_pause_state *new_state); int (*set_pg_state)(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); + int (*reset)(struct amdgpu_vcn_inst *vinst); bool using_unified_queue; + struct mutex engine_reset_mutex; }; struct amdgpu_vcn_ras { @@ -552,5 +554,7 @@ void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); - +int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, + unsigned int vmid, + struct amdgpu_fence *guilty_fence); #endif From 64ac00974754fd340452970ab019a8666ee8fa2c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 16:37:34 -0400 Subject: [PATCH 310/358] drm/amdgpu/vcn2: implement ring reset Use the new helpers to handle engine resets for VCN. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 148b651be7ca..68b4371df0f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -98,6 +98,8 @@ static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst, static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_0_start_sriov(struct amdgpu_device *adev); +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst); + /** * vcn_v2_0_early_init - set function pointers and load microcode * @@ -213,6 +215,12 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) } adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + adev->vcn.inst[0].reset = vcn_v2_0_reset; + + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -233,6 +241,10 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -260,6 +272,8 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_vcn_sysfs_reset_mask_fini(adev); + r = amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); @@ -1355,6 +1369,16 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, return 0; } +static int vcn_v2_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_0_stop(vinst); + if (r) + return r; + return vcn_v2_0_start(vinst); +} + static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; @@ -2176,6 +2200,7 @@ static 
const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { @@ -2205,6 +2230,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) From 63b8c9fdfb7f822d13f1591b71a739a40513c0bf Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 17:07:22 -0400 Subject: [PATCH 311/358] drm/amdgpu/vcn2.5: implement ring reset Use the new helpers to handle engine resets for VCN. Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 58b527a6b795..bc30a5326866 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -102,6 +102,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -404,8 +405,14 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode; + adev->vcn.inst[j].reset = vcn_v2_5_reset; } + adev->vcn.supported_reset = + 
amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -425,6 +432,10 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -455,6 +466,8 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -1816,6 +1829,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -1914,6 +1928,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1942,6 +1957,16 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v2_5_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v2_5_stop(vinst); + if (r) + return r; + return vcn_v2_5_start(vinst); +} + static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; From d7767a1fd46b386ce930e734a5f6de7a02aede11 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 16 Jun 2025 17:15:27 -0400 Subject: [PATCH 312/358] drm/amdgpu/vcn3: implement ring reset Use the new helpers to handle engine resets for VCN. 
Reviewed-by: Sathishkumar S Tested-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 9fb0d5380589..4b8f4407047f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -110,6 +110,7 @@ static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst, enum amd_powergating_state state); static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, struct dpg_pause_state *new_state); +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst); static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); @@ -289,8 +290,14 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode; + adev->vcn.inst[i].reset = vcn_v3_0_reset; } + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + if (!amdgpu_sriov_vf(adev)) + adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -306,6 +313,10 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vcn.ip_dump = ptr; } + r = amdgpu_vcn_sysfs_reset_mask_init(adev); + if (r) + return r; + return 0; } @@ -338,6 +349,8 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); + amdgpu_vcn_sysfs_reset_mask_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { r = amdgpu_vcn_suspend(adev, i); if (r) @@ -2033,6 +2046,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = 
amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; /** @@ -2131,6 +2145,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = { .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = amdgpu_vcn_ring_reset, }; static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -2164,6 +2179,18 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) } } +static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst) +{ + int r; + + r = vcn_v3_0_stop(vinst); + if (r) + return r; + vcn_v3_0_enable_clock_gating(vinst); + vcn_v3_0_enable_static_power_gating(vinst); + return vcn_v3_0_start(vinst); +} + static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; From 084300fef58049eee71091f04e992959ac850d3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 5 Jun 2025 14:17:09 +0200 Subject: [PATCH 313/358] drm/amdgpu: rework gmc_v9_0_get_coherence_flags v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid using the mapping here. 
v2: use amdgpu_xgmi_same_hive() as suggested by Felix Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f73da518a6e8..c4d69cf4e06c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1121,8 +1121,8 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, } static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo, - struct amdgpu_bo_va_mapping *mapping, uint64_t *flags) { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -1132,7 +1132,6 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_EXT_COHERENT); bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; @@ -1162,7 +1161,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, mtype = MTYPE_UC; else mtype = MTYPE_NC; - if (mapping->bo_va->is_xgmi) + if (amdgpu_xgmi_same_hive(adev, bo_adev)) snoop = true; } } else { @@ -1254,7 +1253,8 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, } if ((*flags & AMDGPU_PTE_VALID) && bo) - gmc_v9_0_get_coherence_flags(adev, bo, mapping, flags); + gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.vm, bo, + flags); } static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, From 82a7c94fcecd104fa70766caaf6423e84bf588cb Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jul 2025 11:37:56 -0400 Subject: [PATCH 314/358] drm/amdgpu/jpeg: clean up reset type handling Make the handling consistent with 
other IPs and across JPEG versions. Reviewed-by: Sathishkumar S Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 +++- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 14 ++++++-------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 7 +++---- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 16 +++++++--------- 8 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 554af4b9930e..b93d6af8f6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -118,8 +118,10 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 4e489a7e2115..b6d5ba0bdc14 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -167,8 +167,10 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index d4bc4fca460c..a229d7eb900c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -132,8 +132,10 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index ca3debe371c5..f3a9073b8b24 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -143,8 +143,10 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index c3f73a2a911b..b86288a69e7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -216,12 +216,11 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(adev->jpeg.inst[0].ring_dec); + if (!amdgpu_sriov_vf(adev)) + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); return 0; } @@ -242,8 +241,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + 
amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 5a69a44e0f85..1892c278ea3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -177,7 +177,7 @@ static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d6b565c29b4b..0b4de0c6476a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -123,11 +123,10 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); if (!amdgpu_sriov_vf(adev)) - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - return 0; + + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index 5473cbaa5c0e..e622db1f818b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -200,14 +200,13 @@ static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) { - adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - if (r) - return r; - } + adev->jpeg.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); + if (!amdgpu_sriov_vf(adev)) + 
adev->jpeg.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); - return 0; + return r; } /** @@ -226,8 +225,7 @@ static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!amdgpu_sriov_vf(adev)) - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + amdgpu_jpeg_sysfs_reset_mask_fini(adev); r = amdgpu_jpeg_sw_fini(adev); From d524d40e3a6152a3ea1125af729f8cd8ca65efde Mon Sep 17 00:00:00 2001 From: Umio Yasuno Date: Tue, 15 Jul 2025 14:44:35 +0000 Subject: [PATCH 315/358] drm/amd/pm: fix null pointer access Writing a string without delimiters (' ', '\n', '\0') to the sysfs files under gpu_od/fan_ctrl, or to pp_power_profile_mode for the CUSTOM profile, will result in a null pointer dereference. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4401 Signed-off-by: Umio Yasuno Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 4b151bbaffaa..4b64851fdb42 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1398,6 +1398,8 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, if (ret) return -EINVAL; parameter_size++; + if (!tmp_str) + break; while (isspace(*tmp_str)) tmp_str++; } @@ -3645,6 +3647,9 @@ static int parse_input_od_command_lines(const char *buf, return -EINVAL; parameter_size++; + if (!tmp_str) + break; + while (isspace(*tmp_str)) tmp_str++; } From 2becafc319db3d96205320f31cc0de4ee5a93747 Mon Sep 17 00:00:00 2001 From: Eeli Haapalainen Date: Mon, 14 Jul 2025 08:13:09 +0300 Subject: [PATCH 316/358] drm/amdgpu/gfx8: reset compute ring wptr on the GPU on resume Commit 42cdf6f687da ("drm/amdgpu/gfx8: always restore kcq MQDs") made the ring pointer always to be reset on resume from suspend.
This caused compute rings to fail since the reset was done without also resetting it for the firmware. Reset wptr on the GPU to avoid a disconnect between the driver and firmware wptr. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3911 Fixes: 42cdf6f687da ("drm/amdgpu/gfx8: always restore kcq MQDs") Signed-off-by: Eeli Haapalainen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 68c401ecb3ec..367449d8061b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4640,6 +4640,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); } return 0; From e36519f5c8035f1685b39690ed330ac3b2c978a2 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Tue, 24 Jun 2025 19:15:00 -0400 Subject: [PATCH 317/358] drm/amd/amdgpu: Initialize swnode for ISP MFD device Create the amd_isp_capture MFD device with its swnode initialized to the isp-specific software_node that is part of the fwnode graph in the amd_isp4 x86/platform driver. The isp driver uses this swnode handle to retrieve the critical properties (data-lanes, mipi phyid, link-frequencies etc.) required for the camera to work on AMD ISP4 based targets.
Reviewed-by: Mario Limonciello Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c | 11 ++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1f531b5f594d..a1737556a77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1723,7 +1723,7 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return #endif #if defined(CONFIG_DRM_AMD_ISP) -int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]); +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev); #endif void amdgpu_register_gpu_instance(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b047fdf81543..6c62e27b9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1545,7 +1545,7 @@ static int isp_match_acpi_device_ids(struct device *dev, const void *data) return acpi_match_device(data, dev) ? 
1 : 0; } -int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]) +int amdgpu_acpi_get_isp4_dev(struct acpi_device **dev) { struct device *pdev __free(put_device) = NULL; struct acpi_device *acpi_pdev; @@ -1559,7 +1559,7 @@ int amdgpu_acpi_get_isp4_dev_hid(u8 (*hid)[ACPI_ID_LEN]) if (!acpi_pdev) return -ENODEV; - strscpy(*hid, acpi_device_hid(acpi_pdev)); + *dev = acpi_pdev; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index f857796f0297..a887df520414 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -183,15 +183,16 @@ static int isp_genpd_remove_device(struct device *dev, void *data) static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) { + const struct software_node *amd_camera_node, *isp4_node; struct amdgpu_device *adev = isp->adev; + struct acpi_device *acpi_dev; int idx, int_idx, num_res, r; - u8 isp_dev_hid[ACPI_ID_LEN]; u64 isp_base; if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; - r = amdgpu_acpi_get_isp4_dev_hid(&isp_dev_hid); + r = amdgpu_acpi_get_isp4_dev(&acpi_dev); if (r) { drm_dbg(&adev->ddev, "Invalid isp platform detected (%d)", r); /* allow GPU init to progress */ @@ -199,7 +200,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) } /* add GPIO resources required for OMNI5C10 sensor */ - if (!strcmp("OMNI5C10", isp_dev_hid)) { + if (!strcmp("OMNI5C10", acpi_device_hid(acpi_dev))) { gpiod_add_lookup_table(&isp_gpio_table); gpiod_add_lookup_table(&isp_sensor_gpio_table); } @@ -241,6 +242,9 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) goto failure; } + amd_camera_node = (const struct software_node *)acpi_dev->driver_data; + isp4_node = software_node_find_by_name(amd_camera_node, "isp4"); + /* initialize isp platform data */ isp->isp_pdata->adev = (void *)adev; isp->isp_pdata->asic_type = adev->asic_type; @@ -269,6 +273,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) 
isp->isp_cell[0].num_resources = num_res; isp->isp_cell[0].resources = &isp->isp_res[0]; isp->isp_cell[0].platform_data = isp->isp_pdata; + isp->isp_cell[0].swnode = isp4_node; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); /* initialize isp i2c platform data */ From 55d42f6169760d052330f3c949c02e37867b87d8 Mon Sep 17 00:00:00 2001 From: Pratap Nirujogi Date: Mon, 23 Jun 2025 18:44:50 -0400 Subject: [PATCH 318/358] drm/amd/amdgpu: Add helper functions for isp buffers Accessing amdgpu internal data structures "struct amdgpu_device" and "struct amdgpu_bo" in ISP V4L2 driver to alloc/free GART buffers is not recommended. Add new amdgpu_isp helper functions that takes opaque params from ISP V4L2 driver and calls the amdgpu internal functions amdgpu_bo_create_isp_user() and amdgpu_bo_create_kernel() to alloc/free GART buffers. Reviewed-by: Mario Limonciello Signed-off-by: Pratap Nirujogi Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c | 175 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 - include/drm/amd/isp.h | 51 ++++++ 4 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 include/drm/amd/isp.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 43fc941dfa57..9cddbf50442a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -33,6 +33,8 @@ #include "isp_v4_1_0.h" #include "isp_v4_1_1.h" +#define ISP_MC_ADDR_ALIGN (1024 * 32) + /** * isp_hw_init - start and test isp block * @@ -141,6 +143,179 @@ static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, return 0; } +static int is_valid_isp_device(struct device *isp_parent, struct device *amdgpu_dev) +{ + if (isp_parent != amdgpu_dev) + return -EINVAL; + + return 0; +} + +/** + * isp_user_buffer_alloc - create user buffer object (BO) for isp + * + * @dev: isp device handle + * @dmabuf: 
DMABUF handle for isp buffer allocated in system memory + * @buf_obj: GPU buffer object handle to initialize + * @buf_addr: GPU addr of the pinned BO to initialize + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate GPU addr for BO to make it accessible through the + * GART aperture for ISP HW. + * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_user_buffer_alloc(struct device *dev, void *dmabuf, + void **buf_obj, u64 *buf_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + struct amdgpu_bo *bo; + u64 gpu_addr; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!buf_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_isp_user(adev, dmabuf, + AMDGPU_GEM_DOMAIN_GTT, &bo, &gpu_addr); + if (ret) { + drm_err(&adev->ddev, "failed to alloc gart user buffer (%d)", ret); + return ret; + } + + *buf_obj = (void *)bo; + *buf_addr = gpu_addr; + + return 0; +} +EXPORT_SYMBOL(isp_user_buffer_alloc); + +/** + * isp_user_buffer_free - free isp user buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. 
+ */ +void isp_user_buffer_free(void *buf_obj) +{ + amdgpu_bo_free_isp_user(buf_obj); +} +EXPORT_SYMBOL(isp_user_buffer_free); + +/** + * isp_kernel_buffer_alloc - create kernel buffer object (BO) for isp + * + * @dev: isp device handle + * @size: size for the new BO + * @buf_obj: GPU BO handle to initialize + * @gpu_addr: GPU addr of the pinned BO + * @cpu_addr: CPU address mapping of BO + * + * Allocates and pins a kernel BO for internal isp firmware use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int isp_kernel_buffer_alloc(struct device *dev, u64 size, + void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct platform_device *ispdev = to_platform_device(dev); + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + const struct isp_platform_data *isp_pdata; + struct amdgpu_device *adev; + struct mfd_cell *mfd_cell; + int ret; + + if (WARN_ON(!ispdev)) + return -ENODEV; + + if (WARN_ON(!buf_obj)) + return -EINVAL; + + if (WARN_ON(!gpu_addr)) + return -EINVAL; + + if (WARN_ON(!cpu_addr)) + return -EINVAL; + + mfd_cell = &ispdev->mfd_cell[0]; + if (!mfd_cell) + return -ENODEV; + + isp_pdata = mfd_cell->platform_data; + adev = isp_pdata->adev; + + ret = is_valid_isp_device(ispdev->dev.parent, adev->dev); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, + size, + ISP_MC_ADDR_ALIGN, + AMDGPU_GEM_DOMAIN_GTT, + bo, + gpu_addr, + cpu_addr); + if (!cpu_addr || ret) { + drm_err(&adev->ddev, "failed to alloc gart kernel buffer (%d)", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(isp_kernel_buffer_alloc); + +/** + * isp_kernel_buffer_free - free isp kernel buffer object (BO) + * + * @buf_obj: amdgpu isp user BO to free + * @gpu_addr: GPU addr of isp kernel BO + * @cpu_addr: CPU addr of isp kernel BO + * + * unmaps and unpin a isp kernel BO. 
+ * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. + */ +void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr) +{ + struct amdgpu_bo **bo = (struct amdgpu_bo **)buf_obj; + + amdgpu_bo_free_kernel(bo, gpu_addr, cpu_addr); +} +EXPORT_SYMBOL(isp_kernel_buffer_free); + static const struct amd_ip_funcs isp_ip_funcs = { .name = "isp_ip", .early_init = isp_early_init, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index 1d1c4b1ec7e7..d6f4ffa4c97c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -28,18 +28,13 @@ #ifndef __AMDGPU_ISP_H__ #define __AMDGPU_ISP_H__ +#include #include #define ISP_REGS_OFFSET_END 0x629A4 struct amdgpu_isp; -struct isp_platform_data { - void *adev; - u32 asic_type; - resource_size_t base_rmmio_size; -}; - struct isp_funcs { int (*hw_init)(struct amdgpu_isp *isp); int (*hw_fini)(struct amdgpu_isp *isp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c5fda18967c8..122a88294883 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -352,7 +352,6 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } -EXPORT_SYMBOL(amdgpu_bo_create_kernel); /** * amdgpu_bo_create_isp_user - create user BO for isp @@ -421,7 +420,6 @@ int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, return r; } -EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -525,7 +523,6 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } -EXPORT_SYMBOL(amdgpu_bo_free_kernel); /** * amdgpu_bo_free_isp_user - free BO for isp use @@ -548,7 +545,6 @@ void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) } amdgpu_bo_unref(&bo); } 
-EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, diff --git a/include/drm/amd/isp.h b/include/drm/amd/isp.h new file mode 100644 index 000000000000..ec868288abf2 --- /dev/null +++ b/include/drm/amd/isp.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#ifndef __ISP_H__ +#define __ISP_H__ + +#include + +struct device; + +struct isp_platform_data { + void *adev; + u32 asic_type; + resource_size_t base_rmmio_size; +}; + +int isp_user_buffer_alloc(struct device *dev, void *dmabuf, + void **buf_obj, u64 *buf_addr); + +void isp_user_buffer_free(void *buf_obj); + +int isp_kernel_buffer_alloc(struct device *dev, u64 size, + void **buf_obj, u64 *gpu_addr, void **cpu_addr); + +void isp_kernel_buffer_free(void **buf_obj, u64 *gpu_addr, void **cpu_addr); + +#endif From 78d0a27ae0e2e70b22895f4b388cc0ab88e3c6ca Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Wed, 25 Jun 2025 15:29:45 +0800 Subject: [PATCH 319/358] drm/amdgpu: Add user queue instance count in HW IP info This change exposes the number of available user queue instances for each hardware IP type (GFX, COMPUTE, SDMA) through the drm_amdgpu_info_hw_ip interface. Key changes: 1. Added userq_num_instance field to drm_amdgpu_info_hw_ip structure 2. Implemented counting of available HQD slots using: - mes.gfx_hqd_mask for GFX queues - mes.compute_hqd_mask for COMPUTE queues - mes.sdma_hqd_mask for SDMA queues 3. 
Only counts available instances when user queues are enabled (!disable_uq) v2: using the adev->mes.gfx_hqd_mask[]/compute_hqd_mask[]/sdma_hqd_mask[] masks to determine the number of queue slots available for each engine type (Alex) v3: rename userq_num_instance to userq_num_hqds (Alex) Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 20 ++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index c78eea14c70a..3d4185a1d938 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -399,6 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, uint32_t ib_size_alignment = 0; enum amd_ip_block_type type; unsigned int num_rings = 0; + uint32_t num_hqds = 0; unsigned int i, j; if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) @@ -411,6 +412,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->gfx.gfx_ring[i].sched.ready && !adev->gfx.gfx_ring[i].no_user_submission) ++num_rings; + + if (!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) + num_hqds += hweight32(adev->mes.gfx_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; @@ -420,6 +427,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->gfx.compute_ring[i].sched.ready && !adev->gfx.compute_ring[i].no_user_submission) ++num_rings; + + if (!adev->sdma.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) + num_hqds += hweight32(adev->mes.compute_hqd_mask[i]); + } + ib_start_alignment = 32; ib_size_alignment = 32; break; @@ -429,6 +442,12 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->sdma.instance[i].ring.sched.ready && !adev->sdma.instance[i].ring.no_user_submission) ++num_rings; + + if 
(!adev->gfx.disable_uq) { + for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) + num_hqds += hweight32(adev->mes.sdma_hqd_mask[i]); + } + ib_start_alignment = 256; ib_size_alignment = 4; break; @@ -570,6 +589,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; + result->userq_num_hqds = num_hqds; result->ib_start_alignment = ib_start_alignment; result->ib_size_alignment = ib_size_alignment; return 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 45c4fa13499c..66c4a03ac9f9 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,6 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; + /* Userq available hqds */ + __u32 userq_num_hqds; }; /* GFX metadata BO sizes and alignment info (in bytes) */ From 9ffab039bcb0bbfade0e659552d2fb912347a871 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 4 Jul 2025 15:17:43 +0800 Subject: [PATCH 320/358] drm/amdgpu: Replace HQD terminology with slots naming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The term "HQD" is CP-specific and doesn't accurately describe the queue resources for other IP blocks like SDMA, VCN, or VPE. This change: 1. Renames `num_hqds` to `num_slots` in amdgpu_kms.c to better reflect the generic nature of the resource counting 2. Updates the UAPI struct member from `userq_num_hqds` to `userq_num_slots` 3. 
Maintains the same functionality while using more appropriate terminology Signed-off-by: Jesse Zhang Reviewed-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 +++++----- include/uapi/drm/amdgpu_drm.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 3d4185a1d938..8a76960803c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -399,7 +399,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, uint32_t ib_size_alignment = 0; enum amd_ip_block_type type; unsigned int num_rings = 0; - uint32_t num_hqds = 0; + uint32_t num_slots = 0; unsigned int i, j; if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) @@ -415,7 +415,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (!adev->gfx.disable_uq) { for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) - num_hqds += hweight32(adev->mes.gfx_hqd_mask[i]); + num_slots += hweight32(adev->mes.gfx_hqd_mask[i]); } ib_start_alignment = 32; @@ -430,7 +430,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (!adev->sdma.disable_uq) { for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) - num_hqds += hweight32(adev->mes.compute_hqd_mask[i]); + num_slots += hweight32(adev->mes.compute_hqd_mask[i]); } ib_start_alignment = 32; @@ -445,7 +445,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (!adev->gfx.disable_uq) { for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) - num_hqds += hweight32(adev->mes.sdma_hqd_mask[i]); + num_slots += hweight32(adev->mes.sdma_hqd_mask[i]); } ib_start_alignment = 256; @@ -589,7 +589,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, } result->capabilities_flags = 0; result->available_rings = (1 << num_rings) - 1; - result->userq_num_hqds = num_hqds; + result->userq_num_slots = num_slots; result->ib_start_alignment = ib_start_alignment; 
result->ib_size_alignment = ib_size_alignment; return 0; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 66c4a03ac9f9..bdedbaccf776 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1493,8 +1493,8 @@ struct drm_amdgpu_info_hw_ip { __u32 available_rings; /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ __u32 ip_discovery_version; - /* Userq available hqds */ - __u32 userq_num_hqds; + /* Userq available slots */ + __u32 userq_num_slots; }; /* GFX metadata BO sizes and alignment info (in bytes) */ From 14ae91a81ec8fa0bc23170d4aa16dd2a20d54105 Mon Sep 17 00:00:00 2001 From: Rhys Lloyd Date: Sat, 12 Jul 2025 19:51:08 -0700 Subject: [PATCH 321/358] gpu: nova-core: fix bounds check in PmuLookupTableEntry::new data is sliced from 2..6, but the bounds check data.len() < 5 does not satisfy those bounds. Fixes: 47c4846e4319 ("gpu: nova-core: vbios: Add support for FWSEC ucode extraction") Reviewed-by: Alexandre Courbot Reviewed-by: Joel Fernandes Signed-off-by: Rhys Lloyd Link: https://lore.kernel.org/r/20250713025108.9364-2-krakow20@gmail.com Signed-off-by: Alexandre Courbot --- drivers/gpu/nova-core/vbios.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/nova-core/vbios.rs b/drivers/gpu/nova-core/vbios.rs index 663fc50e8b66..5b5d9f38cbb3 100644 --- a/drivers/gpu/nova-core/vbios.rs +++ b/drivers/gpu/nova-core/vbios.rs @@ -901,7 +901,7 @@ struct PmuLookupTableEntry { impl PmuLookupTableEntry { fn new(data: &[u8]) -> Result { - if data.len() < 5 { + if data.len() < 6 { return Err(EINVAL); } From 8825dabaf67bb1517d939f28b6ba6fcf86d7f259 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Jul 2025 21:48:33 +0530 Subject: [PATCH 322/358] drm/amd/pm: Use cached data for min/max clocks If dpm tables are already populated on SMU v13.0.6 SOCs, use the cached data. Otherwise, fetch values from firmware. 
Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index 68624afe7d83..f2cf333b2e40 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -881,51 +881,51 @@ static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max) { + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_table_context *smu_table = &smu->smu_table; struct PPTable_t *pptable = (struct PPTable_t *)smu_table->driver_pptable; - uint32_t clock_limit = 0, param; + struct smu_13_0_dpm_table *dpm_table; + uint32_t min_clk, max_clk, param; int ret = 0, clk_id = 0; - if (!smu_cmn_clk_dpm_is_enabled(smu, clk_type)) { + /* Use dpm tables, if data is already fetched */ + if (pptable->Init) { switch (clk_type) { case SMU_MCLK: case SMU_UCLK: - if (pptable->Init) - clock_limit = pptable->UclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.uclk_table; break; case SMU_GFXCLK: case SMU_SCLK: - if (pptable->Init) - clock_limit = pptable->MinGfxclkFrequency; + dpm_table = &dpm_context->dpm_tables.gfx_table; break; case SMU_SOCCLK: - if (pptable->Init) - clock_limit = pptable->SocclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.soc_table; break; case SMU_FCLK: - if (pptable->Init) - clock_limit = pptable->FclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.fclk_table; break; case SMU_VCLK: - if (pptable->Init) - clock_limit = pptable->VclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.vclk_table; break; case SMU_DCLK: - if (pptable->Init) - clock_limit = pptable->DclkFrequencyTable[0]; + dpm_table = &dpm_context->dpm_tables.dclk_table; break; 
default: - break; + return -EINVAL; } + min_clk = dpm_table->min; + max_clk = dpm_table->max; + if (min) - *min = clock_limit; - + *min = min_clk; if (max) - *max = clock_limit; + *max = max_clk; - return 0; + if (min_clk && max_clk) + return 0; } if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) { From e678e75d68ca28fbc03d22f078e641f123698779 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 11 Jul 2025 12:09:06 +0530 Subject: [PATCH 323/358] drm/amd/pm: Use cached metrics data on SMUv13.0.6 Cached metrics data validity is 1ms on SMUv13.0.6 SOCs. It's not reasonable for any client to query gpu_metrics at a faster rate and constantly interrupt PMFW. Signed-off-by: Lijo Lazar Reviewed-by: Asad Kamal Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index f2cf333b2e40..9cc294f4708b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -2693,7 +2693,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table bool per_inst; metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); - ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, true); + ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); if (ret) { kfree(metrics_v0); return ret; From 81df6bfad6a479530cef4b8ecbf848132d0fc0ab Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Wed, 16 Jul 2025 13:21:23 +0530 Subject: [PATCH 324/358] drm/amdgpu: Add WARN_ON to the resource clear function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the dirty bit when the memory resource is not cleared during BO release. v2(Christian): - Drop the cleared flag set to false. - Improve the amdgpu_vram_mgr_set_clear_state() function. 
v3: - Add back the resource clear flag set function call after being cleared during eviction (Christian). - Modified the patch subject name. Signed-off-by: Arunpravin Paneer Selvam Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index b256cbc2bc27..2c88d5fd87da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -66,7 +66,10 @@ to_amdgpu_vram_mgr_resource(struct ttm_resource *res) static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) { - to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; + struct amdgpu_vram_mgr_resource *ares = to_amdgpu_vram_mgr_resource(res); + + WARN_ON(ares->flags & DRM_BUDDY_CLEARED); + ares->flags |= DRM_BUDDY_CLEARED; } #endif From ee60209b6ff62fcde05856d771544f14fcf1ec50 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 23:19:29 -0400 Subject: [PATCH 325/358] drm/amdgpu/gfx9: re-emit unprocessed state on kcq reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ac058697054f..9c0cdf4dde71 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7189,7 +7189,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -7246,13 +7246,7 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, DRM_ERROR("fail to remap queue\n"); return r; } - - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) From e22631b53aec436199e666967952d49dc3e82c55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 23:23:53 -0400 Subject: [PATCH 326/358] drm/amdgpu/gfx9.4.3: re-emit unprocessed state on kcq reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 7314ad08fde3..2763817c2255 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3571,7 +3571,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -3628,12 +3628,7 @@ static int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } enum amdgpu_gfx_cp_ras_mem_id { From f410731d5cdd14efdfa055bf12d50b8367915b0f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 23 May 2025 00:33:04 -0400 Subject: [PATCH 327/358] drm/amdgpu/gfx10: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. Drop the soft_recovery callbacks as the queue reset replaces it. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 35 +++----------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 506454ed27bd..744f51a6f522 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9050,21 +9050,6 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } -static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, mmSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -9544,7 +9529,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -9593,12 +9578,7 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, if (r) return r; - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, @@ -9617,7 +9597,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - drm_sched_wqueue_stop(&ring->sched); + 
amdgpu_ring_reset_helper_begin(ring, timedout_fence); spin_lock_irqsave(&kiq->ring_lock, flags); @@ -9671,12 +9651,7 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, if (r) return r; - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v10_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -9911,7 +9886,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kgq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, @@ -9952,7 +9926,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v10_0_ring_soft_recovery, .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kcq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, From fa3385ac15fe5350867ad3c59a1ad0fb5f8e8fc1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 22:05:13 -0400 Subject: [PATCH 328/358] drm/amdgpu/gfx11: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. Drop the soft_recovery callbacks as the queue reset replaces it. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 35 +++----------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 372dceceff35..465fcd650373 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6288,21 +6288,6 @@ static void gfx_v11_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v11_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v11_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -6826,7 +6811,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { @@ -6849,12 +6834,7 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v11_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -6997,7 +6977,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, if (!(adev->gfx.compute_supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { @@ -7018,12 +6998,7 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v11_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -7259,7 +7234,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kgq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, @@ -7301,7 +7275,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_wreg = gfx_v11_0_ring_emit_wreg, .emit_reg_wait = gfx_v11_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v11_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v11_0_ring_soft_recovery, .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kcq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, From 4da11b92d7ed4ce3be5d92feface8f3c2d5424bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 27 May 2025 22:29:31 -0400 Subject: [PATCH 329/358] drm/amdgpu/gfx12: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. Drop the soft_recovery callbacks as the queue reset replaces it. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 35 +++----------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 7220ed2fa2a3..5d32ae4b1a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -4694,21 +4694,6 @@ static void gfx_v12_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask, 0x20); } -static void gfx_v12_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) -{ - struct amdgpu_device *adev = ring->adev; - uint32_t value = 0; - - value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); - value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); - value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); - value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - WREG32_SOC15(GC, 0, regSQ_CMD, value); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); -} - static void gfx_v12_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5321,7 +5306,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); if (r) { @@ -5343,12 +5328,7 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, return r; } - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int gfx_v12_0_reset_compute_pipe(struct amdgpu_ring *ring) @@ -5444,7 +5424,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, if (!(adev->gfx.compute_supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) return -EOPNOTSUPP; - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { @@ -5465,12 +5445,7 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, return r; } - r = amdgpu_ring_test_ring(ring); - if (r) - return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) @@ -5548,7 +5523,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kgq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, @@ -5587,7 +5561,6 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_wreg = gfx_v12_0_ring_emit_wreg, .emit_reg_wait = gfx_v12_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v12_0_ring_emit_reg_write_reg_wait, - .soft_recovery = gfx_v12_0_ring_soft_recovery, .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kcq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, From 4b1df3bad2e283dc299ac38fb94c379b35173a86 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jun 2025 09:52:55 -0400 Subject: [PATCH 330/358] drm/amdgpu/sdma5: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 999705e7b264..d8c19601dd2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1555,11 +1555,15 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, return -EINVAL; } - amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, ring->me, false); - amdgpu_amdkfd_resume(adev, true); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); - return r; + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v5_0_stop_queue(struct amdgpu_ring *ring) From 1b49bddc5881f2dbf0ed8c53416620d60b59e8f3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 26 Jun 2025 09:53:18 -0400 Subject: [PATCH 331/358] drm/amdgpu/sdma5.2: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index e542195972dd..5d56029e88a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1469,11 +1469,15 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, return -EINVAL; } - amdgpu_amdkfd_suspend(adev, true); - r = amdgpu_sdma_reset_engine(adev, ring->me, false); - amdgpu_amdkfd_resume(adev, true); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); - return r; + amdgpu_amdkfd_suspend(adev, true); + r = amdgpu_sdma_reset_engine(adev, ring->me, true); + amdgpu_amdkfd_resume(adev, true); + if (r) + return r; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v5_2_stop_queue(struct amdgpu_ring *ring) From 9753078f5492a4d3667f4832f105f65ae8a633cc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 13:11:54 -0400 Subject: [PATCH 332/358] drm/amdgpu/sdma6: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index c08e9a6cf682..5d31f318cb3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1586,7 +1586,7 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, return -EINVAL; } - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) @@ -1595,9 +1595,8 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, r = sdma_v6_0_gfx_resume_instance(adev, ring->me, true); if (r) return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static int sdma_v6_0_set_trap_irq_state(struct amdgpu_device *adev, From ea2791d05a2e8bd483df48f548e0293edc3bcc0f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 29 May 2025 13:12:35 -0400 Subject: [PATCH 333/358] drm/amdgpu/sdma7: re-emit unprocessed state on ring reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-emit the unprocessed state after resetting the queue. 
Reviewed-by: Jesse Zhang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index ba1f3e3b6eb6..13abc0c46eab 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -817,7 +817,7 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, return -EINVAL; } - drm_sched_wqueue_stop(&ring->sched); + amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); if (r) @@ -826,9 +826,8 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, r = sdma_v7_0_gfx_resume_instance(adev, ring->me, true); if (r) return r; - amdgpu_fence_driver_force_completion(ring); - drm_sched_wqueue_start(&ring->sched); - return 0; + + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } /** From 6ac55eab4fc41e0ea80f9064945e4340f13d8b5c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Jul 2025 11:55:05 -0400 Subject: [PATCH 334/358] drm/amdgpu: move reset support type checks into the caller Rather than checking in the callbacks, check if the reset type is supported in the caller. 
Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 5 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 31 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ----- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 6 ----- drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 6 ----- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 3 --- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 3 --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 3 --- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 3 --- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 6 ----- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 3 --- drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 3 --- 25 files changed, 37 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 5cb7bf9420f7..32a08529307d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -112,6 +112,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_job_core_dump(adev, job); if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { dev_err(adev->dev, "ring %s timeout, but soft recovered\n", s_job->sched->name); @@ -131,7 +132,9 @@ static enum drm_gpu_sched_stat 
amdgpu_job_timedout(struct drm_sched_job *s_job) /* attempt a per ring reset */ if (unlikely(adev->debug_disable_gpu_ring_reset)) { dev_err(adev->dev, "Ring reset disabled by debug mask\n"); - } else if (amdgpu_gpu_recovery && ring->funcs->reset) { + } else if (amdgpu_gpu_recovery && + amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) && + ring->funcs->reset) { dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); r = amdgpu_ring_reset(ring, job->vmid, &job->hw_fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index acac646a4e4e..a5c3f64cbce6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -825,3 +825,34 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, drm_sched_wqueue_start(&ring->sched); return 0; } + +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type) +{ + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + if (ring->adev->gfx.gfx_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_COMPUTE: + if (ring->adev->gfx.compute_supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_SDMA: + if (ring->adev->sdma.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_DEC: + case AMDGPU_RING_TYPE_VCN_ENC: + if (ring->adev->vcn.supported_reset & reset_type) + return true; + break; + case AMDGPU_RING_TYPE_VCN_JPEG: + if (ring->adev->jpeg.supported_reset & reset_type) + return true; + break; + default: + break; + } + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 309e7bb6001b..7670f5d82b9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -568,4 +568,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence); int 
amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence); +bool amdgpu_ring_is_reset_type_supported(struct amdgpu_ring *ring, + u32 reset_type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index d799bc74936c..f1f67521c29c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1522,9 +1522,6 @@ int amdgpu_vcn_ring_reset(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (adev->vcn.inst[ring->me].using_unified_queue) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 744f51a6f522..7bd506f06eb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -9523,9 +9523,6 @@ static int gfx_v10_0_reset_kgq(struct amdgpu_ring *ring, u64 addr; int r; - if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -9591,9 +9588,6 @@ static int gfx_v10_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 465fcd650373..c01c241a1b06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -6808,9 +6808,6 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); @@ 
-6974,9 +6971,6 @@ static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r = 0; - if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 5d32ae4b1a4c..09bf72237d1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -5303,9 +5303,6 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->gfx.gfx_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, false); @@ -5421,9 +5418,6 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9c0cdf4dde71..20b30f4b3c7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7183,9 +7183,6 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 2763817c2255..51babf5c78c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -3565,9 +3565,6 @@ static 
int gfx_v9_4_3_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int r; - if (!(adev->gfx.compute_supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index b93d6af8f6e5..58239c405fda 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -773,9 +773,6 @@ static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, { int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v2_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index b6d5ba0bdc14..3e2c389242db 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -650,9 +650,6 @@ static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v2_5_stop_inst(ring->adev, ring->me); jpeg_v2_5_start_inst(ring->adev, ring->me); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a229d7eb900c..a44eb2667664 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -564,9 +564,6 @@ static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, { int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v3_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index f3a9073b8b24..da3ee69f1a3b 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -729,9 +729,6 @@ static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, { int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v4_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 1892c278ea3c..481d1a2dbe5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -774,9 +774,6 @@ static int jpeg_v4_0_5_ring_reset(struct amdgpu_ring *ring, { int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v4_0_5_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 0b4de0c6476a..e0a71909252b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -650,9 +650,6 @@ static int jpeg_v5_0_0_ring_reset(struct amdgpu_ring *ring, { int r; - if (!(ring->adev->jpeg.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = jpeg_v5_0_0_stop(ring->adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index e622db1f818b..54523dc1f702 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -836,9 +836,6 @@ static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid, struct amdgpu_fence *timedout_fence) { - if (amdgpu_sriov_vf(ring->adev)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); jpeg_v5_0_1_core_stall_reset(ring); jpeg_v5_0_1_init_jrbc(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 20fad2525969..36b1ca73c2ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1664,9 +1664,6 @@ static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, u32 id = ring->me; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_amdkfd_suspend(adev, true); r = amdgpu_sdma_reset_engine(adev, id, false); amdgpu_amdkfd_resume(adev, true); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index d8c19601dd2e..7dc67a22a7a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1547,9 +1547,6 @@ static int sdma_v5_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (ring->me >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 5d56029e88a6..3bd44c24f692 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1461,9 +1461,6 @@ static int sdma_v5_2_reset_queue(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (ring->me >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 5d31f318cb3c..e6d8eddda2bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1578,9 +1578,6 @@ static int sdma_v6_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->sdma.supported_reset & 
AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (ring->me >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 13abc0c46eab..b8b06d4c5882 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -809,9 +809,6 @@ static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; int r; - if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - if (ring->me >= adev->sdma.num_instances) { dev_err(adev->dev, "sdma instance not found\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index f642a06a77b5..1924e075b66f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1976,9 +1976,6 @@ static int vcn_v4_0_ring_reset(struct amdgpu_ring *ring, struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v4_0_stop(vinst); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3bda19b92cde..2a3663b551af 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -1603,12 +1603,6 @@ static int vcn_v4_0_3_ring_reset(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; - if (amdgpu_sriov_vf(ring->adev)) - return -EOPNOTSUPP; - - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); vcn_inst = GET_INST(VCN, ring->me); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c 
index 039989ab452a..caf2d95a85d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -1474,9 +1474,6 @@ static int vcn_v4_0_5_ring_reset(struct amdgpu_ring *ring, struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v4_0_5_stop(vinst); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index ec0268b5666c..07a6e9582880 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -1201,9 +1201,6 @@ static int vcn_v5_0_0_ring_reset(struct amdgpu_ring *ring, struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[ring->me]; int r; - if (!(adev->vcn.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) - return -EOPNOTSUPP; - amdgpu_ring_reset_helper_begin(ring, timedout_fence); r = vcn_v5_0_0_stop(vinst); if (r) From d9e9aa3e971b37c6d6dfd15ad8dc65537a925725 Mon Sep 17 00:00:00 2001 From: Raag Jadav Date: Tue, 15 Jul 2025 03:25:03 +0530 Subject: [PATCH 335/358] drm/xe: Don't fail probe on unsupported mailbox command If the device is running older pcode firmware, it is possible that newer mailbox commands are not supported by it. The sysfs attributes aren't useful in that case, but we shouldn't fail driver probe because of it. As of now, it is unknown if we can distinguish unsupported commands before attempting them. But until we figure out a way to do that, fix the regressions. 
v2: Add debug message (Lucas) Fixes: cdc36b66cd41 ("drm/xe: Expose fan control and voltage regulator version") Signed-off-by: Raag Jadav Tested-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250714215503.2897748-1-raag.jadav@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit ed5461daa150b037e36b8202381da1ef85d6b16b) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device_sysfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c index e5fd0cd537bc..bd9015761aa0 100644 --- a/drivers/gpu/drm/xe/xe_device_sysfs.c +++ b/drivers/gpu/drm/xe/xe_device_sysfs.c @@ -160,8 +160,13 @@ static int late_bind_create_files(struct device *dev) ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0), &cap, NULL); - if (ret) + if (ret) { + if (ret == -ENXIO) { + drm_dbg(&xe->drm, "Late binding not supported by firmware\n"); + ret = 0; + } goto out; + } if (REG_FIELD_GET(V1_FAN_SUPPORTED, cap)) { ret = sysfs_create_file(&dev->kobj, &dev_attr_lb_fan_control_version.attr); From 6aaceed7fe1a400082ec5990884b11ef7266a605 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 15 Jul 2025 11:14:22 -0700 Subject: [PATCH 336/358] drm/xe/oa: Fix static checker warning about null gt There is a static checker warning that gt returned by xe_device_get_gt can be NULL and that is being dereferenced. Use xe_root_mmio_gt instead, which is equivalent for gt 0 and cannot return NULL.
Fixes: 10d42ef34bce ("drm/xe/oa: Assign hwe for OAM_SAG") Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Link: https://lore.kernel.org/r/20250715181422.2807624-1-ashutosh.dixit@intel.com (cherry picked from commit 308dc9b27874d0e8a0258869b9e681b0fdd2e579) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_oa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index d991fbd90f20..5729e7d3e335 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1941,7 +1941,7 @@ static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) /* If not provided, OA unit defaults to OA unit 0 as per uapi */ if (!param->oa_unit) - param->oa_unit = &xe_device_get_gt(oa->xe, 0)->oa.oa_unit[0]; + param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0]; /* When we have an exec_q, get hwe from the exec_q */ if (param->exec_q) { From 2bd986021c297ba675e831c3164bf9bdbbca3bc3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Jul 2025 17:59:44 -0500 Subject: [PATCH 337/358] drm/xe: Fix a NULL vs IS_ERR() bug in xe_i2c_register_adapter() The fwnode_create_software_node() function returns error pointers. It never returns NULL. Update the checks to match. 
Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Signed-off-by: Dan Carpenter Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/65825d00-81ab-4665-af51-4fff6786a250@sabinyo.mountain Signed-off-by: Rodrigo Vivi (cherry picked from commit 2f264d58cc805a3cefc6b98097f90fbc388136ef) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index db9c0340be5c..1f19718db559 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -96,8 +96,8 @@ static int xe_i2c_register_adapter(struct xe_i2c *i2c) int ret; fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL); - if (!fwnode) - return -ENOMEM; + if (IS_ERR(fwnode)) + return PTR_ERR(fwnode); /* * Not using platform_device_register_full() here because we don't have From dc94168eaa6f6f2476c4e1a894bd8d031df6226d Mon Sep 17 00:00:00 2001 From: Zhanjun Dong Date: Mon, 21 Jul 2025 17:45:20 -0400 Subject: [PATCH 338/358] drm/xe/uc: Fix missing unwind goto Fix missing unwind goto on error handling. 
Fixes: b2c4ac219fa4 ("drm/xe/uc: Disable GuC communication on hardware initialization error") Signed-off-by: Zhanjun Dong Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://lore.kernel.org/r/20250721214520.954014-1-zhanjun.dong@intel.com (cherry picked from commit 176f44a5ec0b074aaf44852db77d0c183c36696d) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c index 3e0c3af235f2..465bda355443 100644 --- a/drivers/gpu/drm/xe/xe_uc.c +++ b/drivers/gpu/drm/xe/xe_uc.c @@ -164,7 +164,7 @@ static int vf_uc_load_hw(struct xe_uc *uc) err = xe_guc_opt_in_features_enable(&uc->guc); if (err) - return err; + goto err_out; err = xe_gt_record_default_lrcs(uc_to_gt(uc)); if (err) From cccb918e0231fefba059f049acced18760242136 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Thu, 17 Jul 2025 17:54:20 +0200 Subject: [PATCH 339/358] drm/xe/vf: Don't register I2C devices if VF VF drivers can't access I2C devices, so skip their registration when running as VF. 
Signed-off-by: Lukasz Laguna Fixes: f0e53aadd702 ("drm/xe: Support for I2C attached MCUs") Reviewed-by: Rodrigo Vivi Link: https://lore.kernel.org/r/20250717155420.25298-1-lukasz.laguna@intel.com Signed-off-by: Rodrigo Vivi (cherry picked from commit 9a220e065914b67b55d3d0ab91c3e215742fdd73) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_i2c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c index 1f19718db559..bc7dc2099470 100644 --- a/drivers/gpu/drm/xe/xe_i2c.c +++ b/drivers/gpu/drm/xe/xe_i2c.c @@ -283,6 +283,9 @@ int xe_i2c_probe(struct xe_device *xe) if (xe->info.platform != XE_BATTLEMAGE) return 0; + if (IS_SRIOV_VF(xe)) + return 0; + xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep); if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE) return 0; From a2e1407eb8405e59c56b2325d910a73fd917eb3e Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 23 Jul 2025 19:56:39 +0200 Subject: [PATCH 340/358] drm/xe/guc: Clear whole g2h_fence during initialization The struct g2h_fence must be explicitly initialized using the g2h_fence_init() function to avoid trash values in its members, but we failed to update this helper function with the new member. To fix that and avoid any future mistakes, memset the whole struct first, then update remaining non-zero members.
Fixes: 94de94d24ea8 ("drm/xe/guc: Cancel ongoing H2G requests when stopping CT") Signed-off-by: Michal Wajdeczko Cc: Matthew Brost Cc: Lukasz Laguna Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20250723175639.206875-1-michal.wajdeczko@intel.com (cherry picked from commit 159afd92bae8153bdd8d8b34aea0d463fe19c978) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_ct.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index b6acccfcd351..3f4e6a46ff16 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -95,12 +95,8 @@ struct g2h_fence { static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { + memset(g2h_fence, 0, sizeof(*g2h_fence)); g2h_fence->response_buffer = response_buffer; - g2h_fence->response_data = 0; - g2h_fence->response_len = 0; - g2h_fence->fail = false; - g2h_fence->retry = false; - g2h_fence->done = false; g2h_fence->seqno = ~0x0; } From 4846856c3a4afa882b6d1b842ed2fad6f3781f4d Mon Sep 17 00:00:00 2001 From: Shuicheng Lin Date: Thu, 24 Jul 2025 19:38:55 +0000 Subject: [PATCH 341/358] drm/xe/hw_engine_group: Avoid call kfree() for drmm_kzalloc() Memory allocated with drmm_kzalloc() should not be freed using kfree(), as it is managed by the DRM subsystem. The memory will be automatically freed when the associated drm_device is released. These 3 group pointers are allocated using drmm_kzalloc() in hw_engine_group_alloc(), so they don't require manual deallocation. 
Fixes: 67979060740f ("drm/xe/hw_engine_group: Fix potential leak") Cc: Michal Wajdeczko Cc: Matthew Brost Signed-off-by: Shuicheng Lin Reviewed-by: Matthew Brost Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250724193854.1124510-2-shuicheng.lin@intel.com (cherry picked from commit f98de826b418885a21ece67f0f5b921ae759b7bf) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_hw_engine_group.c | 28 ++++++------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c index 87a6dcb1b4b5..c926f840c87b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_group.c +++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c @@ -75,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) enum xe_hw_engine_id id; struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs; struct xe_device *xe = gt_to_xe(gt); - int err; group_rcs_ccs = hw_engine_group_alloc(xe); - if (IS_ERR(group_rcs_ccs)) { - err = PTR_ERR(group_rcs_ccs); - goto err_group_rcs_ccs; - } + if (IS_ERR(group_rcs_ccs)) + return PTR_ERR(group_rcs_ccs); group_bcs = hw_engine_group_alloc(xe); - if (IS_ERR(group_bcs)) { - err = PTR_ERR(group_bcs); - goto err_group_bcs; - } + if (IS_ERR(group_bcs)) + return PTR_ERR(group_bcs); group_vcs_vecs = hw_engine_group_alloc(xe); - if (IS_ERR(group_vcs_vecs)) { - err = PTR_ERR(group_vcs_vecs); - goto err_group_vcs_vecs; - } + if (IS_ERR(group_vcs_vecs)) + return PTR_ERR(group_vcs_vecs); for_each_hw_engine(hwe, gt, id) { switch (hwe->class) { @@ -116,15 +109,6 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt) } return 0; - -err_group_vcs_vecs: - kfree(group_vcs_vecs); -err_group_bcs: - kfree(group_bcs); -err_group_rcs_ccs: - kfree(group_rcs_ccs); - - return err; } /** From 942ac8da6388c25fe62b2792c78715e0ea6e649b Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Tue, 22 Jul 2025 16:10:54 +0200 Subject: [PATCH 342/358] drm/xe/configfs: Fix pci_dev reference leak 
We are using pci_get_domain_bus_and_slot() function to verify if the given config directory name matches any existing PCI device, but we failed to call the matching pci_dev_put() to release the reference. While at it, also change the error code in case of no device match, to make it more specific than a generic formatting error. Fixes: 16280ded45fb ("drm/xe: Add configfs to enable survivability mode") Signed-off-by: Michal Wajdeczko Cc: Lucas De Marchi Reviewed-by: Lucas De Marchi Reviewed-by: Jonathan Cavitt Link: https://lore.kernel.org/r/20250722141059.30707-2-michal.wajdeczko@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 0bdd05c2a82bbf2419415d012fd4f5faeca7f1af) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_configfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c index 8ec1ff1e4e80..e9b46a2d0019 100644 --- a/drivers/gpu/drm/xe/xe_configfs.c +++ b/drivers/gpu/drm/xe/xe_configfs.c @@ -267,7 +267,8 @@ static struct config_group *xe_config_make_device_group(struct config_group *gro pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function)); if (!pdev) - return ERR_PTR(-EINVAL); + return ERR_PTR(-ENODEV); + pci_dev_put(pdev); dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) From 3556dac8289456bc8b28670546b969f543967856 Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Thu, 10 Jul 2025 20:57:37 -0400 Subject: [PATCH 343/358] drm/amd/display: Fix divide by zero when calculating min ODM factor [WHY&HOW] If the debug option is set to disable_dsc the max slice width and/or dispclk can be zero. This causes a divide by zero when calculating the min ODM combine factor. Add a check to ensure they are valid first.
Reviewed-by: Wenjing Liu Signed-off-by: Dillon Varone Signed-off-by: Wayne Lin Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c | 29 +++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index a454d16e6586..1f53a9f0c0ac 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -152,7 +152,7 @@ uint32_t dc_bandwidth_in_kbps_from_timing( } /* Forward Declerations */ -static unsigned int get_min_slice_count_for_odm( +static unsigned int get_min_dsc_slice_count_for_odm( const struct display_stream_compressor *dsc, const struct dsc_enc_caps *dsc_enc_caps, const struct dc_crtc_timing *timing); @@ -466,7 +466,7 @@ bool dc_dsc_compute_bandwidth_range( struct dc_dsc_bw_range *range) { bool is_dsc_possible = false; - unsigned int min_slice_count; + unsigned int min_dsc_slice_count; struct dsc_enc_caps dsc_enc_caps; struct dsc_enc_caps dsc_common_caps; struct dc_dsc_config config = {0}; @@ -478,14 +478,14 @@ bool dc_dsc_compute_bandwidth_range( get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); - min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); is_dsc_possible = intersect_dsc_caps(dsc_sink_caps, &dsc_enc_caps, timing->pixel_encoding, &dsc_common_caps); if (is_dsc_possible) is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, 0, timing, - &options, link_encoding, min_slice_count, &config); + &options, link_encoding, min_dsc_slice_count, &config); if (is_dsc_possible) is_dsc_possible = decide_dsc_bandwidth_range(min_bpp_x16, max_bpp_x16, @@ -593,14 +593,12 @@ static void build_dsc_enc_caps( struct dc *dc; - memset(&single_dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (!dsc || !dsc->ctx || 
!dsc->ctx->dc || !dsc->funcs->dsc_get_single_enc_caps) return; dc = dsc->ctx->dc; - if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool) + if (!dc->clk_mgr || !dc->clk_mgr->funcs->get_max_clock_khz || !dc->res_pool || dc->debug.disable_dsc) return; /* get max DSCCLK from clk_mgr */ @@ -634,7 +632,7 @@ static inline uint32_t dsc_div_by_10_round_up(uint32_t value) return (value + 9) / 10; } -static unsigned int get_min_slice_count_for_odm( +static unsigned int get_min_dsc_slice_count_for_odm( const struct display_stream_compressor *dsc, const struct dsc_enc_caps *dsc_enc_caps, const struct dc_crtc_timing *timing) @@ -651,6 +649,10 @@ static unsigned int get_min_slice_count_for_odm( } } + /* validate parameters */ + if (max_dispclk_khz == 0 || dsc_enc_caps->max_slice_width == 0) + return 1; + /* consider minimum odm slices required due to * 1) display pipe throughput (dispclk) * 2) max image width per slice @@ -669,13 +671,12 @@ static void get_dsc_enc_caps( { memset(dsc_enc_caps, 0, sizeof(struct dsc_enc_caps)); - if (!dsc) + if (!dsc || !dsc->ctx || !dsc->ctx->dc || dsc->ctx->dc->debug.disable_dsc) return; /* check if reported cap global or only for a single DCN DSC enc */ if (dsc->funcs->dsc_get_enc_caps) { - if (!dsc->ctx->dc->debug.disable_dsc) - dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); + dsc->funcs->dsc_get_enc_caps(dsc_enc_caps, pixel_clock_100Hz); } else { build_dsc_enc_caps(dsc, dsc_enc_caps); } @@ -1295,10 +1296,10 @@ bool dc_dsc_compute_config( { bool is_dsc_possible = false; struct dsc_enc_caps dsc_enc_caps; - unsigned int min_slice_count; + unsigned int min_dsc_slice_count; get_dsc_enc_caps(dsc, &dsc_enc_caps, timing->pix_clk_100hz); - min_slice_count = get_min_slice_count_for_odm(dsc, &dsc_enc_caps, timing); + min_dsc_slice_count = get_min_dsc_slice_count_for_odm(dsc, &dsc_enc_caps, timing); is_dsc_possible = setup_dsc_config(dsc_sink_caps, &dsc_enc_caps, @@ -1306,7 +1307,7 @@ bool dc_dsc_compute_config( 
timing, options, link_encoding, - min_slice_count, + min_dsc_slice_count, dsc_cfg); return is_dsc_possible; } From c90f2e1172c51fa25492471dc9910e2d7c1444b9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 15 Jul 2025 16:50:22 -0700 Subject: [PATCH 344/358] drm/amdgpu: Initialize data to NULL in imu_v12_0_program_rlc_ram() After a recent change in clang to expose uninitialized warnings from const variables and pointers [1], there is a warning in imu_v12_0_program_rlc_ram() because data is passed uninitialized to program_imu_rlc_ram(): drivers/gpu/drm/amd/amdgpu/imu_v12_0.c:374:30: error: variable 'data' is uninitialized when used here [-Werror,-Wuninitialized] 374 | program_imu_rlc_ram(adev, data, (const u32)size); | ^~~~ As this warning happens early in clang's frontend, it does not realize that due to the assignment of r to -EINVAL, program_imu_rlc_ram() is never actually called, and even if it were, data would not be dereferenced because size is 0. Just initialize data to NULL to silence the warning, as the commit that added program_imu_rlc_ram() mentioned it would eventually be used over the old method, at which point data can be properly initialized and used. 
Cc: stable@vger.kernel.org Closes: https://github.com/ClangBuiltLinux/linux/issues/2107 Fixes: 56159fffaab5 ("drm/amdgpu: use new method to program rlc ram") Link: https://github.com/llvm/llvm-project/commit/2464313eef01c5b1edf0eccf57a32cdee01472c7 [1] Signed-off-by: Nathan Chancellor Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/imu_v12_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index df898dbb746e..8cb6b1854d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -362,7 +362,7 @@ static void program_imu_rlc_ram(struct amdgpu_device *adev, static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev) { u32 reg_data, size = 0; - const u32 *data; + const u32 *data = NULL; int r = -EINVAL; WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); From b4a69f7f29c8a459ad6b4d8a8b72450f1d9fd288 Mon Sep 17 00:00:00 2001 From: Peter Shkenev Date: Thu, 17 Jul 2025 23:48:17 +0300 Subject: [PATCH 345/358] drm/amdgpu: check if hubbub is NULL in debugfs/amdgpu_dm_capabilities HUBBUB structure is not initialized on DCE hardware, so check if it is NULL to avoid null dereference while accessing amdgpu_dm_capabilities file in debugfs. 
Signed-off-by: Peter Shkenev Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index c7d13e743e6c..b726bcd18e29 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3988,7 +3988,7 @@ static int capabilities_show(struct seq_file *m, void *unused) struct hubbub *hubbub = dc->res_pool->hubbub; - if (hubbub->funcs->get_mall_en) + if (hubbub && hubbub->funcs->get_mall_en) hubbub->funcs->get_mall_en(hubbub, &mall_in_use); if (dc->cap_funcs.get_subvp_en) From 284d4dfe850e665f0e7d4dfaf4d3d3da76d11fb0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Jun 2025 11:22:26 -0400 Subject: [PATCH 346/358] drm/amdgpu: track whether a queue is a kernel queue in amdgpu_mqd_prop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used to set the MQD appropriately for each queue type. Kernel queues have additional privileges.
Acked-by: Christian König Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.16.x --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a1737556a77e..ef3af170dda4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -883,6 +883,7 @@ struct amdgpu_mqd_prop { uint64_t csa_addr; uint64_t fence_address; bool tmz_queue; + bool kernel_queue; }; struct amdgpu_mqd { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a5c3f64cbce6..6379bb25bf5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -719,6 +719,7 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, prop->eop_gpu_addr = ring->eop_gpu_addr; prop->use_doorbell = ring->use_doorbell; prop->doorbell_index = ring->doorbell_index; + prop->kernel_queue = true; /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. From 1f02f2044bda1db1fd995bc35961ab075fa7b5a2 Mon Sep 17 00:00:00 2001 From: Gang Ba Date: Tue, 8 Jul 2025 14:36:13 -0400 Subject: [PATCH 347/358] drm/amdgpu: Avoid extra evict-restore process. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vm belongs to another process, this is fclose after fork, wait may enable signaling KFD eviction fence and cause parent process queue evicted. 
[677852.634569] amdkfd_fence_enable_signaling+0x56/0x70 [amdgpu] [677852.634814] __dma_fence_enable_signaling+0x3e/0xe0 [677852.634820] dma_fence_wait_timeout+0x3a/0x140 [677852.634825] amddma_resv_wait_timeout+0x7f/0xf0 [amdkcl] [677852.634831] amdgpu_vm_wait_idle+0x2d/0x60 [amdgpu] [677852.635026] amdgpu_flush+0x34/0x50 [amdgpu] [677852.635208] filp_flush+0x38/0x90 [677852.635213] filp_close+0x14/0x30 [677852.635216] do_close_on_exec+0xdd/0x130 [677852.635221] begin_new_exec+0x1da/0x490 [677852.635225] load_elf_binary+0x307/0xea0 [677852.635231] ? srso_alias_return_thunk+0x5/0xfbef5 [677852.635235] ? ima_bprm_check+0xa2/0xd0 [677852.635240] search_binary_handler+0xda/0x260 [677852.635245] exec_binprm+0x58/0x1a0 [677852.635249] bprm_execve.part.0+0x16f/0x210 [677852.635254] bprm_execve+0x45/0x80 [677852.635257] do_execveat_common.isra.0+0x190/0x200 Suggested-by: Christian König Signed-off-by: Gang Ba Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d5c0637d7392..5cacf5717016 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2414,13 +2414,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, - DMA_RESV_USAGE_BOOKKEEP, - true, timeout); + timeout = drm_sched_entity_flush(&vm->immediate, timeout); if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); + return drm_sched_entity_flush(&vm->delayed, timeout); } static void amdgpu_vm_destroy_task_info(struct kref *kref) From 9c2883057b3c861879b647f34e8bc448954e8729 Mon Sep 17 00:00:00 2001 From: Lauri Tirkkonen Date: Mon, 21 Jul 2025 
09:59:40 +0900 Subject: [PATCH 348/358] drm/amd/display: fix initial backlight brightness calculation DIV_ROUND_CLOSEST(x, 100) returns either 0 or 1 for any x between 0 and 100, so the division must be performed after the multiplication rather than before it; otherwise the initial brightness is not scaled properly. Cc: stable@vger.kernel.org Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/aH2Q_HJvxKbW74vU@hacktheplanet.fi Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 096b23ad4845..468c9c5a6773 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4983,9 +4983,9 @@ amdgpu_dm_register_backlight_device(struct amdgpu_dm_connector *aconnector) caps = &dm->backlight_caps[aconnector->bl_idx]; if (get_brightness_range(caps, &min, &max)) { if (power_supply_is_system_supplied() > 0) - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->ac_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->ac_level, 100); else - props.brightness = (max - min) * DIV_ROUND_CLOSEST(caps->dc_level, 100); + props.brightness = DIV_ROUND_CLOSEST((max - min) * caps->dc_level, 100); /* min is zero, so max needs to be adjusted */ props.max_brightness = max - min; drm_dbg(drm, "Backlight caps: min: %d, max: %d, ac %d, dc %d\n", min, max, From dfe9707c075a365ccd1f82cceabdf6ab55a77b5f Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 21 Jul 2025 18:52:36 +0530 Subject: [PATCH 349/358] drm/amd/display: Fix misuse of /** to /* in 'dce_i2c_hw.c' Fix the comment style before cntl_stuck_hw_workaround() by replacing '/**' with '/*' since it is not a kdoc comment. Fixes the below with gcc W=1: display/dc/dce/dce_i2c_hw.c:380: warning: This comment starts with '/**', but isn't a kernel-doc comment.
Refer Documentation/doc-guide/kernel-doc.rst * If we boot without an HDMI display, the I2C engine does not get initialized Fixes: 04d57f4462a6 ("drm/amd/display: Workaround for stuck I2C arbitrage") Cc: Alvin Lee Cc: Dominik Kaszewski Cc: Ivan Lipski Cc: Harry Wentland Cc: Tom Chung Cc: Roman Li Cc: Alex Hung Cc: Aurabindo Pillai Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Hung Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c index 4e06468a6284..0421b267a0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_i2c_hw.c @@ -377,10 +377,16 @@ static bool setup_engine( } /** + * cntl_stuck_hw_workaround - Workaround for I2C engine stuck state + * @dce_i2c_hw: Pointer to dce_i2c_hw structure + * * If we boot without an HDMI display, the I2C engine does not get initialized * correctly. One of its symptoms is that SW_USE_I2C does not get cleared after - * acquire, so that after setting SW_DONE_USING_I2C on release, the engine gets + * acquire. After setting SW_DONE_USING_I2C on release, the engine gets * immediately reacquired by SW, preventing DMUB from using it. + * + * This function checks the I2C arbitration status and applies a release + * workaround if necessary. */ static void cntl_stuck_hw_workaround(struct dce_i2c_hw *dce_i2c_hw) { From 8e0d1edb5c16732b695eaf4bd7096b1569817cf0 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 24 Jul 2025 15:16:18 +0800 Subject: [PATCH 350/358] drm/amd/amdgpu: fix missing lock for cper.ring->rptr/wptr access Add lock protection for 'ring->wptr'/'ring->rptr' to ensure the correct execution. 
Fixes: 8652920d2c00 ("drm/amdgpu: add mutex lock for cper ring") Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index 15dde1f50328..25252231a68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -459,7 +459,7 @@ static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) { - u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + u64 pos, wptr_old, rptr; int rec_cnt_dw = count >> 2; u32 chunk, ent_sz; u8 *s = (u8 *)src; @@ -472,9 +472,11 @@ void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) return; } - wptr_old = ring->wptr; - mutex_lock(&ring->adev->cper.ring_lock); + + wptr_old = ring->wptr; + rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + while (count) { ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr); chunk = umin(ent_sz, count); From 0395cde08e1f7eee810b5799466e41635a21e599 Mon Sep 17 00:00:00 2001 From: Frank Min Date: Wed, 4 Jun 2025 21:39:34 +0800 Subject: [PATCH 351/358] drm/amdgpu: add kicker fws loading for gfx12/smu14/psp14 1. Add kicker firmwares loading for gfx12/smu14/psp14 2. 
Register additional MODULE_FIRMWARE entries for kicker fws - gc_12_0_1_rlc_kicker.bin - gc_12_0_1_imu_kicker.bin - psp_14_0_3_sos_kicker.bin - psp_14_0_3_ta_kicker.bin - smu_14_0_3_kicker.bin Signed-off-by: Frank Min Reviewed-by: Gui Chengming Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c | 14 ++++++++++---- drivers/gpu/drm/amd/amdgpu/imu_v12_0.c | 11 ++++++++--- drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 2 ++ drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c | 11 ++++++++--- 5 files changed, 29 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index a0b50a8ac9c4..e96f24e9ad57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -32,6 +32,7 @@ static const struct kicker_device kicker_device_list[] = { {0x744B, 0x00}, + {0x7551, 0xC8} }; static void amdgpu_ucode_print_common_hdr(const struct common_firmware_header *hdr) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index 09bf72237d1d..3e138527d534 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -79,6 +79,7 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc_kicker.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { @@ -586,7 +587,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char * static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -613,9 +614,14 @@ static int 
gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index 8cb6b1854d24..58cd87db8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -34,12 +34,13 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu_kicker.bin"); #define TRANSFER_RAM_MASK 0x001c0000 static int imu_v12_0_init_microcode(struct amdgpu_device *adev) { - char ucode_prefix[15]; + char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; struct amdgpu_firmware_info *info = NULL; @@ -47,8 +48,12 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s_imu.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 
36ef4a72ad1d..38dfc5c19f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -34,7 +34,9 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta_kicker.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index 76c1adda83db..f9b0938c57ea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -62,13 +62,14 @@ const int decoded_link_width[8] = {0, 1, 2, 4, 8, 12, 16, 32}; MODULE_FIRMWARE("amdgpu/smu_14_0_2.bin"); MODULE_FIRMWARE("amdgpu/smu_14_0_3.bin"); +MODULE_FIRMWARE("amdgpu/smu_14_0_3_kicker.bin"); #define ENABLE_IMU_ARG_GFXOFF_ENABLE 1 int smu_v14_0_init_microcode(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - char ucode_prefix[15]; + char ucode_prefix[30]; int err = 0; const struct smc_firmware_header_v1_0 *hdr; const struct common_firmware_header *header; @@ -79,8 +80,12 @@ int smu_v14_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, - "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_is_kicker_fw(adev)) + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_kicker.bin", ucode_prefix); + else + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; From 02f3ec53177243d32ee8b6f8ba99136d7887ee3a Mon Sep 17 00:00:00 2001 From: Roman Li Date: Mon, 14 Jul 2025 14:37:33 -0400 Subject: [PATCH 352/358] drm/amd/display: 
Disable dsc_power_gate for dcn314 by default [Why] "REG_WAIT timeout 1us * 1000 tries - dcn314_dsc_pg_control line" warnings seen after resuming from s2idle. DCN314 has issues with DSC power gating that cause REG_WAIT timeouts when attempting to power down DSC blocks. [How] Disable dsc_power_gate for dcn314 by default. Reviewed-by: Nicholas Kazlauskas Signed-off-by: Roman Li Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c index de708fdc1e80..663c49cce4aa 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn314/dcn314_resource.c @@ -926,6 +926,7 @@ static const struct dc_debug_options debug_defaults_drv = { .seamless_boot_odm_combine = true, .enable_legacy_fast_update = true, .using_dml2 = false, + .disable_dsc_power_gate = true, }; static const struct dc_panel_config panel_config_defaults = { From a5ce8695d6d1b40d6960d2d298b579042c158f25 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Sun, 6 Jul 2025 08:38:05 -0500 Subject: [PATCH 353/358] drm/amd/display: Avoid configuring PSR granularity if PSR-SU not supported [Why] If PSR-SU is disabled on the link, then configuring su_y granularity in mod_power_calc_psr_configs() can lead to assertions in psr_su_set_dsc_slice_height(). [How] Check the PSR version in amdgpu_dm_link_setup_psr() to determine whether or not to configure granularity. 
Reviewed-by: Sun peng (Leo) Li Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index f984cb0cb889..ff7b867ae98b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -119,8 +119,10 @@ bool amdgpu_dm_link_setup_psr(struct dc_stream_state *stream) psr_config.allow_multi_disp_optimizations = (amdgpu_dc_feature_mask & DC_PSR_ALLOW_MULTI_DISP_OPT); - if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) - return false; + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) { + if (!psr_su_set_dsc_slice_height(dc, link, stream, &psr_config)) + return false; + } ret = dc_link_setup_psr(link, stream, &psr_config, &psr_context); From b174084b3fe15ad1acc69530e673c1535d2e4f85 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 15 Jul 2025 14:41:46 -0500 Subject: [PATCH 354/358] drm/amd/display: Only finalize atomic_obj if it was initialized [Why] If amdgpu_dm failed to initialize before amdgpu_dm_initialize_drm_device() completed then freeing atomic_obj will lead to list corruption. [How] Check if atomic_obj state is initialized before trying to free.
Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 468c9c5a6773..5db0df2b0ecb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5410,7 +5410,8 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm) { - drm_atomic_private_obj_fini(&dm->atomic_obj); + if (dm->atomic_obj.state) + drm_atomic_private_obj_fini(&dm->atomic_obj); } /****************************************************************************** From 2b6943df54136f40aff8a6d7ba7c26724d89a0bd Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 15 May 2025 15:16:17 -0500 Subject: [PATCH 355/358] drm/amd/display: Pass up errors for reset GPU that fails to init HW [Why] If a GPU is in reset and the hardware fails to initialize the rest of the resume sequence shouldn't be run. [How] Pass error code up to caller of dm_resume(). 
Reviewed-by: Alex Hung Signed-off-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5db0df2b0ecb..2a175fc0399c 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3398,8 +3398,10 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) link_enc_cfg_copy(adev->dm.dc->current_state, dc_state); r = dm_dmub_hw_init(adev); - if (r) + if (r) { drm_err(adev_to_drm(adev), "DMUB interface failed to initialize: status=%d\n", r); + return r; + } dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D0); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0); From 2d418e4fd9f1eca7dfce80de86dd702d36a06a25 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Thu, 17 Jul 2025 13:58:35 -0400 Subject: [PATCH 356/358] drm/amd/display: Allow DCN301 to clear update flags [Why & How] Not letting DCN301 clear its update flags after a surface/stream update results in artifacts when switching between active overlay planes. The issue is known and was solved initially.
See below: (https://gitlab.freedesktop.org/drm/amd/-/issues/3441) Fixes: f354556e29f4 ("drm/amd/display: limit clear_update_flags t dcn32 and above") Reviewed-by: Mario Limonciello Signed-off-by: Ivan Lipski Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c31f7f8e409f..28aca7017f0f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -5443,7 +5443,8 @@ bool dc_update_planes_and_stream(struct dc *dc, else ret = update_planes_and_stream_v2(dc, srf_updates, surface_count, stream, stream_update); - if (ret && dc->ctx->dce_version >= DCN_VERSION_3_2) + if (ret && (dc->ctx->dce_version >= DCN_VERSION_3_2 || + dc->ctx->dce_version == DCN_VERSION_3_01)) clear_update_flags(srf_updates, surface_count, stream); return ret; From a0b34e4c8663b13e45c78267b4de3004b1a72490 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 18 Jul 2025 15:53:54 -0400 Subject: [PATCH 357/358] drm/amdgpu: update mmhub 4.1.0 client id mappings Update the client id mapping so the correct clients get printed when there is a mmhub page fault. 
Tested-by: David (Ming Qiang) Wu Reviewed-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c | 34 +++++++++-------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index f2ab5001b492..951998454b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -37,39 +37,31 @@ static const char *mmhub_client_ids_v4_1_0[][2] = { [0][0] = "VMC", [4][0] = "DCEDMC", - [5][0] = "DCEVGA", [6][0] = "MP0", [7][0] = "MP1", [8][0] = "MPIO", - [16][0] = "HDP", - [17][0] = "LSDMA", - [18][0] = "JPEG", - [19][0] = "VCNU0", - [21][0] = "VSCH", - [22][0] = "VCNU1", - [23][0] = "VCN1", - [32+20][0] = "VCN0", - [2][1] = "DBGUNBIO", + [16][0] = "LSDMA", + [17][0] = "JPEG", + [19][0] = "VCNU", + [22][0] = "VSCH", + [23][0] = "HDP", + [32+23][0] = "VCNRD", [3][1] = "DCEDWB", [4][1] = "DCEDMC", - [5][1] = "DCEVGA", [6][1] = "MP0", [7][1] = "MP1", [8][1] = "MPIO", [10][1] = "DBGU0", [11][1] = "DBGU1", - [12][1] = "DBGU2", - [13][1] = "DBGU3", + [12][1] = "DBGUNBIO", [14][1] = "XDP", [15][1] = "OSSSYS", - [16][1] = "HDP", - [17][1] = "LSDMA", - [18][1] = "JPEG", - [19][1] = "VCNU0", - [20][1] = "VCN0", - [21][1] = "VSCH", - [22][1] = "VCNU1", - [23][1] = "VCN1", + [16][1] = "LSDMA", + [17][1] = "JPEG", + [18][1] = "VCNWR", + [19][1] = "VCNU", + [22][1] = "VSCH", + [23][1] = "HDP", }; static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, From f62408efc8669b82541295a4611494c8c8c52684 Mon Sep 17 00:00:00 2001 From: Lukasz Laguna Date: Tue, 29 Jul 2025 14:34:37 +0200 Subject: [PATCH 358/358] drm/xe/vf: Disable CSC support on VF CSC is not accessible by VF drivers, so disable its support flag on VF to prevent further initialization attempts. 
Fixes: e02cea83d32d ("drm/xe/gsc: add Battlemage support") Signed-off-by: Lukasz Laguna Cc: Alexander Usyskin Cc: Michal Wajdeczko Reviewed-by: Michal Wajdeczko Signed-off-by: Michal Wajdeczko Link: https://lore.kernel.org/r/20250729123437.5933-1-lukasz.laguna@intel.com (cherry picked from commit 552dbba1caaf0cb40ce961806d757615e26ec668) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 6dc84e4ed281..5bd2f7d7b4ea 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -681,6 +681,7 @@ static void sriov_update_device_info(struct xe_device *xe) /* disable features that are not available/applicable to VFs */ if (IS_SRIOV_VF(xe)) { xe->info.probe_display = 0; + xe->info.has_heci_cscfi = 0; xe->info.has_heci_gscfi = 0; xe->info.skip_guc_pc = 1; xe->info.skip_pcode = 1;