mirror of
https://github.com/torvalds/linux.git
synced 2026-06-04 20:46:48 +02:00
Merge branch 'for-6.15-console-suspend-api-cleanup' into for-linus
This commit is contained in:
commit
f49040c7aa
|
|
@ -55,6 +55,15 @@ Description:
|
|||
An attribute which indicates whether the patch supports
|
||||
atomic-replace.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/stack_order
|
||||
Date: Jan 2025
|
||||
KernelVersion: 6.14.0
|
||||
Description:
|
||||
This attribute specifies the sequence in which live patch modules
|
||||
are applied to the system. If multiple live patches modify the same
|
||||
function, the implementation with the biggest 'stack_order' number
|
||||
is used, unless a transition is currently in progress.
|
||||
|
||||
What: /sys/kernel/livepatch/<patch>/<object>
|
||||
Date: Nov 2014
|
||||
KernelVersion: 3.19.0
|
||||
|
|
|
|||
281
Documentation/accel/amdxdna/amdnpu.rst
Normal file
281
Documentation/accel/amdxdna/amdnpu.rst
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=========
|
||||
AMD NPU
|
||||
=========
|
||||
|
||||
:Copyright: |copy| 2024 Advanced Micro Devices, Inc.
|
||||
:Author: Sonal Santan <sonal.santan@amd.com>
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
AMD NPU (Neural Processing Unit) is a multi-user AI inference accelerator
|
||||
integrated into AMD client APU. NPU enables efficient execution of Machine
|
||||
Learning applications like CNN, LLM, etc. NPU is based on
|
||||
`AMD XDNA Architecture`_. NPU is managed by **amdxdna** driver.
|
||||
|
||||
|
||||
Hardware Description
|
||||
====================
|
||||
|
||||
AMD NPU consists of the following hardware components:
|
||||
|
||||
AMD XDNA Array
|
||||
--------------
|
||||
|
||||
AMD XDNA Array comprises a 2D array of compute and memory tiles built with
|
||||
`AMD AI Engine Technology`_. Each column has 4 rows of compute tiles and 1
|
||||
row of memory tile. Each compute tile contains a VLIW processor with its own
|
||||
dedicated program and data memory. The memory tile acts as L2 memory. The 2D
|
||||
array can be partitioned at a column boundary creating a spatially isolated
|
||||
partition which can be bound to a workload context.
|
||||
|
||||
Each column also has dedicated DMA engines to move data between host DDR and
|
||||
memory tile.
|
||||
|
||||
AMD Phoenix and AMD Hawk Point client NPU have a 4x5 topology, i.e., 4 rows of
|
||||
compute tiles arranged into 5 columns. AMD Strix Point client APU has a 4x8
|
||||
topology, i.e., 4 rows of compute tiles arranged into 8 columns.
|
||||
|
||||
Shared L2 Memory
|
||||
----------------
|
||||
|
||||
The single row of memory tiles creates a pool of software-managed on-chip L2
|
||||
memory. DMA engines are used to move data between host DDR and memory tiles.
|
||||
AMD Phoenix and AMD Hawk Point NPUs have a total of 2560 KB of L2 memory.
|
||||
AMD Strix Point NPU has a total of 4096 KB of L2 memory.
|
||||
|
||||
Microcontroller
|
||||
---------------
|
||||
|
||||
A microcontroller runs NPU Firmware which is responsible for command processing,
|
||||
XDNA Array partition setup, XDNA Array configuration, workload context
|
||||
management and workload orchestration.
|
||||
|
||||
NPU Firmware uses a dedicated instance of an isolated non-privileged context
|
||||
called ERT to service each workload context. ERT is also used to execute user
|
||||
provided ``ctrlcode`` associated with the workload context.
|
||||
|
||||
NPU Firmware uses a single isolated privileged context called MERT to service
|
||||
management commands from the amdxdna driver.
|
||||
|
||||
Mailboxes
|
||||
---------
|
||||
|
||||
The microcontroller and amdxdna driver use a privileged channel for management
|
||||
tasks like setting up of contexts, telemetry, query, error handling, setting up
|
||||
user channel, etc. As mentioned before, privileged channel requests are
|
||||
serviced by MERT. The privileged channel is bound to a single mailbox.
|
||||
|
||||
The microcontroller and amdxdna driver use a dedicated user channel per
|
||||
workload context. The user channel is primarily used for submitting work to
|
||||
the NPU. As mentioned before, user channel requests are serviced by an
|
||||
instance of ERT. Each user channel is bound to its own dedicated mailbox.
|
||||
|
||||
PCIe EP
|
||||
-------
|
||||
|
||||
NPU is visible to the x86 host CPU as a PCIe device with multiple BARs and some
|
||||
MSI-X interrupt vectors. NPU uses a dedicated high bandwidth SoC level fabric
|
||||
for reading or writing into host memory. Each instance of ERT gets its own
|
||||
dedicated MSI-X interrupt. MERT gets a single instance of MSI-X interrupt.
|
||||
|
||||
The number of PCIe BARs varies depending on the specific device. Based on their
|
||||
functions, PCIe BARs can generally be categorized into the following types.
|
||||
|
||||
* PSP BAR: Expose the AMD PSP (Platform Security Processor) function
|
||||
* SMU BAR: Expose the AMD SMU (System Management Unit) function
|
||||
* SRAM BAR: Expose ring buffers for the mailbox
|
||||
* Mailbox BAR: Expose the mailbox control registers (head, tail and ISR
|
||||
registers etc.)
|
||||
* Public Register BAR: Expose public registers
|
||||
|
||||
On specific devices, the above-mentioned BAR types might be combined into a
|
||||
single physical PCIe BAR. Or a module might require two physical PCIe BARs to
|
||||
be fully functional. For example,
|
||||
|
||||
* On AMD Phoenix device, PSP, SMU, Public Register BARs are on PCIe BAR index 0.
|
||||
* On AMD Strix Point device, Mailbox and Public Register BARs are on PCIe BAR
|
||||
index 0. The PSP has some registers in PCIe BAR index 0 (Public Register BAR)
|
||||
and PCIe BAR index 4 (PSP BAR).
|
||||
|
||||
Process Isolation Hardware
|
||||
--------------------------
|
||||
|
||||
As explained before, XDNA Array can be dynamically divided into isolated
|
||||
spatial partitions, each of which may have one or more columns. The spatial
|
||||
partition is set up by programming the column isolation registers by the
|
||||
microcontroller. Each spatial partition is associated with a PASID which is
|
||||
also programmed by the microcontroller. Hence multiple spatial partitions in
|
||||
the NPU can make concurrent host access protected by PASID.
|
||||
|
||||
The NPU FW itself uses microcontroller MMU enforced isolated contexts for
|
||||
servicing user and privileged channel requests.
|
||||
|
||||
|
||||
Mixed Spatial and Temporal Scheduling
|
||||
=====================================
|
||||
|
||||
AMD XDNA architecture supports mixed spatial and temporal (time sharing)
|
||||
scheduling of 2D array. This means that spatial partitions may be set up and
|
||||
torn down dynamically to accommodate various workloads. A *spatial* partition
|
||||
may be *exclusively* bound to one workload context while another partition may
|
||||
be *temporarily* bound to more than one workload context. The microcontroller
|
||||
updates the PASID for a temporarily shared partition to match the context that
|
||||
has been bound to the partition at any moment.
|
||||
|
||||
Resource Solver
|
||||
---------------
|
||||
|
||||
The Resource Solver component of the amdxdna driver manages the allocation
|
||||
of 2D array among various workloads. Every workload describes the number
|
||||
of columns required to run the NPU binary in its metadata. The Resource Solver
|
||||
component uses hints passed by the workload and its own heuristics to
|
||||
decide 2D array (re)partition strategy and mapping of workloads for spatial and
|
||||
temporal sharing of columns. The FW enforces the context-to-column(s) resource
|
||||
binding decisions made by the Resource Solver.
|
||||
|
||||
AMD Phoenix and AMD Hawk Point client NPU can support 6 concurrent workload
|
||||
contexts. AMD Strix Point can support 16 concurrent workload contexts.
|
||||
|
||||
|
||||
Application Binaries
|
||||
====================
|
||||
|
||||
An NPU application workload is comprised of two separate binaries which are
|
||||
generated by the NPU compiler.
|
||||
|
||||
1. AMD XDNA Array overlay, which is used to configure an NPU spatial partition.
|
||||
The overlay contains instructions for setting up the stream switch
|
||||
configuration and ELF for the compute tiles. The overlay is loaded on the
|
||||
spatial partition bound to the workload by the associated ERT instance.
|
||||
Refer to the
|
||||
`Versal Adaptive SoC AIE-ML Architecture Manual (AM020)`_ for more details.
|
||||
|
||||
2. ``ctrlcode``, used for orchestrating the overlay loaded on the spatial
|
||||
partition. ``ctrlcode`` is executed by the ERT running in protected mode on
|
||||
the microcontroller in the context of the workload. ``ctrlcode`` is made up
|
||||
of a sequence of opcodes named ``XAie_TxnOpcode``. Refer to the
|
||||
`AI Engine Run Time`_ for more details.
|
||||
|
||||
|
||||
Special Host Buffers
|
||||
====================
|
||||
|
||||
Per-context Instruction Buffer
|
||||
------------------------------
|
||||
|
||||
Every workload context uses a host resident 64 MB buffer which is memory
|
||||
mapped into the ERT instance created to service the workload. The ``ctrlcode``
|
||||
used by the workload is copied into this special memory. This buffer is
|
||||
protected by PASID like all other input/output buffers used by that workload.
|
||||
Instruction buffer is also mapped into the user space of the workload.
|
||||
|
||||
Global Privileged Buffer
|
||||
------------------------
|
||||
|
||||
In addition, the driver also allocates a single buffer for maintenance tasks
|
||||
like recording errors from MERT. This global buffer uses the global IOMMU
|
||||
domain and is only accessible by MERT.
|
||||
|
||||
|
||||
High-level Use Flow
|
||||
===================
|
||||
|
||||
Here are the steps to run a workload on AMD NPU:
|
||||
|
||||
1. Compile the workload into an overlay and a ``ctrlcode`` binary.
|
||||
2. Userspace opens a context in the driver and provides the overlay.
|
||||
3. The driver checks with the Resource Solver for provisioning a set of columns
|
||||
for the workload.
|
||||
4. The driver then asks MERT to create a context on the device with the desired
|
||||
columns.
|
||||
5. MERT then creates an instance of ERT. MERT also maps the Instruction Buffer
|
||||
into ERT memory.
|
||||
6. The userspace then copies the ``ctrlcode`` to the Instruction Buffer.
|
||||
7. Userspace then creates a command buffer with pointers to input, output, and
|
||||
instruction buffer; it then submits the command buffer to the driver and goes
|
||||
to sleep waiting for completion.
|
||||
8. The driver sends the command over the Mailbox to ERT.
|
||||
9. ERT *executes* the ``ctrlcode`` in the instruction buffer.
|
||||
10. Execution of the ``ctrlcode`` kicks off DMAs to and from the host DDR while
|
||||
AMD XDNA Array is running.
|
||||
11. When ERT reaches end of ``ctrlcode``, it raises an MSI-X to send completion
|
||||
signal to the driver which then wakes up the waiting workload.
|
||||
|
||||
|
||||
Boot Flow
|
||||
=========
|
||||
|
||||
amdxdna driver uses PSP to securely load signed NPU FW and kick off the boot
|
||||
of the NPU microcontroller. amdxdna driver then waits for the alive signal in
|
||||
a special location on BAR 0. The NPU is switched off during SoC suspend and
|
||||
turned on after resume where the NPU FW is reloaded, and the handshake is
|
||||
performed again.
|
||||
|
||||
|
||||
Userspace components
|
||||
====================
|
||||
|
||||
Compiler
|
||||
--------
|
||||
|
||||
Peano is an LLVM based open-source compiler for AMD XDNA Array compute tile
|
||||
available at:
|
||||
https://github.com/Xilinx/llvm-aie
|
||||
|
||||
The open-source IREE compiler supports graph compilation of ML models for AMD
|
||||
NPU and uses Peano underneath. It is available at:
|
||||
https://github.com/nod-ai/iree-amd-aie
|
||||
|
||||
Usermode Driver (UMD)
|
||||
---------------------
|
||||
|
||||
The open-source XRT runtime stack interfaces with amdxdna kernel driver. XRT
|
||||
can be found at:
|
||||
https://github.com/Xilinx/XRT
|
||||
|
||||
The open-source XRT shim for NPU can be found at:
|
||||
https://github.com/amd/xdna-driver
|
||||
|
||||
|
||||
DMA Operation
|
||||
=============
|
||||
|
||||
DMA operation instructions are encoded in the ``ctrlcode`` as
|
||||
``XAIE_IO_BLOCKWRITE`` opcode. When ERT executes ``XAIE_IO_BLOCKWRITE``, DMA
|
||||
operations between host DDR and L2 memory are effected.
|
||||
|
||||
|
||||
Error Handling
|
||||
==============
|
||||
|
||||
When MERT detects an error in AMD XDNA Array, it pauses execution for that
|
||||
workload context and sends an asynchronous message to the driver over the
|
||||
privileged channel. The driver then sends a buffer pointer to MERT to capture
|
||||
the register states for the partition bound to faulting workload context. The
|
||||
driver then decodes the error by reading the contents of the buffer pointer.
|
||||
|
||||
|
||||
Telemetry
|
||||
=========
|
||||
|
||||
MERT can report various kinds of telemetry information like the following:
|
||||
|
||||
* L1 interrupt counter
|
||||
* DMA counter
|
||||
* Deep Sleep counter
|
||||
* etc.
|
||||
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
- `AMD XDNA Architecture <https://www.amd.com/en/technologies/xdna.html>`_
|
||||
- `AMD AI Engine Technology <https://www.xilinx.com/products/technology/ai-engine.html>`_
|
||||
- `Peano <https://github.com/Xilinx/llvm-aie>`_
|
||||
- `Versal Adaptive SoC AIE-ML Architecture Manual (AM020) <https://docs.amd.com/r/en-US/am020-versal-aie-ml>`_
|
||||
- `AI Engine Run Time <https://github.com/Xilinx/aie-rt/tree/release/main_aig>`_
|
||||
11
Documentation/accel/amdxdna/index.rst
Normal file
11
Documentation/accel/amdxdna/index.rst
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
=====================================
|
||||
accel/amdxdna NPU driver
|
||||
=====================================
|
||||
|
||||
The accel/amdxdna driver supports the AMD NPU (Neural Processing Unit).
|
||||
|
||||
.. toctree::
|
||||
|
||||
amdnpu
|
||||
|
|
@ -8,6 +8,7 @@ Compute Accelerators
|
|||
:maxdepth: 1
|
||||
|
||||
introduction
|
||||
amdxdna/index
|
||||
qaic/index
|
||||
|
||||
.. only:: subproject and html
|
||||
|
|
|
|||
|
|
@ -64,13 +64,14 @@ v1 is available under :ref:`Documentation/admin-guide/cgroup-v1/index.rst <cgrou
|
|||
5-6. Device
|
||||
5-7. RDMA
|
||||
5-7-1. RDMA Interface Files
|
||||
5-8. HugeTLB
|
||||
5.8-1. HugeTLB Interface Files
|
||||
5-9. Misc
|
||||
5.9-1 Miscellaneous cgroup Interface Files
|
||||
5.9-2 Migration and Ownership
|
||||
5-10. Others
|
||||
5-10-1. perf_event
|
||||
5-8. DMEM
|
||||
5-9. HugeTLB
|
||||
5.9-1. HugeTLB Interface Files
|
||||
5-10. Misc
|
||||
5.10-1 Miscellaneous cgroup Interface Files
|
||||
5.10-2 Migration and Ownership
|
||||
5-11. Others
|
||||
5-11-1. perf_event
|
||||
5-N. Non-normative information
|
||||
5-N-1. CPU controller root cgroup process behaviour
|
||||
5-N-2. IO controller root cgroup process behaviour
|
||||
|
|
@ -2626,6 +2627,49 @@ RDMA Interface Files
|
|||
mlx4_0 hca_handle=1 hca_object=20
|
||||
ocrdma1 hca_handle=1 hca_object=23
|
||||
|
||||
DMEM
|
||||
----
|
||||
|
||||
The "dmem" controller regulates the distribution and accounting of
|
||||
device memory regions. Because each memory region may have its own page size,
|
||||
which does not have to be equal to the system page size, the units are always bytes.
|
||||
|
||||
DMEM Interface Files
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
dmem.max, dmem.min, dmem.low
|
||||
A readwrite nested-keyed file that exists for all the cgroups
|
||||
except root that describes the currently configured resource limit
|
||||
for a region.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 1073741824
|
||||
drm/0000:03:00.0/stolen max
|
||||
|
||||
The semantics are the same as for the memory cgroup controller, and are
|
||||
calculated in the same way.
|
||||
|
||||
dmem.capacity
|
||||
A read-only file that describes maximum region capacity.
|
||||
It only exists on the root cgroup. Not all memory can be
|
||||
allocated by cgroups, as the kernel reserves some for
|
||||
internal use.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 8514437120
|
||||
drm/0000:03:00.0/stolen 67108864
|
||||
|
||||
dmem.current
|
||||
A read-only file that describes current resource usage.
|
||||
It exists for all the cgroups except root.
|
||||
|
||||
An example for xe follows::
|
||||
|
||||
drm/0000:03:00.0/vram0 12550144
|
||||
drm/0000:03:00.0/stolen 8650752
|
||||
|
||||
HugeTLB
|
||||
-------
|
||||
|
||||
|
|
|
|||
|
|
@ -5532,7 +5532,42 @@
|
|||
|
||||
rcutorture.gp_cond= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
primitives, if available.
|
||||
normal-grace-period primitives, if available.
|
||||
|
||||
rcutorture.gp_cond_exp= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
expedited-grace-period primitives, if available.
|
||||
|
||||
rcutorture.gp_cond_full= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
normal-grace-period primitives that also take
|
||||
concurrent expedited grace periods into account,
|
||||
if available.
|
||||
|
||||
rcutorture.gp_cond_exp_full= [KNL]
|
||||
Use conditional/asynchronous update-side
|
||||
expedited-grace-period primitives that also take
|
||||
concurrent normal grace periods into account,
|
||||
if available.
|
||||
|
||||
rcutorture.gp_cond_wi= [KNL]
|
||||
Nominal wait interval for normal conditional
|
||||
grace periods (specified by rcutorture's
|
||||
gp_cond and gp_cond_full module parameters),
|
||||
in microseconds. The actual wait interval will
|
||||
be randomly selected to nanosecond granularity up
|
||||
to this wait interval. Defaults to 16 jiffies,
|
||||
for example, 16,000 microseconds on a system
|
||||
with HZ=1000.
|
||||
|
||||
rcutorture.gp_cond_wi_exp= [KNL]
|
||||
Nominal wait interval for expedited conditional
|
||||
grace periods (specified by rcutorture's
|
||||
gp_cond_exp and gp_cond_exp_full module
|
||||
parameters), in microseconds. The actual wait
|
||||
interval will be randomly selected to nanosecond
|
||||
granularity up to this wait interval. Defaults to
|
||||
128 microseconds.
|
||||
|
||||
rcutorture.gp_exp= [KNL]
|
||||
Use expedited update-side primitives, if available.
|
||||
|
|
@ -5541,6 +5576,43 @@
|
|||
Use normal (non-expedited) asynchronous
|
||||
update-side primitives, if available.
|
||||
|
||||
rcutorture.gp_poll= [KNL]
|
||||
Use polled update-side normal-grace-period
|
||||
primitives, if available.
|
||||
|
||||
rcutorture.gp_poll_exp= [KNL]
|
||||
Use polled update-side expedited-grace-period
|
||||
primitives, if available.
|
||||
|
||||
rcutorture.gp_poll_full= [KNL]
|
||||
Use polled update-side normal-grace-period
|
||||
primitives that also take concurrent expedited
|
||||
grace periods into account, if available.
|
||||
|
||||
rcutorture.gp_poll_exp_full= [KNL]
|
||||
Use polled update-side expedited-grace-period
|
||||
primitives that also take concurrent normal
|
||||
grace periods into account, if available.
|
||||
|
||||
rcutorture.gp_poll_wi= [KNL]
|
||||
Nominal wait interval for normal polled
|
||||
grace periods (specified by rcutorture's
|
||||
gp_poll and gp_poll_full module parameters),
|
||||
in microseconds. The actual wait interval will
|
||||
be randomly selected to nanosecond granularity up
|
||||
to this wait interval. Defaults to 16 jiffies,
|
||||
for example, 16,000 microseconds on a system
|
||||
with HZ=1000.
|
||||
|
||||
rcutorture.gp_poll_wi_exp= [KNL]
|
||||
Nominal wait interval for expedited polled
|
||||
grace periods (specified by rcutorture's
|
||||
gp_poll_exp and gp_poll_exp_full module
|
||||
parameters), in microseconds. The actual wait
|
||||
interval will be randomly selected to nanosecond
|
||||
granularity up to this wait interval. Defaults to
|
||||
128 microseconds.
|
||||
|
||||
rcutorture.gp_sync= [KNL]
|
||||
Use normal (non-expedited) synchronous
|
||||
update-side primitives, if available. If all
|
||||
|
|
@ -5594,6 +5666,22 @@
|
|||
Set time (jiffies) between CPU-hotplug operations,
|
||||
or zero to disable CPU-hotplug testing.
|
||||
|
||||
rcutorture.preempt_duration= [KNL]
|
||||
Set duration (in milliseconds) of preemptions
|
||||
by a high-priority FIFO real-time task. Set to
|
||||
zero (the default) to disable. The CPUs to
|
||||
preempt are selected randomly from the set that
|
||||
are online at a given point in time. Races with
|
||||
CPUs going offline are ignored, with that attempt
|
||||
at preemption skipped.
|
||||
|
||||
rcutorture.preempt_interval= [KNL]
|
||||
Set interval (in milliseconds, defaulting to one
|
||||
second) between preemptions by a high-priority
|
||||
FIFO real-time task. This delay is mediated
|
||||
by an hrtimer and is further fuzzed to avoid
|
||||
inadvertent synchronizations.
|
||||
|
||||
rcutorture.read_exit_burst= [KNL]
|
||||
The number of times in a given read-then-exit
|
||||
episode that a set of read-then-exit kthreads
|
||||
|
|
|
|||
9
Documentation/core-api/cgroup.rst
Normal file
9
Documentation/core-api/cgroup.rst
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
==================
|
||||
Cgroup Kernel APIs
|
||||
==================
|
||||
|
||||
Device Memory Cgroup API (dmemcg)
|
||||
=================================
|
||||
.. kernel-doc:: kernel/cgroup/dmem.c
|
||||
:export:
|
||||
|
||||
|
|
@ -109,6 +109,7 @@ more memory-management documentation in Documentation/mm/index.rst.
|
|||
dma-isa-lpc
|
||||
swiotlb
|
||||
mm-api
|
||||
cgroup
|
||||
genalloc
|
||||
pin_user_pages
|
||||
boot-time-mm
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ properties:
|
|||
enum:
|
||||
- brcm,bcm2711-hdmi0
|
||||
- brcm,bcm2711-hdmi1
|
||||
- brcm,bcm2712-hdmi0
|
||||
- brcm,bcm2712-hdmi1
|
||||
|
||||
reg:
|
||||
items:
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ properties:
|
|||
compatible:
|
||||
enum:
|
||||
- brcm,bcm2711-hvs
|
||||
- brcm,bcm2712-hvs
|
||||
- brcm,bcm2835-hvs
|
||||
|
||||
reg:
|
||||
|
|
@ -36,7 +37,9 @@ if:
|
|||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: brcm,bcm2711-hvs
|
||||
enum:
|
||||
- brcm,bcm2711-hvs
|
||||
- brcm,bcm2712-hvs
|
||||
|
||||
then:
|
||||
required:
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ properties:
|
|||
- brcm,bcm2711-pixelvalve2
|
||||
- brcm,bcm2711-pixelvalve3
|
||||
- brcm,bcm2711-pixelvalve4
|
||||
- brcm,bcm2712-pixelvalve0
|
||||
- brcm,bcm2712-pixelvalve1
|
||||
- brcm,bcm2712-pixelvalve2
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
|
|
|||
|
|
@ -11,7 +11,10 @@ maintainers:
|
|||
|
||||
properties:
|
||||
compatible:
|
||||
const: brcm,bcm2835-txp
|
||||
enum:
|
||||
- brcm,bcm2712-mop
|
||||
- brcm,bcm2712-moplet
|
||||
- brcm,bcm2835-txp
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ properties:
|
|||
compatible:
|
||||
enum:
|
||||
- brcm,bcm2711-vc5
|
||||
- brcm,bcm2712-vc6
|
||||
- brcm,bcm2835-vc4
|
||||
- brcm,cygnus-vc4
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ properties:
|
|||
enum:
|
||||
- renesas,r8a779a0-dsi-csi2-tx # for V3U
|
||||
- renesas,r8a779g0-dsi-csi2-tx # for V4H
|
||||
- renesas,r8a779h0-dsi-csi2-tx # for V4M
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
|
|
|||
|
|
@ -80,12 +80,12 @@ properties:
|
|||
- const: 4
|
||||
|
||||
port@2:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description: Video port for LVDS Channel-A output (panel or bridge).
|
||||
$ref: '#/$defs/lvds-port'
|
||||
|
||||
port@3:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description: Video port for LVDS Channel-B output (panel or bridge).
|
||||
$ref: '#/$defs/lvds-port'
|
||||
|
||||
required:
|
||||
- port@0
|
||||
|
|
@ -96,6 +96,36 @@ required:
|
|||
- reg
|
||||
- ports
|
||||
|
||||
$defs:
|
||||
lvds-port:
|
||||
$ref: /schemas/graph.yaml#/$defs/port-base
|
||||
unevaluatedProperties: false
|
||||
|
||||
properties:
|
||||
endpoint:
|
||||
$ref: /schemas/media/video-interfaces.yaml#
|
||||
unevaluatedProperties: false
|
||||
|
||||
properties:
|
||||
ti,lvds-termination-ohms:
|
||||
description: The value of near end differential termination in ohms.
|
||||
enum: [100, 200]
|
||||
default: 200
|
||||
|
||||
ti,lvds-vod-swing-clock-microvolt:
|
||||
description: LVDS differential output voltage <min max> for clock
|
||||
lanes in microvolts.
|
||||
$ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
ti,lvds-vod-swing-data-microvolt:
|
||||
description: LVDS differential output voltage <min max> for data
|
||||
lanes in microvolts.
|
||||
$ref: /schemas/types.yaml#/definitions/uint32-array
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
allOf:
|
||||
- if:
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ title: MSM Display Port Controller
|
|||
|
||||
maintainers:
|
||||
- Kuogee Hsieh <quic_khsieh@quicinc.com>
|
||||
- Abhinav Kumar <quic_abhinavk@quicinc.com>
|
||||
|
||||
description: |
|
||||
Device tree bindings for DisplayPort host controller for MSM targets
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ properties:
|
|||
- qcom,sdm845-dsi-ctrl
|
||||
- qcom,sm6115-dsi-ctrl
|
||||
- qcom,sm6125-dsi-ctrl
|
||||
- qcom,sm6150-dsi-ctrl
|
||||
- qcom,sm6350-dsi-ctrl
|
||||
- qcom,sm6375-dsi-ctrl
|
||||
- qcom,sm7150-dsi-ctrl
|
||||
|
|
@ -349,6 +350,7 @@ allOf:
|
|||
enum:
|
||||
- qcom,sc7180-dsi-ctrl
|
||||
- qcom,sc7280-dsi-ctrl
|
||||
- qcom,sm6150-dsi-ctrl
|
||||
- qcom,sm7150-dsi-ctrl
|
||||
- qcom,sm8150-dsi-ctrl
|
||||
- qcom,sm8250-dsi-ctrl
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ properties:
|
|||
- qcom,dsi-phy-14nm-660
|
||||
- qcom,dsi-phy-14nm-8953
|
||||
- qcom,sm6125-dsi-phy-14nm
|
||||
- qcom,sm6150-dsi-phy-14nm
|
||||
|
||||
reg:
|
||||
items:
|
||||
|
|
|
|||
|
|
@ -168,7 +168,8 @@ examples:
|
|||
reg = <0xaf54000 0x104>,
|
||||
<0xaf54200 0x0c0>,
|
||||
<0xaf55000 0x770>,
|
||||
<0xaf56000 0x09c>;
|
||||
<0xaf56000 0x09c>,
|
||||
<0xaf57000 0x09c>;
|
||||
|
||||
interrupt-parent = <&mdss0>;
|
||||
interrupts = <12>;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/msm/qcom,sm6150-dpu.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm SM6150 Display DPU
|
||||
|
||||
maintainers:
|
||||
- Abhinav Kumar <quic_abhinavk@quicinc.com>
|
||||
- Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
|
||||
|
||||
$ref: /schemas/display/msm/dpu-common.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6150-dpu
|
||||
|
||||
reg:
|
||||
items:
|
||||
- description: Address offset and size for mdp register set
|
||||
- description: Address offset and size for vbif register set
|
||||
|
||||
reg-names:
|
||||
items:
|
||||
- const: mdp
|
||||
- const: vbif
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Display ahb clock
|
||||
- description: Display hf axi clock
|
||||
- description: Display core clock
|
||||
- description: Display vsync clock
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: iface
|
||||
- const: bus
|
||||
- const: core
|
||||
- const: vsync
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/qcom,rpmhpd.h>
|
||||
|
||||
display-controller@ae01000 {
|
||||
compatible = "qcom,sm6150-dpu";
|
||||
reg = <0x0ae01000 0x8f000>,
|
||||
<0x0aeb0000 0x2008>;
|
||||
reg-names = "mdp", "vbif";
|
||||
|
||||
clocks = <&dispcc_mdss_ahb_clk>,
|
||||
<&gcc_disp_hf_axi_clk>,
|
||||
<&dispcc_mdss_mdp_clk>,
|
||||
<&dispcc_mdss_vsync_clk>;
|
||||
clock-names = "iface", "bus", "core", "vsync";
|
||||
|
||||
assigned-clocks = <&dispcc_mdss_vsync_clk>;
|
||||
assigned-clock-rates = <19200000>;
|
||||
|
||||
operating-points-v2 = <&mdp_opp_table>;
|
||||
power-domains = <&rpmhpd RPMHPD_CX>;
|
||||
|
||||
interrupt-parent = <&mdss>;
|
||||
interrupts = <0>;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
dpu_intf0_out: endpoint {
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
dpu_intf1_out: endpoint {
|
||||
remote-endpoint = <&mdss_dsi0_in>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
mdp_opp_table: opp-table {
|
||||
compatible = "operating-points-v2";
|
||||
|
||||
opp-19200000 {
|
||||
opp-hz = /bits/ 64 <19200000>;
|
||||
required-opps = <&rpmhpd_opp_low_svs>;
|
||||
};
|
||||
|
||||
opp-25600000 {
|
||||
opp-hz = /bits/ 64 <25600000>;
|
||||
required-opps = <&rpmhpd_opp_svs>;
|
||||
};
|
||||
|
||||
opp-307200000 {
|
||||
opp-hz = /bits/ 64 <307200000>;
|
||||
required-opps = <&rpmhpd_opp_nom>;
|
||||
};
|
||||
};
|
||||
};
|
||||
...
|
||||
|
|
@ -0,0 +1,245 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/msm/qcom,sm6150-mdss.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Qualcomm SM6150 Display MDSS
|
||||
|
||||
maintainers:
|
||||
- Abhinav Kumar <quic_abhinavk@quicinc.com>
|
||||
- Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
|
||||
|
||||
description:
|
||||
Device tree bindings for MSM Mobile Display Subsystem (MDSS) that encapsulates
|
||||
sub-blocks like DPU display controller, DSI and DP interfaces etc. Device tree
|
||||
bindings of MDSS are mentioned for SM6150 target.
|
||||
|
||||
$ref: /schemas/display/msm/mdss-common.yaml#
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: qcom,sm6150-mdss
|
||||
|
||||
clocks:
|
||||
items:
|
||||
- description: Display AHB clock from gcc
|
||||
- description: Display hf axi clock
|
||||
- description: Display core clock
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: iface
|
||||
- const: bus
|
||||
- const: core
|
||||
|
||||
iommus:
|
||||
maxItems: 1
|
||||
|
||||
interconnects:
|
||||
maxItems: 2
|
||||
|
||||
interconnect-names:
|
||||
maxItems: 2
|
||||
|
||||
patternProperties:
|
||||
"^display-controller@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6150-dpu
|
||||
|
||||
"^dsi@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
properties:
|
||||
compatible:
|
||||
items:
|
||||
- const: qcom,sm6150-dsi-ctrl
|
||||
- const: qcom,mdss-dsi-ctrl
|
||||
|
||||
"^phy@[0-9a-f]+$":
|
||||
type: object
|
||||
additionalProperties: true
|
||||
properties:
|
||||
compatible:
|
||||
const: qcom,sm6150-dsi-phy-14nm
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/qcom,rpmh.h>
|
||||
#include <dt-bindings/interconnect/qcom,icc.h>
|
||||
#include <dt-bindings/interconnect/qcom,qcs615-rpmh.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/power/qcom,rpmhpd.h>
|
||||
|
||||
display-subsystem@ae00000 {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
compatible = "qcom,sm6150-mdss";
|
||||
reg = <0x0ae00000 0x1000>;
|
||||
reg-names = "mdss";
|
||||
|
||||
interconnects = <&mmss_noc MASTER_MDP0 QCOM_ICC_TAG_ALWAYS
|
||||
&mc_virt SLAVE_EBI1 QCOM_ICC_TAG_ALWAYS>,
|
||||
<&gem_noc MASTER_APPSS_PROC QCOM_ICC_TAG_ACTIVE_ONLY
|
||||
&config_noc SLAVE_DISPLAY_CFG QCOM_ICC_TAG_ACTIVE_ONLY>;
|
||||
interconnect-names = "mdp0-mem", "cpu-cfg";
|
||||
|
||||
power-domains = <&dispcc_mdss_gdsc>;
|
||||
|
||||
clocks = <&dispcc_mdss_ahb_clk>,
|
||||
<&gcc_disp_hf_axi_clk>,
|
||||
<&dispcc_mdss_mdp_clk>;
|
||||
|
||||
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <1>;
|
||||
|
||||
iommus = <&apps_smmu 0x800 0x0>;
|
||||
|
||||
ranges;
|
||||
|
||||
display-controller@ae01000 {
|
||||
compatible = "qcom,sm6150-dpu";
|
||||
reg = <0x0ae01000 0x8f000>,
|
||||
<0x0aeb0000 0x2008>;
|
||||
reg-names = "mdp", "vbif";
|
||||
|
||||
clocks = <&dispcc_mdss_ahb_clk>,
|
||||
<&gcc_disp_hf_axi_clk>,
|
||||
<&dispcc_mdss_mdp_clk>,
|
||||
<&dispcc_mdss_vsync_clk>;
|
||||
clock-names = "iface", "bus", "core", "vsync";
|
||||
|
||||
assigned-clocks = <&dispcc_mdss_vsync_clk>;
|
||||
assigned-clock-rates = <19200000>;
|
||||
|
||||
operating-points-v2 = <&mdp_opp_table>;
|
||||
power-domains = <&rpmhpd RPMHPD_CX>;
|
||||
|
||||
interrupt-parent = <&mdss>;
|
||||
interrupts = <0>;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
dpu_intf0_out: endpoint {
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
dpu_intf1_out: endpoint {
|
||||
remote-endpoint = <&mdss_dsi0_in>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
mdp_opp_table: opp-table {
|
||||
compatible = "operating-points-v2";
|
||||
|
||||
opp-19200000 {
|
||||
opp-hz = /bits/ 64 <19200000>;
|
||||
required-opps = <&rpmhpd_opp_low_svs>;
|
||||
};
|
||||
|
||||
opp-25600000 {
|
||||
opp-hz = /bits/ 64 <25600000>;
|
||||
required-opps = <&rpmhpd_opp_svs>;
|
||||
};
|
||||
|
||||
opp-307200000 {
|
||||
opp-hz = /bits/ 64 <307200000>;
|
||||
required-opps = <&rpmhpd_opp_nom>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
dsi@ae94000 {
|
||||
compatible = "qcom,sm6150-dsi-ctrl",
|
||||
"qcom,mdss-dsi-ctrl";
|
||||
reg = <0x0ae94000 0x400>;
|
||||
reg-names = "dsi_ctrl";
|
||||
|
||||
interrupt-parent = <&mdss>;
|
||||
interrupts = <4>;
|
||||
|
||||
clocks = <&dispcc_mdss_byte0_clk>,
|
||||
<&dispcc_mdss_byte0_intf_clk>,
|
||||
<&dispcc_mdss_pclk0_clk>,
|
||||
<&dispcc_mdss_esc0_clk>,
|
||||
<&dispcc_mdss_ahb_clk>,
|
||||
<&gcc_disp_hf_axi_clk>;
|
||||
clock-names = "byte",
|
||||
"byte_intf",
|
||||
"pixel",
|
||||
"core",
|
||||
"iface",
|
||||
"bus";
|
||||
|
||||
assigned-clocks = <&dispcc_mdss_byte0_clk_src>,
|
||||
<&dispcc_mdss_pclk0_clk_src>;
|
||||
assigned-clock-parents = <&mdss_dsi0_phy 0>,
|
||||
<&mdss_dsi0_phy 1>;
|
||||
|
||||
operating-points-v2 = <&dsi0_opp_table>;
|
||||
|
||||
phys = <&mdss_dsi0_phy>;
|
||||
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
|
||||
port@0 {
|
||||
reg = <0>;
|
||||
mdss_dsi0_in: endpoint {
|
||||
remote-endpoint = <&dpu_intf1_out>;
|
||||
};
|
||||
};
|
||||
|
||||
port@1 {
|
||||
reg = <1>;
|
||||
mdss_dsi0_out: endpoint {
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
dsi0_opp_table: opp-table {
|
||||
compatible = "operating-points-v2";
|
||||
|
||||
opp-164000000 {
|
||||
opp-hz = /bits/ 64 <164000000>;
|
||||
required-opps = <&rpmhpd_opp_low_svs>;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
mdss_dsi0_phy: phy@ae94400 {
|
||||
compatible = "qcom,sm6150-dsi-phy-14nm";
|
||||
reg = <0x0ae94400 0x100>,
|
||||
<0x0ae94500 0x300>,
|
||||
<0x0ae94800 0x188>;
|
||||
reg-names = "dsi_phy",
|
||||
"dsi_phy_lane",
|
||||
"dsi_pll";
|
||||
|
||||
#clock-cells = <1>;
|
||||
#phy-cells = <0>;
|
||||
|
||||
clocks = <&dispcc_mdss_ahb_clk>,
|
||||
<&rpmhcc RPMH_CXO_CLK>;
|
||||
clock-names = "iface", "ref";
|
||||
};
|
||||
};
|
||||
...
|
||||
|
|
@ -42,6 +42,8 @@ properties:
|
|||
# Admatec 9904379 10.1" 1024x600 LVDS panel
|
||||
- admatec,9904379
|
||||
- auo,b101ew05
|
||||
# AUO G084SN05 V9 8.4" 800x600 LVDS panel
|
||||
- auo,g084sn05
|
||||
# Chunghwa Picture Tubes Ltd. 7" WXGA (800x1280) TFT LCD LVDS panel
|
||||
- chunghwa,claa070wp03xg
|
||||
# EDT ETML0700Z9NDHA 7.0" WSVGA (1024x600) color TFT LCD LVDS panel
|
||||
|
|
|
|||
|
|
@ -206,12 +206,16 @@ properties:
|
|||
- mitsubishi,aa070mc01-ca1
|
||||
# Mitsubishi AA084XE01 8.4" XGA TFT LCD panel
|
||||
- mitsubishi,aa084xe01
|
||||
# Multi-Inno Technology Co.,Ltd MI0700A2T-30 7" 800x480 TFT Resistive Touch Module
|
||||
- multi-inno,mi0700a2t-30
|
||||
# Multi-Inno Technology Co.,Ltd MI0700S4T-6 7" 800x480 TFT Resistive Touch Module
|
||||
- multi-inno,mi0700s4t-6
|
||||
# Multi-Inno Technology Co.,Ltd MI0800FT-9 8" 800x600 TFT Resistive Touch Module
|
||||
- multi-inno,mi0800ft-9
|
||||
# Multi-Inno Technology Co.,Ltd MI1010AIT-1CP 10.1" 1280x800 LVDS IPS Cap Touch Mod.
|
||||
- multi-inno,mi1010ait-1cp
|
||||
# Multi-Inno Technology Co.,Ltd MI1010Z1T-1CP11 10.1" 1024x600 TFT Resistive Touch Module
|
||||
- multi-inno,mi1010z1t-1cp11
|
||||
# NEC LCD Technologies, Ltd. 12.1" WXGA (1280x800) LVDS TFT LCD panel
|
||||
- nec,nl12880bc20-05
|
||||
# NEC LCD Technologies,Ltd. WQVGA TFT LCD panel
|
||||
|
|
@ -280,10 +284,14 @@ properties:
|
|||
- team-source-display,tst043015cmhx
|
||||
# Tianma Micro-electronics TM070JDHG30 7.0" WXGA TFT LCD panel
|
||||
- tianma,tm070jdhg30
|
||||
# Tianma Micro-electronics TM070JDHG34-00 7.0" WXGA (1280x800) LVDS TFT LCD panel
|
||||
- tianma,tm070jdhg34-00
|
||||
# Tianma Micro-electronics TM070JVHG33 7.0" WXGA TFT LCD panel
|
||||
- tianma,tm070jvhg33
|
||||
# Tianma Micro-electronics TM070RVHG71 7.0" WXGA TFT LCD panel
|
||||
- tianma,tm070rvhg71
|
||||
# Topland TIAN-G07017-01 7.0" WSVGA TFT-LCD panel with capacitive touch
|
||||
- topland,tian-g07017-01
|
||||
# Toshiba 8.9" WXGA (1280x768) TFT LCD panel
|
||||
- toshiba,lt089ac29000
|
||||
# TPK U.S.A. LLC Fusion 7" 800 x 480 (WVGA) LCD panel with capacitive touch
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ properties:
|
|||
- samsung,atna45af01
|
||||
# Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel
|
||||
- samsung,atna45dc02
|
||||
# Samsung 15.6" 3K (2880x1620 pixels) eDP AMOLED panel
|
||||
- samsung,atna56ac03
|
||||
- const: samsung,atna33xc20
|
||||
|
||||
enable-gpios: true
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ properties:
|
|||
- renesas,du-r8a77995 # for R-Car D3 compatible DU
|
||||
- renesas,du-r8a779a0 # for R-Car V3U compatible DU
|
||||
- renesas,du-r8a779g0 # for R-Car V4H compatible DU
|
||||
- renesas,du-r8a779h0 # for R-Car V4M compatible DU
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
|
@ -69,14 +70,12 @@ properties:
|
|||
$ref: /schemas/graph.yaml#/properties/port
|
||||
unevaluatedProperties: false
|
||||
|
||||
required:
|
||||
- port@0
|
||||
- port@1
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
renesas,cmms:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
minItems: 1
|
||||
maxItems: 4
|
||||
items:
|
||||
maxItems: 1
|
||||
description:
|
||||
|
|
@ -85,6 +84,8 @@ properties:
|
|||
|
||||
renesas,vsps:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle-array
|
||||
minItems: 1
|
||||
maxItems: 4
|
||||
items:
|
||||
items:
|
||||
- description: phandle to VSP instance that serves the DU channel
|
||||
|
|
@ -489,9 +490,11 @@ allOf:
|
|||
|
||||
renesas,cmms:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
|
||||
renesas,vsps:
|
||||
minItems: 4
|
||||
maxItems: 4
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
@ -558,9 +561,11 @@ allOf:
|
|||
|
||||
renesas,cmms:
|
||||
minItems: 3
|
||||
maxItems: 3
|
||||
|
||||
renesas,vsps:
|
||||
minItems: 3
|
||||
maxItems: 3
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
@ -627,9 +632,11 @@ allOf:
|
|||
|
||||
renesas,cmms:
|
||||
minItems: 3
|
||||
maxItems: 3
|
||||
|
||||
renesas,vsps:
|
||||
minItems: 3
|
||||
maxItems: 3
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
@ -683,7 +690,7 @@ allOf:
|
|||
- port@1
|
||||
|
||||
renesas,vsps:
|
||||
minItems: 1
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
@ -746,9 +753,11 @@ allOf:
|
|||
|
||||
renesas,cmms:
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
renesas,vsps:
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
@ -799,6 +808,54 @@ allOf:
|
|||
|
||||
renesas,vsps:
|
||||
minItems: 2
|
||||
maxItems: 2
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
- interrupts
|
||||
- resets
|
||||
- reset-names
|
||||
- renesas,vsps
|
||||
|
||||
- if:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
enum:
|
||||
- renesas,du-r8a779h0
|
||||
then:
|
||||
properties:
|
||||
clocks:
|
||||
items:
|
||||
- description: Functional clock
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: du.0
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
reset-names:
|
||||
items:
|
||||
- const: du.0
|
||||
|
||||
ports:
|
||||
properties:
|
||||
port@0:
|
||||
description: DSI 0
|
||||
port@1: false
|
||||
port@2: false
|
||||
port@3: false
|
||||
|
||||
required:
|
||||
- port@0
|
||||
|
||||
renesas,vsps:
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- clock-names
|
||||
|
|
|
|||
|
|
@ -0,0 +1,120 @@
|
|||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/display/rockchip/rockchip,rk3588-mipi-dsi2.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Rockchip specific extensions to the Synopsys Designware MIPI DSI2
|
||||
|
||||
maintainers:
|
||||
- Heiko Stuebner <heiko@sntech.de>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- rockchip,rk3588-mipi-dsi2
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
interrupts:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
maxItems: 2
|
||||
|
||||
clock-names:
|
||||
items:
|
||||
- const: pclk
|
||||
- const: sys
|
||||
|
||||
rockchip,grf:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
This SoC uses GRF regs to switch between vopl/vopb.
|
||||
|
||||
phys:
|
||||
maxItems: 1
|
||||
|
||||
phy-names:
|
||||
const: dcphy
|
||||
|
||||
power-domains:
|
||||
maxItems: 1
|
||||
|
||||
resets:
|
||||
maxItems: 1
|
||||
|
||||
reset-names:
|
||||
const: apb
|
||||
|
||||
ports:
|
||||
$ref: /schemas/graph.yaml#/properties/ports
|
||||
|
||||
properties:
|
||||
port@0:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description: Input node to receive pixel data.
|
||||
|
||||
port@1:
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
description: DSI output node to panel.
|
||||
|
||||
required:
|
||||
- port@0
|
||||
- port@1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- clocks
|
||||
- clock-names
|
||||
- rockchip,grf
|
||||
- phys
|
||||
- phy-names
|
||||
- ports
|
||||
- reg
|
||||
|
||||
allOf:
|
||||
- $ref: /schemas/display/dsi-controller.yaml#
|
||||
|
||||
unevaluatedProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/clock/rockchip,rk3588-cru.h>
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
#include <dt-bindings/phy/phy.h>
|
||||
#include <dt-bindings/power/rk3588-power.h>
|
||||
#include <dt-bindings/reset/rockchip,rk3588-cru.h>
|
||||
|
||||
soc {
|
||||
#address-cells = <2>;
|
||||
#size-cells = <2>;
|
||||
|
||||
dsi@fde20000 {
|
||||
compatible = "rockchip,rk3588-mipi-dsi2";
|
||||
reg = <0x0 0xfde20000 0x0 0x10000>;
|
||||
interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_HIGH 0>;
|
||||
clocks = <&cru PCLK_DSIHOST0>, <&cru CLK_DSIHOST0>;
|
||||
clock-names = "pclk", "sys";
|
||||
resets = <&cru SRST_P_DSIHOST0>;
|
||||
reset-names = "apb";
|
||||
power-domains = <&power RK3588_PD_VOP>;
|
||||
phys = <&mipidcphy0 PHY_TYPE_DPHY>;
|
||||
phy-names = "dcphy";
|
||||
rockchip,grf = <&vop_grf>;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
#size-cells = <0>;
|
||||
dsi0_in: port@0 {
|
||||
reg = <0>;
|
||||
};
|
||||
|
||||
dsi0_out: port@1 {
|
||||
reg = <1>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
|
@ -100,12 +100,16 @@ properties:
|
|||
- description: Video layer, plane 1 (U/V or U)
|
||||
- description: Video layer, plane 2 (V)
|
||||
- description: Graphics layer
|
||||
- description: Audio channel 0
|
||||
- description: Audio channel 1
|
||||
dma-names:
|
||||
items:
|
||||
- const: vid0
|
||||
- const: vid1
|
||||
- const: vid2
|
||||
- const: gfx0
|
||||
- const: aud0
|
||||
- const: aud1
|
||||
|
||||
phys:
|
||||
description: PHYs for the DP data lanes
|
||||
|
|
@ -194,11 +198,13 @@ examples:
|
|||
power-domains = <&pd_dp>;
|
||||
resets = <&reset ZYNQMP_RESET_DP>;
|
||||
|
||||
dma-names = "vid0", "vid1", "vid2", "gfx0";
|
||||
dma-names = "vid0", "vid1", "vid2", "gfx0", "aud0", "aud1";
|
||||
dmas = <&xlnx_dpdma 0>,
|
||||
<&xlnx_dpdma 1>,
|
||||
<&xlnx_dpdma 2>,
|
||||
<&xlnx_dpdma 3>;
|
||||
<&xlnx_dpdma 3>,
|
||||
<&xlnx_dpdma 4>,
|
||||
<&xlnx_dpdma 5>;
|
||||
|
||||
phys = <&psgtr 1 PHY_TYPE_DP 0 3>,
|
||||
<&psgtr 0 PHY_TYPE_DP 1 3>;
|
||||
|
|
|
|||
|
|
@ -1524,6 +1524,8 @@ patternProperties:
|
|||
description: Topeet
|
||||
"^topic,.*":
|
||||
description: Topic Embedded Systems
|
||||
"^topland,.*":
|
||||
description: Topland Electronics (H.K) Co., Ltd.
|
||||
"^toppoly,.*":
|
||||
description: TPO (deprecated, use tpo)
|
||||
deprecated: true
|
||||
|
|
|
|||
54
Documentation/gpu/drm-compute.rst
Normal file
54
Documentation/gpu/drm-compute.rst
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
==================================
|
||||
Long running workloads and compute
|
||||
==================================
|
||||
|
||||
Long running workloads (compute) are workloads that will not complete in 10
|
||||
seconds. (The time a user will wait before reaching for the power button).
|
||||
This means that other techniques need to be used to manage those workloads,
|
||||
that cannot use fences.
|
||||
|
||||
Some hardware may schedule compute jobs, and have no way to pre-empt them, or
|
||||
have their memory swapped out from them. Or they simply want their workload
|
||||
not to be preempted or swapped out at all.
|
||||
|
||||
This means that it differs from what is described in driver-api/dma-buf.rst.
|
||||
|
||||
As with normal compute jobs, dma-fence may not be used at all. In this case,
|
||||
not even to force preemption. The driver is simply forced to unmap a BO
|
||||
from the long compute job's address space on unbind immediately, not even
|
||||
waiting for the workload to complete. Effectively this terminates the workload
|
||||
when there is no hardware support to recover.
|
||||
|
||||
Since this is undesirable, there need to be mitigations to prevent a workload
|
||||
from being terminated. There are several possible approaches, all with their
|
||||
advantages and drawbacks.
|
||||
|
||||
The first approach you will likely try is to pin all buffers used by compute.
|
||||
This guarantees that the job will run uninterrupted, but also allows a
|
||||
denial of service attack by pinning as much memory as possible, hogging the
|
||||
entire GPU memory, and possibly a huge chunk of CPU memory.
|
||||
|
||||
A second approach that will work slightly better on its own is adding an option
|
||||
not to evict when creating a new job (any kind). If all of userspace opts in
|
||||
to this flag, it would prevent cooperating userspace from forcefully terminating
|
||||
older compute jobs to start a new one.
|
||||
|
||||
If job preemption and recoverable pagefaults are not available, those are the
|
||||
only approaches possible. So even with those, you want a separate way of
|
||||
controlling resources. The standard kernel way of doing so is cgroups.
|
||||
|
||||
This creates a third option, using cgroups to prevent eviction. Both GPU and
|
||||
driver-allocated CPU memory would be accounted to the correct cgroup, and
|
||||
eviction would be made cgroup aware. This allows the GPU to be partitioned
|
||||
into cgroups, that will allow jobs to run next to each other without
|
||||
interference.
|
||||
|
||||
The interface to the cgroup would be similar to the current CPU memory
|
||||
interface, with similar semantics for min/low/high/max, if eviction can
|
||||
be made cgroup aware.
|
||||
|
||||
What should be noted is that each memory region (tiled memory for example)
|
||||
should have its own accounting.
|
||||
|
||||
The key is set to the regionid set by the driver, for example "tile0".
|
||||
For the value of $card, we use drmGetUnique().
|
||||
|
|
@ -221,6 +221,9 @@ Panel Helper Reference
|
|||
.. kernel-doc:: drivers/gpu/drm/drm_panel_orientation_quirks.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/drm_panel_backlight_quirks.c
|
||||
:export:
|
||||
|
||||
Panel Self Refresh Helper Reference
|
||||
===================================
|
||||
|
||||
|
|
|
|||
|
|
@ -145,57 +145,57 @@ both.
|
|||
Memory
|
||||
^^^^^^
|
||||
|
||||
- drm-memory-<region>: <uint> [KiB|MiB]
|
||||
|
||||
Each possible memory type which can be used to store buffer objects by the
|
||||
GPU in question shall be given a stable and unique name to be returned as the
|
||||
string here.
|
||||
Each possible memory type which can be used to store buffer objects by the GPU
|
||||
in question shall be given a stable and unique name to be used as the "<region>"
|
||||
string.
|
||||
|
||||
The region name "memory" is reserved to refer to normal system memory.
|
||||
|
||||
Value shall reflect the amount of storage currently consumed by the buffer
|
||||
The value shall reflect the amount of storage currently consumed by the buffer
|
||||
objects belonging to this client, in the respective memory region.
|
||||
|
||||
Default unit shall be bytes with optional unit specifiers of 'KiB' or 'MiB'
|
||||
indicating kibi- or mebi-bytes.
|
||||
|
||||
This key is deprecated and is an alias for drm-resident-<region>. Only one of
|
||||
the two should be present in the output.
|
||||
- drm-total-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of all requested buffers, including both shared and private
|
||||
memory. The backing store for the buffers does not need to be currently
|
||||
instantiated to count under this category. To avoid double-counting, if a buffer
|
||||
has multiple regions where it can be allocated to, the implementation should
|
||||
consistently select a single region for accounting purposes.
|
||||
|
||||
- drm-shared-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of buffers that are shared with another file (e.g., have more
|
||||
than a single handle).
|
||||
|
||||
- drm-total-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of all created buffers including shared and private memory. The
|
||||
backing store for the buffers does not have to be currently instantiated to be
|
||||
counted under this category.
|
||||
The total size of buffers that are shared with another file (i.e., have more
|
||||
than one handle). The same requirement to avoid double-counting that applies to
|
||||
drm-total-<region> also applies here.
|
||||
|
||||
- drm-resident-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of buffers that are resident (have their backing store present or
|
||||
instantiated) in the specified region.
|
||||
The total size of buffers that are resident (i.e., have their backing store
|
||||
present or instantiated) in the specified region.
|
||||
|
||||
This is an alias for drm-memory-<region> and only one of the two should be
|
||||
present in the output.
|
||||
- drm-memory-<region>: <uint> [KiB|MiB]
|
||||
|
||||
This key is deprecated and is only printed by amdgpu; it is an alias for
|
||||
drm-resident-<region>.
|
||||
|
||||
- drm-purgeable-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of buffers that are purgeable.
|
||||
The total size of buffers that are resident and purgeable.
|
||||
|
||||
For example drivers which implement a form of 'madvise' like functionality can
|
||||
here count buffers which have instantiated backing store, but have been marked
|
||||
with an equivalent of MADV_DONTNEED.
|
||||
For example, drivers that implement functionality similar to 'madvise' can count
|
||||
buffers that have instantiated backing stores but have been marked with an
|
||||
equivalent of MADV_DONTNEED.
|
||||
|
||||
- drm-active-<region>: <uint> [KiB|MiB]
|
||||
|
||||
The total size of buffers that are active on one or more engines.
|
||||
|
||||
One practical example of this can be presence of unsignaled fences in an GEM
|
||||
buffer reservation object. Therefore the active category is a subset of
|
||||
resident.
|
||||
One practical example of this could be the presence of unsignaled fences in a
|
||||
GEM buffer reservation object. Therefore, the active category is a subset of the
|
||||
resident category.
|
||||
|
||||
Implementation Details
|
||||
======================
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ GPU Driver Developer's Guide
|
|||
drm-usage-stats
|
||||
driver-uapi
|
||||
drm-client
|
||||
drm-compute
|
||||
drivers
|
||||
backlight
|
||||
vga-switcheroo
|
||||
|
|
|
|||
|
|
@ -23,4 +23,5 @@ DG2, etc is provided to prototype the driver.
|
|||
xe_firmware
|
||||
xe_tile
|
||||
xe_debugging
|
||||
xe_devcoredump
|
||||
xe-drm-usage-stats.rst
|
||||
|
|
|
|||
14
Documentation/gpu/xe/xe_devcoredump.rst
Normal file
14
Documentation/gpu/xe/xe_devcoredump.rst
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
.. SPDX-License-Identifier: (GPL-2.0+ OR MIT)
|
||||
|
||||
==================
|
||||
Xe Device Coredump
|
||||
==================
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
|
||||
:doc: Xe device coredump
|
||||
|
||||
Internal API
|
||||
============
|
||||
|
||||
.. kernel-doc:: drivers/gpu/drm/xe/xe_devcoredump.c
|
||||
:internal:
|
||||
|
|
@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
|
|||
Introduction
|
||||
============
|
||||
|
||||
Fprobe is a function entry/exit probe mechanism based on ftrace.
|
||||
Instead of using ftrace full feature, if you only want to attach callbacks
|
||||
on function entry and exit, similar to the kprobes and kretprobes, you can
|
||||
Fprobe is a function entry/exit probe based on the function-graph tracing
|
||||
feature in ftrace.
|
||||
Instead of tracing all functions, if you want to attach callbacks on specific
|
||||
function entry and exit, similar to the kprobes and kretprobes, you can
|
||||
use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
|
||||
instrumentation for multiple functions with single handler. This document
|
||||
describes how to use fprobe.
|
||||
|
|
@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as follows:
|
|||
|
||||
.. code-block:: c
|
||||
|
||||
int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long ret_ip, struct pt_regs *regs, void *entry_data);
|
||||
int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long ret_ip, struct ftrace_regs *fregs, void *entry_data);
|
||||
|
||||
void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long ret_ip, struct pt_regs *regs, void *entry_data);
|
||||
void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long ret_ip, struct ftrace_regs *fregs, void *entry_data);
|
||||
|
||||
Note that the @entry_ip is saved at function entry and passed to exit handler.
|
||||
If the entry callback function returns !0, the corresponding exit callback will be cancelled.
|
||||
Note that the @entry_ip is saved at function entry and passed to exit
|
||||
handler.
|
||||
If the entry callback function returns !0, the corresponding exit callback
|
||||
will be cancelled.
|
||||
|
||||
@fp
|
||||
This is the address of `fprobe` data structure related to this handler.
|
||||
|
|
@ -112,12 +115,10 @@ If the entry callback function returns !0, the corresponding exit callback will
|
|||
This is the return address that the traced function will return to,
|
||||
somewhere in the caller. This can be used at both entry and exit.
|
||||
|
||||
@regs
|
||||
This is the `pt_regs` data structure at the entry and exit. Note that
|
||||
the instruction pointer of @regs may be different from the @entry_ip
|
||||
in the entry_handler. If you need traced instruction pointer, you need
|
||||
to use @entry_ip. On the other hand, in the exit_handler, the instruction
|
||||
pointer of @regs is set to the current return address.
|
||||
@fregs
|
||||
This is the `ftrace_regs` data structure at the entry and exit. This
|
||||
includes the function parameters, or the return values. So the user can
|
||||
access those values via appropriate `ftrace_regs_*` APIs.
|
||||
|
||||
@entry_data
|
||||
This is a local storage to share the data between entry and exit handlers.
|
||||
|
|
@ -125,6 +126,17 @@ If the entry callback function returns !0, the corresponding exit callback will
|
|||
and `entry_data_size` field when registering the fprobe, the storage is
|
||||
allocated and passed to both `entry_handler` and `exit_handler`.
|
||||
|
||||
Entry data size and exit handlers on the same function
|
||||
======================================================
|
||||
|
||||
Since the entry data is passed via per-task stack and it has limited size,
|
||||
the entry data size per probe is limited to `15 * sizeof(long)`. You also need
|
||||
to take care that when different fprobes are probing the same function, this
|
||||
limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
|
||||
each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
|
||||
you should keep the number of fprobes on the same function as small as
|
||||
possible.
|
||||
|
||||
Share the callbacks with kprobes
|
||||
================================
|
||||
|
||||
|
|
@ -165,8 +177,8 @@ This counter counts up when;
|
|||
- fprobe fails to take ftrace_recursion lock. This usually means that a function
|
||||
which is traced by other ftrace users is called from the entry_handler.
|
||||
|
||||
- fprobe fails to setup the function exit because of the shortage of rethook
|
||||
(the shadow stack for hooking the function return.)
|
||||
- fprobe fails to setup the function exit because of failing to allocate the
|
||||
data buffer from the per-task shadow stack.
|
||||
|
||||
The `fprobe::nmissed` field counts up in both cases. Therefore, the former
|
||||
skips both of entry and exit callback and the latter skips the exit
|
||||
|
|
|
|||
27
MAINTAINERS
27
MAINTAINERS
|
|
@ -1201,6 +1201,17 @@ L: linux-spi@vger.kernel.org
|
|||
S: Supported
|
||||
F: drivers/spi/spi-amd.c
|
||||
|
||||
AMD XDNA DRIVER
|
||||
M: Min Ma <min.ma@amd.com>
|
||||
M: Lizhi Hou <lizhi.hou@amd.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Supported
|
||||
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
|
||||
F: Documentation/accel/amdxdna/
|
||||
F: drivers/accel/amdxdna/
|
||||
F: include/trace/events/amdxdna.h
|
||||
F: include/uapi/drm/amdxdna_accel.h
|
||||
|
||||
AMD XGBE DRIVER
|
||||
M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
|
||||
L: netdev@vger.kernel.org
|
||||
|
|
@ -7076,7 +7087,8 @@ T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
|
|||
F: drivers/gpu/drm/sun4i/sun8i*
|
||||
|
||||
DRM DRIVER FOR ARM PL111 CLCD
|
||||
S: Orphan
|
||||
M: Linus Walleij <linus.walleij@linaro.org>
|
||||
S: Maintained
|
||||
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
|
||||
F: drivers/gpu/drm/pl111/
|
||||
|
||||
|
|
@ -7391,7 +7403,7 @@ L: virtualization@lists.linux.dev
|
|||
S: Obsolete
|
||||
W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
|
||||
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
|
||||
F: drivers/gpu/drm/tiny/cirrus.c
|
||||
F: drivers/gpu/drm/tiny/cirrus-qemu.c
|
||||
|
||||
DRM DRIVER FOR QXL VIRTUAL GPU
|
||||
M: Dave Airlie <airlied@redhat.com>
|
||||
|
|
@ -7802,6 +7814,7 @@ F: drivers/gpu/drm/rockchip/
|
|||
|
||||
DRM DRIVERS FOR STI
|
||||
M: Alain Volmat <alain.volmat@foss.st.com>
|
||||
M: Raphael Gallais-Pou <rgallaispou@gmail.com>
|
||||
L: dri-devel@lists.freedesktop.org
|
||||
S: Maintained
|
||||
T: git https://gitlab.freedesktop.org/drm/misc/kernel.git
|
||||
|
|
@ -13315,7 +13328,7 @@ L: linux-kernel@vger.kernel.org
|
|||
L: linux-arch@vger.kernel.org
|
||||
L: lkmm@lists.linux.dev
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
|
||||
F: Documentation/atomic_bitops.txt
|
||||
F: Documentation/atomic_t.txt
|
||||
F: Documentation/core-api/refcount-vs-atomic.rst
|
||||
|
|
@ -19629,7 +19642,7 @@ R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
|||
R: Lai Jiangshan <jiangshanlai@gmail.com>
|
||||
L: rcu@vger.kernel.org
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
|
||||
F: tools/testing/selftests/rcutorture
|
||||
|
||||
RDACM20 Camera Sensor
|
||||
|
|
@ -19708,7 +19721,7 @@ R: Zqiang <qiang.zhang1211@gmail.com>
|
|||
L: rcu@vger.kernel.org
|
||||
S: Supported
|
||||
W: http://www.rdrop.com/users/paulmck/RCU/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
|
||||
F: Documentation/RCU/
|
||||
F: include/linux/rcu*
|
||||
F: kernel/rcu/
|
||||
|
|
@ -21606,7 +21619,7 @@ R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
|
|||
L: rcu@vger.kernel.org
|
||||
S: Supported
|
||||
W: http://www.rdrop.com/users/paulmck/RCU/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
|
||||
F: include/linux/srcu*.h
|
||||
F: kernel/rcu/srcu*.c
|
||||
|
||||
|
|
@ -23731,7 +23744,7 @@ M: "Paul E. McKenney" <paulmck@kernel.org>
|
|||
M: Josh Triplett <josh@joshtriplett.org>
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git dev
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
|
||||
F: Documentation/RCU/torture.rst
|
||||
F: kernel/locking/locktorture.c
|
||||
F: kernel/rcu/rcuscale.c
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ config ARC
|
|||
# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
|
||||
select GENERIC_IRQ_SHOW
|
||||
select GENERIC_PCI_IOMAP
|
||||
select GENERIC_PENDING_IRQ if SMP
|
||||
select GENERIC_SCHED_CLOCK
|
||||
select GENERIC_SMP_IDLE_THREAD
|
||||
select GENERIC_IOREMAP
|
||||
|
|
|
|||
|
|
@ -357,8 +357,6 @@ static void idu_cascade_isr(struct irq_desc *desc)
|
|||
static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
|
||||
{
|
||||
irq_set_chip_and_handler(virq, &idu_irq_chip, handle_level_irq);
|
||||
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -127,29 +127,6 @@ void crash_smp_send_stop(void)
|
|||
cpus_stopped = 1;
|
||||
}
|
||||
|
||||
static void machine_kexec_mask_interrupts(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_irq_desc(i, desc) {
|
||||
struct irq_chip *chip;
|
||||
|
||||
chip = irq_desc_get_chip(desc);
|
||||
if (!chip)
|
||||
continue;
|
||||
|
||||
if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
|
||||
chip->irq_eoi(&desc->irq_data);
|
||||
|
||||
if (chip->irq_mask)
|
||||
chip->irq_mask(&desc->irq_data);
|
||||
|
||||
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
||||
chip->irq_disable(&desc->irq_data);
|
||||
}
|
||||
}
|
||||
|
||||
void machine_crash_shutdown(struct pt_regs *regs)
|
||||
{
|
||||
local_irq_disable();
|
||||
|
|
|
|||
|
|
@ -149,6 +149,7 @@ config ARM64
|
|||
select GENERIC_IDLE_POLL_SETUP
|
||||
select GENERIC_IOREMAP
|
||||
select GENERIC_IRQ_IPI
|
||||
select GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD
|
||||
select GENERIC_IRQ_PROBE
|
||||
select GENERIC_IRQ_SHOW
|
||||
select GENERIC_IRQ_SHOW_LEVEL
|
||||
|
|
@ -216,9 +217,11 @@ config ARM64
|
|||
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_GUP_FAST
|
||||
select HAVE_FTRACE_GRAPH_FUNC
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_FUNCTION_ERROR_INJECTION
|
||||
select HAVE_FUNCTION_GRAPH_FREGS
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_RETVAL
|
||||
select HAVE_GCC_PLUGINS
|
||||
|
|
|
|||
|
|
@ -135,8 +135,6 @@ config ARCH_K3
|
|||
select SOC_TI
|
||||
select TI_MESSAGE_MANAGER
|
||||
select TI_SCI_PROTOCOL
|
||||
select TI_SCI_INTR_IRQCHIP
|
||||
select TI_SCI_INTA_IRQCHIP
|
||||
select TI_K3_SOCINFO
|
||||
help
|
||||
This enables support for Texas Instruments' K3 multicore SoC
|
||||
|
|
|
|||
|
|
@ -1306,11 +1306,14 @@ zynqmp_dpsub: display@fd4a0000 {
|
|||
"dp_vtc_pixel_clk_in";
|
||||
power-domains = <&zynqmp_firmware PD_DP>;
|
||||
resets = <&zynqmp_reset ZYNQMP_RESET_DP>;
|
||||
dma-names = "vid0", "vid1", "vid2", "gfx0";
|
||||
dma-names = "vid0", "vid1", "vid2", "gfx0",
|
||||
"aud0", "aud1";
|
||||
dmas = <&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO0>,
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO1>,
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_VIDEO2>,
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_GRAPHICS>;
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_GRAPHICS>,
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_AUDIO0>,
|
||||
<&zynqmp_dpdma ZYNQMP_DPDMA_AUDIO1>;
|
||||
|
||||
ports {
|
||||
#address-cells = <1>;
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ syscall-y += unistd_32.h
|
|||
syscall-y += unistd_compat_32.h
|
||||
|
||||
generic-y += early_ioremap.h
|
||||
generic-y += fprobe.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mmzone.h
|
||||
generic-y += qrwlock.h
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ extern unsigned long ftrace_graph_call;
|
|||
extern void return_to_handler(void);
|
||||
|
||||
unsigned long ftrace_call_adjust(unsigned long addr);
|
||||
unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip);
|
||||
#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
|
||||
#define HAVE_ARCH_FTRACE_REGS
|
||||
|
|
@ -129,6 +131,38 @@ ftrace_override_function_with_return(struct ftrace_regs *fregs)
|
|||
arch_ftrace_regs(fregs)->pc = arch_ftrace_regs(fregs)->lr;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->fp;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->lr;
|
||||
}
|
||||
|
||||
static __always_inline struct pt_regs *
|
||||
ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
|
||||
{
|
||||
struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs);
|
||||
|
||||
memcpy(regs->regs, afregs->regs, sizeof(afregs->regs));
|
||||
regs->sp = afregs->sp;
|
||||
regs->pc = afregs->pc;
|
||||
regs->regs[29] = afregs->fp;
|
||||
regs->regs[30] = afregs->lr;
|
||||
return regs;
|
||||
}
|
||||
|
||||
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
|
||||
(_regs)->pc = arch_ftrace_regs(fregs)->pc; \
|
||||
(_regs)->regs[29] = arch_ftrace_regs(fregs)->fp; \
|
||||
(_regs)->sp = arch_ftrace_regs(fregs)->sp; \
|
||||
(_regs)->pstate = PSR_MODE_EL1h; \
|
||||
} while (0)
|
||||
|
||||
int ftrace_regs_query_register_offset(const char *name);
|
||||
|
||||
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
|
||||
|
|
@ -186,23 +220,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
|
|||
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
struct fgraph_ret_regs {
|
||||
/* x0 - x7 */
|
||||
unsigned long regs[8];
|
||||
|
||||
unsigned long fp;
|
||||
unsigned long __unused;
|
||||
};
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->regs[0];
|
||||
}
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->fp;
|
||||
}
|
||||
|
||||
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
|
||||
unsigned long frame_pointer);
|
||||
|
|
|
|||
|
|
@ -179,18 +179,6 @@ int main(void)
|
|||
DEFINE(FTRACE_OPS_FUNC, offsetof(struct ftrace_ops, func));
|
||||
#endif
|
||||
BLANK();
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
DEFINE(FGRET_REGS_X0, offsetof(struct fgraph_ret_regs, regs[0]));
|
||||
DEFINE(FGRET_REGS_X1, offsetof(struct fgraph_ret_regs, regs[1]));
|
||||
DEFINE(FGRET_REGS_X2, offsetof(struct fgraph_ret_regs, regs[2]));
|
||||
DEFINE(FGRET_REGS_X3, offsetof(struct fgraph_ret_regs, regs[3]));
|
||||
DEFINE(FGRET_REGS_X4, offsetof(struct fgraph_ret_regs, regs[4]));
|
||||
DEFINE(FGRET_REGS_X5, offsetof(struct fgraph_ret_regs, regs[5]));
|
||||
DEFINE(FGRET_REGS_X6, offsetof(struct fgraph_ret_regs, regs[6]));
|
||||
DEFINE(FGRET_REGS_X7, offsetof(struct fgraph_ret_regs, regs[7]));
|
||||
DEFINE(FGRET_REGS_FP, offsetof(struct fgraph_ret_regs, fp));
|
||||
DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
|
||||
#endif
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
DEFINE(FTRACE_OPS_DIRECT_CALL, offsetof(struct ftrace_ops, direct_call));
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -329,24 +329,28 @@ SYM_FUNC_END(ftrace_stub_graph)
|
|||
* @fp is checked against the value passed by ftrace_graph_caller().
|
||||
*/
|
||||
SYM_CODE_START(return_to_handler)
|
||||
/* save return value regs */
|
||||
sub sp, sp, #FGRET_REGS_SIZE
|
||||
stp x0, x1, [sp, #FGRET_REGS_X0]
|
||||
stp x2, x3, [sp, #FGRET_REGS_X2]
|
||||
stp x4, x5, [sp, #FGRET_REGS_X4]
|
||||
stp x6, x7, [sp, #FGRET_REGS_X6]
|
||||
str x29, [sp, #FGRET_REGS_FP] // parent's fp
|
||||
/* Make room for ftrace_regs */
|
||||
sub sp, sp, #FREGS_SIZE
|
||||
|
||||
/* Save return value regs */
|
||||
stp x0, x1, [sp, #FREGS_X0]
|
||||
stp x2, x3, [sp, #FREGS_X2]
|
||||
stp x4, x5, [sp, #FREGS_X4]
|
||||
stp x6, x7, [sp, #FREGS_X6]
|
||||
|
||||
/* Save the callsite's FP */
|
||||
str x29, [sp, #FREGS_FP]
|
||||
|
||||
mov x0, sp
|
||||
bl ftrace_return_to_handler // addr = ftrace_return_to_hander(regs);
|
||||
bl ftrace_return_to_handler // addr = ftrace_return_to_hander(fregs);
|
||||
mov x30, x0 // restore the original return address
|
||||
|
||||
/* restore return value regs */
|
||||
ldp x0, x1, [sp, #FGRET_REGS_X0]
|
||||
ldp x2, x3, [sp, #FGRET_REGS_X2]
|
||||
ldp x4, x5, [sp, #FGRET_REGS_X4]
|
||||
ldp x6, x7, [sp, #FGRET_REGS_X6]
|
||||
add sp, sp, #FGRET_REGS_SIZE
|
||||
/* Restore return value regs */
|
||||
ldp x0, x1, [sp, #FREGS_X0]
|
||||
ldp x2, x3, [sp, #FREGS_X2]
|
||||
ldp x4, x5, [sp, #FREGS_X4]
|
||||
ldp x6, x7, [sp, #FREGS_X6]
|
||||
add sp, sp, #FREGS_SIZE
|
||||
|
||||
ret
|
||||
SYM_CODE_END(return_to_handler)
|
||||
|
|
|
|||
|
|
@ -143,6 +143,69 @@ unsigned long ftrace_call_adjust(unsigned long addr)
|
|||
return addr;
|
||||
}
|
||||
|
||||
/* Convert fentry_ip to the symbol address without kallsyms */
|
||||
unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
|
||||
{
|
||||
u32 insn;
|
||||
|
||||
/*
|
||||
* When using patchable-function-entry without pre-function NOPS, ftrace
|
||||
* entry is the address of the first NOP after the function entry point.
|
||||
*
|
||||
* The compiler has either generated:
|
||||
*
|
||||
* func+00: func: NOP // To be patched to MOV X9, LR
|
||||
* func+04: NOP // To be patched to BL <caller>
|
||||
*
|
||||
* Or:
|
||||
*
|
||||
* func-04: BTI C
|
||||
* func+00: func: NOP // To be patched to MOV X9, LR
|
||||
* func+04: NOP // To be patched to BL <caller>
|
||||
*
|
||||
* The fentry_ip is the address of `BL <caller>` which is at `func + 4`
|
||||
* bytes in either case.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
|
||||
return fentry_ip - AARCH64_INSN_SIZE;
|
||||
|
||||
/*
|
||||
* When using patchable-function-entry with pre-function NOPs, BTI is
|
||||
* a bit different.
|
||||
*
|
||||
* func+00: func: NOP // To be patched to MOV X9, LR
|
||||
* func+04: NOP // To be patched to BL <caller>
|
||||
*
|
||||
* Or:
|
||||
*
|
||||
* func+00: func: BTI C
|
||||
* func+04: NOP // To be patched to MOV X9, LR
|
||||
* func+08: NOP // To be patched to BL <caller>
|
||||
*
|
||||
* The fentry_ip is the address of `BL <caller>` which is at either
|
||||
* `func + 4` or `func + 8` depends on whether there is a BTI.
|
||||
*/
|
||||
|
||||
/* If there is no BTI, the func address should be one instruction before. */
|
||||
if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
|
||||
return fentry_ip - AARCH64_INSN_SIZE;
|
||||
|
||||
/* We want to be extra safe in case entry ip is on the page edge,
|
||||
* but otherwise we need to avoid get_kernel_nofault()'s overhead.
|
||||
*/
|
||||
if ((fentry_ip & ~PAGE_MASK) < AARCH64_INSN_SIZE * 2) {
|
||||
if (get_kernel_nofault(insn, (u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2)))
|
||||
return 0;
|
||||
} else {
|
||||
insn = *(u32 *)(fentry_ip - AARCH64_INSN_SIZE * 2);
|
||||
}
|
||||
|
||||
if (aarch64_insn_is_bti(le32_to_cpu((__le32)insn)))
|
||||
return fentry_ip - AARCH64_INSN_SIZE * 2;
|
||||
|
||||
return fentry_ip - AARCH64_INSN_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Replace a single instruction, which may be a branch or NOP.
|
||||
* If @validate == true, a replaced instruction is checked against 'old'.
|
||||
|
|
@ -481,7 +544,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
|
|||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
||||
{
|
||||
prepare_ftrace_return(ip, &arch_ftrace_regs(fregs)->lr, arch_ftrace_regs(fregs)->fp);
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
unsigned long frame_pointer = arch_ftrace_regs(fregs)->fp;
|
||||
unsigned long *parent = &arch_ftrace_regs(fregs)->lr;
|
||||
unsigned long old;
|
||||
|
||||
if (unlikely(atomic_read(&current->tracing_graph_pause)))
|
||||
return;
|
||||
|
||||
old = *parent;
|
||||
|
||||
if (!function_graph_enter_regs(old, ip, frame_pointer,
|
||||
(void *)frame_pointer, fregs)) {
|
||||
*parent = return_hooker;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage)
|
|||
BUG(); /* Should never get here. */
|
||||
}
|
||||
|
||||
static void machine_kexec_mask_interrupts(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_irq_desc(i, desc) {
|
||||
struct irq_chip *chip;
|
||||
int ret;
|
||||
|
||||
chip = irq_desc_get_chip(desc);
|
||||
if (!chip)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* First try to remove the active state. If this
|
||||
* fails, try to EOI the interrupt.
|
||||
*/
|
||||
ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
|
||||
|
||||
if (ret && irqd_irq_inprogress(&desc->irq_data) &&
|
||||
chip->irq_eoi)
|
||||
chip->irq_eoi(&desc->irq_data);
|
||||
|
||||
if (chip->irq_mask)
|
||||
chip->irq_mask(&desc->irq_data);
|
||||
|
||||
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
||||
chip->irq_disable(&desc->irq_data);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ config HEXAGON
|
|||
# select ARCH_HAS_CPU_IDLE_WAIT
|
||||
# select GPIOLIB
|
||||
# select HAVE_CLK
|
||||
# select GENERIC_PENDING_IRQ if SMP
|
||||
select GENERIC_ATOMIC64
|
||||
select HAVE_PERF_EVENTS
|
||||
# GENERIC_ALLOCATOR is used by dma_alloc_coherent()
|
||||
|
|
|
|||
|
|
@ -129,16 +129,18 @@ config LOONGARCH
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
|
||||
select HAVE_FTRACE_REGS_HAVING_PT_REGS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_EBPF_JIT
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !ARCH_STRICT_ALIGN
|
||||
select HAVE_EXIT_THREAD
|
||||
select HAVE_GUP_FAST
|
||||
select HAVE_FTRACE_GRAPH_FUNC
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_ARG_ACCESS_API
|
||||
select HAVE_FUNCTION_ERROR_INJECTION
|
||||
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_FREGS
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_GCC_PLUGINS
|
||||
|
|
|
|||
12
arch/loongarch/include/asm/fprobe.h
Normal file
12
arch/loongarch/include/asm/fprobe.h
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_LOONGARCH_FPROBE_H
|
||||
#define _ASM_LOONGARCH_FPROBE_H
|
||||
|
||||
/*
|
||||
* Explicitly undef ARCH_DEFINE_ENCODE_FPROBE_HEADER, because loongarch does not
|
||||
* have enough number of fixed MSBs of the address of kernel objects for
|
||||
* encoding the size of data in fprobe_header. Use 2-entries encoding instead.
|
||||
*/
|
||||
#undef ARCH_DEFINE_ENCODE_FPROBE_HEADER
|
||||
|
||||
#endif /* _ASM_LOONGARCH_FPROBE_H */
|
||||
|
|
@ -57,6 +57,16 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs, unsigned long ip)
|
|||
instruction_pointer_set(&arch_ftrace_regs(fregs)->regs, ip);
|
||||
}
|
||||
|
||||
#undef ftrace_regs_get_frame_pointer
|
||||
#define ftrace_regs_get_frame_pointer(fregs) \
|
||||
(arch_ftrace_regs(fregs)->regs.regs[22])
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_return_address(struct ftrace_regs *fregs)
|
||||
{
|
||||
return *(unsigned long *)(arch_ftrace_regs(fregs)->regs.regs[1]);
|
||||
}
|
||||
|
||||
#define ftrace_graph_func ftrace_graph_func
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs);
|
||||
|
|
@ -78,26 +88,4 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
|
|||
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
struct fgraph_ret_regs {
|
||||
/* a0 - a1 */
|
||||
unsigned long regs[2];
|
||||
|
||||
unsigned long fp;
|
||||
unsigned long __unused;
|
||||
};
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->regs[0];
|
||||
}
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->fp;
|
||||
}
|
||||
#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_LOONGARCH_FTRACE_H */
|
||||
|
|
|
|||
|
|
@ -280,18 +280,6 @@ static void __used output_pbe_defines(void)
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
static void __used output_fgraph_ret_regs_defines(void)
|
||||
{
|
||||
COMMENT("LoongArch fgraph_ret_regs offsets.");
|
||||
OFFSET(FGRET_REGS_A0, fgraph_ret_regs, regs[0]);
|
||||
OFFSET(FGRET_REGS_A1, fgraph_ret_regs, regs[1]);
|
||||
OFFSET(FGRET_REGS_FP, fgraph_ret_regs, fp);
|
||||
DEFINE(FGRET_REGS_SIZE, sizeof(struct fgraph_ret_regs));
|
||||
BLANK();
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __used output_kvm_defines(void)
|
||||
{
|
||||
COMMENT("KVM/LoongArch Specific offsets.");
|
||||
|
|
|
|||
|
|
@ -243,8 +243,16 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
|||
{
|
||||
struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
|
||||
unsigned long *parent = (unsigned long *)&regs->regs[1];
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
unsigned long old;
|
||||
|
||||
prepare_ftrace_return(ip, (unsigned long *)parent);
|
||||
if (unlikely(atomic_read(&current->tracing_graph_pause)))
|
||||
return;
|
||||
|
||||
old = *parent;
|
||||
|
||||
if (!function_graph_enter_regs(old, ip, 0, parent, fregs))
|
||||
*parent = return_hooker;
|
||||
}
|
||||
#else
|
||||
static int ftrace_modify_graph_caller(bool enable)
|
||||
|
|
|
|||
|
|
@ -79,10 +79,11 @@ SYM_FUNC_START(ftrace_graph_caller)
|
|||
SYM_FUNC_END(ftrace_graph_caller)
|
||||
|
||||
SYM_FUNC_START(return_to_handler)
|
||||
PTR_ADDI sp, sp, -FGRET_REGS_SIZE
|
||||
PTR_S a0, sp, FGRET_REGS_A0
|
||||
PTR_S a1, sp, FGRET_REGS_A1
|
||||
PTR_S zero, sp, FGRET_REGS_FP
|
||||
/* Save return value regs */
|
||||
PTR_ADDI sp, sp, -PT_SIZE
|
||||
PTR_S a0, sp, PT_R4
|
||||
PTR_S a1, sp, PT_R5
|
||||
PTR_S zero, sp, PT_R22
|
||||
|
||||
move a0, sp
|
||||
bl ftrace_return_to_handler
|
||||
|
|
@ -90,9 +91,11 @@ SYM_FUNC_START(return_to_handler)
|
|||
/* Restore the real parent address: a0 -> ra */
|
||||
move ra, a0
|
||||
|
||||
PTR_L a0, sp, FGRET_REGS_A0
|
||||
PTR_L a1, sp, FGRET_REGS_A1
|
||||
PTR_ADDI sp, sp, FGRET_REGS_SIZE
|
||||
/* Restore return value regs */
|
||||
PTR_L a0, sp, PT_R4
|
||||
PTR_L a1, sp, PT_R5
|
||||
PTR_ADDI sp, sp, PT_SIZE
|
||||
|
||||
jr ra
|
||||
SYM_FUNC_END(return_to_handler)
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
|
|
|||
|
|
@ -140,19 +140,19 @@ SYM_CODE_END(ftrace_graph_caller)
|
|||
SYM_CODE_START(return_to_handler)
|
||||
UNWIND_HINT_UNDEFINED
|
||||
/* Save return value regs */
|
||||
PTR_ADDI sp, sp, -FGRET_REGS_SIZE
|
||||
PTR_S a0, sp, FGRET_REGS_A0
|
||||
PTR_S a1, sp, FGRET_REGS_A1
|
||||
PTR_S zero, sp, FGRET_REGS_FP
|
||||
PTR_ADDI sp, sp, -PT_SIZE
|
||||
PTR_S a0, sp, PT_R4
|
||||
PTR_S a1, sp, PT_R5
|
||||
PTR_S zero, sp, PT_R22
|
||||
|
||||
move a0, sp
|
||||
bl ftrace_return_to_handler
|
||||
move ra, a0
|
||||
|
||||
/* Restore return value regs */
|
||||
PTR_L a0, sp, FGRET_REGS_A0
|
||||
PTR_L a1, sp, FGRET_REGS_A1
|
||||
PTR_ADDI sp, sp, FGRET_REGS_SIZE
|
||||
PTR_L a0, sp, PT_R4
|
||||
PTR_L a1, sp, PT_R5
|
||||
PTR_ADDI sp, sp, PT_SIZE
|
||||
|
||||
jr ra
|
||||
SYM_CODE_END(return_to_handler)
|
||||
|
|
|
|||
|
|
@ -241,6 +241,7 @@ config PPC
|
|||
select HAVE_EBPF_JIT
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_GUP_FAST
|
||||
select HAVE_FTRACE_GRAPH_FUNC
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_ARG_ACCESS_API
|
||||
select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1
|
||||
|
|
|
|||
|
|
@ -43,6 +43,13 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
|
|||
return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL;
|
||||
}
|
||||
|
||||
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
|
||||
(_regs)->result = 0; \
|
||||
(_regs)->nip = arch_ftrace_regs(fregs)->regs.nip; \
|
||||
(_regs)->gpr[1] = arch_ftrace_regs(fregs)->regs.gpr[1]; \
|
||||
asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
|
||||
} while (0)
|
||||
|
||||
static __always_inline void
|
||||
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
|
||||
unsigned long ip)
|
||||
|
|
@ -50,6 +57,12 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
|
|||
regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip);
|
||||
}
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_return_address(struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->regs.link;
|
||||
}
|
||||
|
||||
struct ftrace_ops;
|
||||
|
||||
#define ftrace_graph_func ftrace_graph_func
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ struct pt_regs;
|
|||
extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
|
||||
master to copy new code to 0 */
|
||||
extern void default_machine_kexec(struct kimage *image);
|
||||
extern void machine_kexec_mask_interrupts(void);
|
||||
|
||||
void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
|
||||
unsigned long start_address) __noreturn;
|
||||
|
|
|
|||
|
|
@ -658,7 +658,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
|||
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
||||
{
|
||||
unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1];
|
||||
int bit;
|
||||
|
||||
if (unlikely(ftrace_graph_is_dead()))
|
||||
goto out;
|
||||
|
|
@ -666,14 +665,9 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
|||
if (unlikely(atomic_read(&current->tracing_graph_pause)))
|
||||
goto out;
|
||||
|
||||
bit = ftrace_test_recursion_trylock(ip, parent_ip);
|
||||
if (bit < 0)
|
||||
goto out;
|
||||
|
||||
if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
|
||||
if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, fregs))
|
||||
parent_ip = ppc_function_entry(return_to_handler);
|
||||
|
||||
ftrace_test_recursion_unlock(bit);
|
||||
out:
|
||||
arch_ftrace_regs(fregs)->regs.link = parent_ip;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -787,10 +787,10 @@ int ftrace_disable_ftrace_graph_caller(void)
|
|||
* in current thread info. Return the address we want to divert to.
|
||||
*/
|
||||
static unsigned long
|
||||
__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp)
|
||||
__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp,
|
||||
struct ftrace_regs *fregs)
|
||||
{
|
||||
unsigned long return_hooker;
|
||||
int bit;
|
||||
|
||||
if (unlikely(ftrace_graph_is_dead()))
|
||||
goto out;
|
||||
|
|
@ -798,16 +798,11 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp
|
|||
if (unlikely(atomic_read(&current->tracing_graph_pause)))
|
||||
goto out;
|
||||
|
||||
bit = ftrace_test_recursion_trylock(ip, parent);
|
||||
if (bit < 0)
|
||||
goto out;
|
||||
|
||||
return_hooker = ppc_function_entry(return_to_handler);
|
||||
|
||||
if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
|
||||
if (!function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, fregs))
|
||||
parent = return_hooker;
|
||||
|
||||
ftrace_test_recursion_unlock(bit);
|
||||
out:
|
||||
return parent;
|
||||
}
|
||||
|
|
@ -816,13 +811,14 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp
|
|||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
||||
{
|
||||
arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, arch_ftrace_regs(fregs)->regs.gpr[1]);
|
||||
arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip,
|
||||
arch_ftrace_regs(fregs)->regs.gpr[1], fregs);
|
||||
}
|
||||
#else
|
||||
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
|
||||
unsigned long sp)
|
||||
{
|
||||
return __prepare_ftrace_return(parent, ip, sp);
|
||||
return __prepare_ftrace_return(parent, ip, sp, NULL);
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
|
|
|||
|
|
@ -22,28 +22,6 @@
|
|||
#include <asm/setup.h>
|
||||
#include <asm/firmware.h>
|
||||
|
||||
void machine_kexec_mask_interrupts(void) {
|
||||
unsigned int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_irq_desc(i, desc) {
|
||||
struct irq_chip *chip;
|
||||
|
||||
chip = irq_desc_get_chip(desc);
|
||||
if (!chip)
|
||||
continue;
|
||||
|
||||
if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
|
||||
chip->irq_eoi(&desc->irq_data);
|
||||
|
||||
if (chip->irq_mask)
|
||||
chip->irq_mask(&desc->irq_data);
|
||||
|
||||
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
||||
chip->irq_disable(&desc->irq_data);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CRASH_DUMP
|
||||
void machine_crash_shutdown(struct pt_regs *regs)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
* Copyright (C) 2005 IBM Corporation.
|
||||
*/
|
||||
|
||||
#include <linux/irq.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/string.h>
|
||||
|
|
|
|||
|
|
@ -146,9 +146,10 @@ config RISCV
|
|||
select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE)
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_FTRACE_GRAPH_FUNC
|
||||
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_FREGS
|
||||
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !PREEMPTION
|
||||
select HAVE_EBPF_JIT if MMU
|
||||
select HAVE_GUP_FAST if MMU
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ syscall-y += syscall_table_64.h
|
|||
|
||||
generic-y += early_ioremap.h
|
||||
generic-y += flat.h
|
||||
generic-y += fprobe.h
|
||||
generic-y += kvm_para.h
|
||||
generic-y += mmzone.h
|
||||
generic-y += mcs_spinlock.h
|
||||
|
|
|
|||
|
|
@ -168,6 +168,11 @@ static __always_inline unsigned long ftrace_regs_get_stack_pointer(const struct
|
|||
return arch_ftrace_regs(fregs)->sp;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->s0;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long ftrace_regs_get_argument(struct ftrace_regs *fregs,
|
||||
unsigned int n)
|
||||
{
|
||||
|
|
@ -181,6 +186,11 @@ static __always_inline unsigned long ftrace_regs_get_return_value(const struct f
|
|||
return arch_ftrace_regs(fregs)->a0;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->ra;
|
||||
}
|
||||
|
||||
static __always_inline void ftrace_regs_set_return_value(struct ftrace_regs *fregs,
|
||||
unsigned long ret)
|
||||
{
|
||||
|
|
@ -192,6 +202,20 @@ static __always_inline void ftrace_override_function_with_return(struct ftrace_r
|
|||
arch_ftrace_regs(fregs)->epc = arch_ftrace_regs(fregs)->ra;
|
||||
}
|
||||
|
||||
static __always_inline struct pt_regs *
|
||||
ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
|
||||
{
|
||||
struct __arch_ftrace_regs *afregs = arch_ftrace_regs(fregs);
|
||||
|
||||
memcpy(&regs->a0, afregs->args, sizeof(afregs->args));
|
||||
regs->epc = afregs->epc;
|
||||
regs->ra = afregs->ra;
|
||||
regs->sp = afregs->sp;
|
||||
regs->s0 = afregs->s0;
|
||||
regs->t1 = afregs->t1;
|
||||
return regs;
|
||||
}
|
||||
|
||||
int ftrace_regs_query_register_offset(const char *name);
|
||||
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
|
|
@ -208,25 +232,4 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsi
|
|||
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
struct fgraph_ret_regs {
|
||||
unsigned long a1;
|
||||
unsigned long a0;
|
||||
unsigned long s0;
|
||||
unsigned long ra;
|
||||
};
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->a0;
|
||||
}
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->s0;
|
||||
}
|
||||
#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_RISCV_FTRACE_H */
|
||||
|
|
|
|||
|
|
@ -214,7 +214,22 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
|
|||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
||||
{
|
||||
prepare_ftrace_return(&arch_ftrace_regs(fregs)->ra, ip, arch_ftrace_regs(fregs)->s0);
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
unsigned long frame_pointer = arch_ftrace_regs(fregs)->s0;
|
||||
unsigned long *parent = &arch_ftrace_regs(fregs)->ra;
|
||||
unsigned long old;
|
||||
|
||||
if (unlikely(atomic_read(&current->tracing_graph_pause)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We don't suffer access faults, so no extra fault-recovery assembly
|
||||
* is needed here.
|
||||
*/
|
||||
old = *parent;
|
||||
|
||||
if (!function_graph_enter_regs(old, ip, frame_pointer, parent, fregs))
|
||||
*parent = return_hooker;
|
||||
}
|
||||
#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
|
||||
extern void ftrace_graph_call(void);
|
||||
|
|
|
|||
|
|
@ -114,29 +114,6 @@ void machine_shutdown(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
static void machine_kexec_mask_interrupts(void)
|
||||
{
|
||||
unsigned int i;
|
||||
struct irq_desc *desc;
|
||||
|
||||
for_each_irq_desc(i, desc) {
|
||||
struct irq_chip *chip;
|
||||
|
||||
chip = irq_desc_get_chip(desc);
|
||||
if (!chip)
|
||||
continue;
|
||||
|
||||
if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
|
||||
chip->irq_eoi(&desc->irq_data);
|
||||
|
||||
if (chip->irq_mask)
|
||||
chip->irq_mask(&desc->irq_data);
|
||||
|
||||
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
||||
chip->irq_disable(&desc->irq_data);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* machine_crash_shutdown - Prepare to kexec after a kernel crash
|
||||
*
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@
|
|||
#include <asm/asm-offsets.h>
|
||||
#include <asm/ftrace.h>
|
||||
|
||||
#define ABI_SIZE_ON_STACK 80
|
||||
|
||||
.text
|
||||
|
||||
.macro SAVE_ABI_STATE
|
||||
|
|
@ -26,12 +28,12 @@
|
|||
* register if a0 was not saved.
|
||||
*/
|
||||
.macro SAVE_RET_ABI_STATE
|
||||
addi sp, sp, -4*SZREG
|
||||
REG_S s0, 2*SZREG(sp)
|
||||
REG_S ra, 3*SZREG(sp)
|
||||
REG_S a0, 1*SZREG(sp)
|
||||
REG_S a1, 0*SZREG(sp)
|
||||
addi s0, sp, 4*SZREG
|
||||
addi sp, sp, -ABI_SIZE_ON_STACK
|
||||
REG_S ra, 1*SZREG(sp)
|
||||
REG_S s0, 8*SZREG(sp)
|
||||
REG_S a0, 10*SZREG(sp)
|
||||
REG_S a1, 11*SZREG(sp)
|
||||
addi s0, sp, ABI_SIZE_ON_STACK
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ABI_STATE
|
||||
|
|
@ -41,11 +43,11 @@
|
|||
.endm
|
||||
|
||||
.macro RESTORE_RET_ABI_STATE
|
||||
REG_L ra, 3*SZREG(sp)
|
||||
REG_L s0, 2*SZREG(sp)
|
||||
REG_L a0, 1*SZREG(sp)
|
||||
REG_L a1, 0*SZREG(sp)
|
||||
addi sp, sp, 4*SZREG
|
||||
REG_L ra, 1*SZREG(sp)
|
||||
REG_L s0, 8*SZREG(sp)
|
||||
REG_L a0, 10*SZREG(sp)
|
||||
REG_L a1, 11*SZREG(sp)
|
||||
addi sp, sp, ABI_SIZE_ON_STACK
|
||||
.endm
|
||||
|
||||
SYM_TYPED_FUNC_START(ftrace_stub)
|
||||
|
|
|
|||
|
|
@ -183,16 +183,18 @@ config S390
|
|||
select HAVE_DMA_CONTIGUOUS
|
||||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
|
||||
select HAVE_FTRACE_REGS_HAVING_PT_REGS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
|
||||
select HAVE_EFFICIENT_UNALIGNED_ACCESS
|
||||
select HAVE_GUP_FAST
|
||||
select HAVE_FENTRY
|
||||
select HAVE_FTRACE_GRAPH_FUNC
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_ARG_ACCESS_API
|
||||
select HAVE_FUNCTION_ERROR_INJECTION
|
||||
select HAVE_FUNCTION_GRAPH_RETVAL
|
||||
select HAVE_FUNCTION_GRAPH_FREGS
|
||||
select HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_GCC_PLUGINS
|
||||
|
|
|
|||
10
arch/s390/include/asm/fprobe.h
Normal file
10
arch/s390/include/asm/fprobe.h
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _ASM_S390_FPROBE_H
|
||||
#define _ASM_S390_FPROBE_H
|
||||
|
||||
#include <asm-generic/fprobe.h>
|
||||
|
||||
#undef FPROBE_HEADER_MSB_PATTERN
|
||||
#define FPROBE_HEADER_MSB_PATTERN 0
|
||||
|
||||
#endif /* _ASM_S390_FPROBE_H */
|
||||
|
|
@ -39,6 +39,7 @@ struct dyn_arch_ftrace { };
|
|||
|
||||
struct module;
|
||||
struct dyn_ftrace;
|
||||
struct ftrace_ops;
|
||||
|
||||
bool ftrace_need_init_nop(void);
|
||||
#define ftrace_need_init_nop ftrace_need_init_nop
|
||||
|
|
@ -62,23 +63,6 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
|
|||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
struct fgraph_ret_regs {
|
||||
unsigned long gpr2;
|
||||
unsigned long fp;
|
||||
};
|
||||
|
||||
static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->gpr2;
|
||||
}
|
||||
|
||||
static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->fp;
|
||||
}
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
||||
static __always_inline void
|
||||
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
|
||||
unsigned long ip)
|
||||
|
|
@ -86,6 +70,25 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
|
|||
arch_ftrace_regs(fregs)->regs.psw.addr = ip;
|
||||
}
|
||||
|
||||
#undef ftrace_regs_get_frame_pointer
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_frame_pointer(struct ftrace_regs *fregs)
|
||||
{
|
||||
return ftrace_regs_get_stack_pointer(fregs);
|
||||
}
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_return_address(const struct ftrace_regs *fregs)
|
||||
{
|
||||
return arch_ftrace_regs(fregs)->regs.gprs[14];
|
||||
}
|
||||
|
||||
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
|
||||
(_regs)->psw.mask = 0; \
|
||||
(_regs)->psw.addr = arch_ftrace_regs(fregs)->regs.psw.addr; \
|
||||
(_regs)->gprs[15] = arch_ftrace_regs(fregs)->regs.gprs[15]; \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
/*
|
||||
* When an ftrace registered caller is tracing a function that is
|
||||
|
|
@ -126,6 +129,10 @@ static inline bool arch_syscall_match_sym_name(const char *sym,
|
|||
return !strcmp(sym + 7, name) || !strcmp(sym + 8, name);
|
||||
}
|
||||
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs);
|
||||
#define ftrace_graph_func ftrace_graph_func
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
|
|
|
|||
|
|
@ -175,12 +175,6 @@ int main(void)
|
|||
DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
|
||||
DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
|
||||
DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
/* function graph return value tracing */
|
||||
OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
|
||||
OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
|
||||
DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
|
||||
#endif
|
||||
OFFSET(__FTRACE_REGS_PT_REGS, __arch_ftrace_regs, regs);
|
||||
DEFINE(__FTRACE_REGS_SIZE, sizeof(struct __arch_ftrace_regs));
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ void do_restart(void *arg);
|
|||
void __init startup_init(void);
|
||||
void die(struct pt_regs *regs, const char *str);
|
||||
int setup_profiling_timer(unsigned int multiplier);
|
||||
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
|
||||
|
||||
struct s390_mmap_arg_struct;
|
||||
struct fadvise64_64_args;
|
||||
|
|
|
|||
|
|
@ -261,43 +261,23 @@ void ftrace_arch_code_modify_post_process(void)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
/*
|
||||
* Hook the return address and push it in the stack of return addresses
|
||||
* in current thread info.
|
||||
*/
|
||||
unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
|
||||
unsigned long ip)
|
||||
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct ftrace_regs *fregs)
|
||||
{
|
||||
unsigned long *parent = &arch_ftrace_regs(fregs)->regs.gprs[14];
|
||||
int bit;
|
||||
|
||||
if (unlikely(ftrace_graph_is_dead()))
|
||||
goto out;
|
||||
return;
|
||||
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
||||
goto out;
|
||||
ip -= MCOUNT_INSN_SIZE;
|
||||
if (!function_graph_enter(ra, ip, 0, (void *) sp))
|
||||
ra = (unsigned long) return_to_handler;
|
||||
out:
|
||||
return ra;
|
||||
}
|
||||
NOKPROBE_SYMBOL(prepare_ftrace_return);
|
||||
|
||||
/*
|
||||
* Patch the kernel code at ftrace_graph_caller location. The instruction
|
||||
* there is branch relative on condition. To enable the ftrace graph code
|
||||
* block, we simply patch the mask field of the instruction to zero and
|
||||
* turn the instruction into a nop.
|
||||
* To disable the ftrace graph code the mask field will be patched to
|
||||
* all ones, which turns the instruction into an unconditional branch.
|
||||
*/
|
||||
int ftrace_enable_ftrace_graph_caller(void)
|
||||
{
|
||||
/* Expect brc 0xf,... */
|
||||
return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false);
|
||||
}
|
||||
|
||||
int ftrace_disable_ftrace_graph_caller(void)
|
||||
{
|
||||
/* Expect brc 0x0,... */
|
||||
return ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true);
|
||||
return;
|
||||
bit = ftrace_test_recursion_trylock(ip, *parent);
|
||||
if (bit < 0)
|
||||
return;
|
||||
if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
|
||||
*parent = (unsigned long)&return_to_handler;
|
||||
ftrace_test_recursion_unlock(bit);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
|
|
|||
|
|
@ -104,17 +104,6 @@ SYM_CODE_START(ftrace_common)
|
|||
lgr %r3,%r14
|
||||
la %r5,STACK_FREGS(%r15)
|
||||
BASR_EX %r14,%r1
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
# The j instruction gets runtime patched to a nop instruction.
|
||||
# See ftrace_enable_ftrace_graph_caller.
|
||||
SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
|
||||
j .Lftrace_graph_caller_end
|
||||
lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
|
||||
lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
|
||||
brasl %r14,prepare_ftrace_return
|
||||
stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
|
||||
.Lftrace_graph_caller_end:
|
||||
#endif
|
||||
lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
|
||||
#ifdef MARCH_HAS_Z196_FEATURES
|
||||
ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
|
||||
|
|
@ -134,14 +123,14 @@ SYM_CODE_END(ftrace_common)
|
|||
SYM_FUNC_START(return_to_handler)
|
||||
stmg %r2,%r5,32(%r15)
|
||||
lgr %r1,%r15
|
||||
aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
|
||||
# allocate ftrace_regs and stack frame for ftrace_return_to_handler
|
||||
aghi %r15,-STACK_FRAME_SIZE_FREGS
|
||||
stg %r1,__SF_BACKCHAIN(%r15)
|
||||
la %r3,STACK_FRAME_OVERHEAD(%r15)
|
||||
stg %r1,__FGRAPH_RET_FP(%r3)
|
||||
stg %r2,__FGRAPH_RET_GPR2(%r3)
|
||||
lgr %r2,%r3
|
||||
stg %r2,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
|
||||
stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
|
||||
la %r2,STACK_FRAME_OVERHEAD(%r15)
|
||||
brasl %r14,ftrace_return_to_handler
|
||||
aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
|
||||
aghi %r15,STACK_FRAME_SIZE_FREGS
|
||||
lgr %r14,%r2
|
||||
lmg %r2,%r5,32(%r15)
|
||||
BR_EX %r14
|
||||
|
|
|
|||
|
|
@ -224,6 +224,7 @@ config X86
|
|||
select HAVE_DYNAMIC_FTRACE
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_ARGS if X86_64
|
||||
select HAVE_FTRACE_REGS_HAVING_PT_REGS if X86_64
|
||||
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
|
||||
select HAVE_SAMPLE_FTRACE_DIRECT if X86_64
|
||||
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if X86_64
|
||||
|
|
@ -233,8 +234,9 @@ config X86
|
|||
select HAVE_EXIT_THREAD
|
||||
select HAVE_GUP_FAST
|
||||
select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE
|
||||
select HAVE_FTRACE_GRAPH_FUNC if HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FTRACE_MCOUNT_RECORD
|
||||
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_FREGS if HAVE_FUNCTION_GRAPH_TRACER
|
||||
select HAVE_FUNCTION_GRAPH_TRACER if X86_32 || (X86_64 && DYNAMIC_FTRACE)
|
||||
select HAVE_FUNCTION_TRACER
|
||||
select HAVE_GCC_PLUGINS
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ static struct irq_chip hv_pci_msi_controller = {
|
|||
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
||||
.irq_compose_msi_msg = hv_irq_compose_msi_msg,
|
||||
.irq_set_affinity = msi_domain_set_affinity,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED,
|
||||
};
|
||||
|
||||
static struct msi_domain_ops pci_msi_domain_ops = {
|
||||
|
|
|
|||
|
|
@ -10,5 +10,6 @@ generated-y += unistd_64_x32.h
|
|||
generated-y += xen-hypercalls.h
|
||||
|
||||
generic-y += early_ioremap.h
|
||||
generic-y += fprobe.h
|
||||
generic-y += mcs_spinlock.h
|
||||
generic-y += mmzone.h
|
||||
|
|
|
|||
|
|
@ -34,6 +34,27 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
|
|||
return addr;
|
||||
}
|
||||
|
||||
static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
|
||||
{
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
u32 instr;
|
||||
|
||||
/* We want to be extra safe in case entry ip is on the page edge,
|
||||
* but otherwise we need to avoid get_kernel_nofault()'s overhead.
|
||||
*/
|
||||
if ((fentry_ip & ~PAGE_MASK) < ENDBR_INSN_SIZE) {
|
||||
if (get_kernel_nofault(instr, (u32 *)(fentry_ip - ENDBR_INSN_SIZE)))
|
||||
return fentry_ip;
|
||||
} else {
|
||||
instr = *(u32 *)(fentry_ip - ENDBR_INSN_SIZE);
|
||||
}
|
||||
if (is_endbr(instr))
|
||||
fentry_ip -= ENDBR_INSN_SIZE;
|
||||
#endif
|
||||
return fentry_ip;
|
||||
}
|
||||
#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
|
||||
|
||||
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
|
||||
|
||||
#include <linux/ftrace_regs.h>
|
||||
|
|
@ -47,10 +68,23 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
|
|||
return &arch_ftrace_regs(fregs)->regs;
|
||||
}
|
||||
|
||||
#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
|
||||
(_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
|
||||
(_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
|
||||
(_regs)->cs = __KERNEL_CS; \
|
||||
(_regs)->flags = 0; \
|
||||
} while (0)
|
||||
|
||||
#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
|
||||
do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0)
|
||||
|
||||
|
||||
static __always_inline unsigned long
|
||||
ftrace_regs_get_return_address(struct ftrace_regs *fregs)
|
||||
{
|
||||
return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
|
||||
}
|
||||
|
||||
struct ftrace_ops;
|
||||
#define ftrace_graph_func ftrace_graph_func
|
||||
void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
||||
|
|
@ -134,24 +168,4 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
|
|||
#endif /* !COMPILE_OFFSETS */
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
struct fgraph_ret_regs {
|
||||
unsigned long ax;
|
||||
unsigned long dx;
|
||||
unsigned long bp;
|
||||
};
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->ax;
|
||||
}
|
||||
|
||||
static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
|
||||
{
|
||||
return ret_regs->bp;
|
||||
}
|
||||
#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_FTRACE_H */
|
||||
|
|
|
|||
|
|
@ -1861,7 +1861,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
|
|||
.irq_set_affinity = ioapic_set_affinity,
|
||||
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
||||
.irq_get_irqchip_state = ioapic_irq_get_chip_state,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE |
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |
|
||||
IRQCHIP_AFFINITY_PRE_STARTUP,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -214,6 +214,7 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
|
|||
if (WARN_ON_ONCE(domain != real_parent))
|
||||
return false;
|
||||
info->chip->irq_set_affinity = msi_set_affinity;
|
||||
info->chip->flags |= IRQCHIP_MOVE_DEFERRED;
|
||||
break;
|
||||
case DOMAIN_BUS_DMAR:
|
||||
case DOMAIN_BUS_AMDVI:
|
||||
|
|
@ -315,7 +316,7 @@ static struct irq_chip dmar_msi_controller = {
|
|||
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
||||
.irq_compose_msi_msg = dmar_msi_compose_msg,
|
||||
.irq_write_msi_msg = dmar_msi_write_msg,
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE |
|
||||
.flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED |
|
||||
IRQCHIP_AFFINITY_PRE_STARTUP,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -607,16 +607,8 @@ int ftrace_disable_ftrace_graph_caller(void)
|
|||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE && !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
|
||||
|
||||
/*
|
||||
* Hook the return address and push it in the stack of return addrs
|
||||
* in current thread info.
|
||||
*/
|
||||
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
|
||||
unsigned long frame_pointer)
|
||||
static inline bool skip_ftrace_return(void)
|
||||
{
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
int bit;
|
||||
|
||||
/*
|
||||
* When resuming from suspend-to-ram, this function can be indirectly
|
||||
* called from early CPU startup code while the CPU is in real mode,
|
||||
|
|
@ -626,23 +618,31 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
|
|||
* This check isn't as accurate as virt_addr_valid(), but it should be
|
||||
* good enough for this purpose, and it's fast.
|
||||
*/
|
||||
if (unlikely((long)__builtin_frame_address(0) >= 0))
|
||||
return;
|
||||
if ((long)__builtin_frame_address(0) >= 0)
|
||||
return true;
|
||||
|
||||
if (unlikely(ftrace_graph_is_dead()))
|
||||
return;
|
||||
if (ftrace_graph_is_dead())
|
||||
return true;
|
||||
|
||||
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
||||
return;
|
||||
if (atomic_read(¤t->tracing_graph_pause))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bit = ftrace_test_recursion_trylock(ip, *parent);
|
||||
if (bit < 0)
|
||||
/*
|
||||
* Hook the return address and push it in the stack of return addrs
|
||||
* in current thread info.
|
||||
*/
|
||||
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
|
||||
unsigned long frame_pointer)
|
||||
{
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
|
||||
if (unlikely(skip_ftrace_return()))
|
||||
return;
|
||||
|
||||
if (!function_graph_enter(*parent, ip, frame_pointer, parent))
|
||||
*parent = return_hooker;
|
||||
|
||||
ftrace_test_recursion_unlock(bit);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
|
||||
|
|
@ -651,8 +651,15 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
|
|||
{
|
||||
struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
|
||||
unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
|
||||
unsigned long return_hooker = (unsigned long)&return_to_handler;
|
||||
unsigned long *parent = (unsigned long *)stack;
|
||||
|
||||
prepare_ftrace_return(ip, (unsigned long *)stack, 0);
|
||||
if (unlikely(skip_ftrace_return()))
|
||||
return;
|
||||
|
||||
|
||||
if (!function_graph_enter_regs(*parent, ip, 0, parent, fregs))
|
||||
*parent = return_hooker;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -187,14 +187,15 @@ SYM_CODE_END(ftrace_graph_caller)
|
|||
|
||||
.globl return_to_handler
|
||||
return_to_handler:
|
||||
pushl $0
|
||||
pushl %edx
|
||||
pushl %eax
|
||||
subl $(PTREGS_SIZE), %esp
|
||||
movl $0, PT_EBP(%esp)
|
||||
movl %edx, PT_EDX(%esp)
|
||||
movl %eax, PT_EAX(%esp)
|
||||
movl %esp, %eax
|
||||
call ftrace_return_to_handler
|
||||
movl %eax, %ecx
|
||||
popl %eax
|
||||
popl %edx
|
||||
addl $4, %esp # skip ebp
|
||||
movl PT_EAX(%esp), %eax
|
||||
movl PT_EDX(%esp), %edx
|
||||
addl $(PTREGS_SIZE), %esp
|
||||
JMP_NOSPEC ecx
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -348,21 +348,22 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__)
|
|||
SYM_CODE_START(return_to_handler)
|
||||
UNWIND_HINT_UNDEFINED
|
||||
ANNOTATE_NOENDBR
|
||||
subq $24, %rsp
|
||||
|
||||
/* Save the return values */
|
||||
movq %rax, (%rsp)
|
||||
movq %rdx, 8(%rsp)
|
||||
movq %rbp, 16(%rsp)
|
||||
/* Save ftrace_regs for function exit context */
|
||||
subq $(FRAME_SIZE), %rsp
|
||||
|
||||
movq %rax, RAX(%rsp)
|
||||
movq %rdx, RDX(%rsp)
|
||||
movq %rbp, RBP(%rsp)
|
||||
movq %rsp, %rdi
|
||||
|
||||
call ftrace_return_to_handler
|
||||
|
||||
movq %rax, %rdi
|
||||
movq 8(%rsp), %rdx
|
||||
movq (%rsp), %rax
|
||||
movq RDX(%rsp), %rdx
|
||||
movq RAX(%rsp), %rax
|
||||
|
||||
addq $24, %rsp
|
||||
addq $(FRAME_SIZE), %rsp
|
||||
/*
|
||||
* Jump back to the old return address. This cannot be JMP_NOSPEC rdi
|
||||
* since IBT would demand that contain ENDBR, which simply isn't so for
|
||||
|
|
|
|||
|
|
@ -517,22 +517,14 @@ static int hpet_msi_init(struct irq_domain *domain,
|
|||
struct msi_domain_info *info, unsigned int virq,
|
||||
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
|
||||
{
|
||||
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
|
||||
irq_domain_set_info(domain, virq, arg->hwirq, info->chip, NULL,
|
||||
handle_edge_irq, arg->data, "edge");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hpet_msi_free(struct irq_domain *domain,
|
||||
struct msi_domain_info *info, unsigned int virq)
|
||||
{
|
||||
irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
|
||||
}
|
||||
|
||||
static struct msi_domain_ops hpet_msi_domain_ops = {
|
||||
.msi_init = hpet_msi_init,
|
||||
.msi_free = hpet_msi_free,
|
||||
};
|
||||
|
||||
static struct msi_domain_info hpet_msi_domain_info = {
|
||||
|
|
|
|||
|
|
@ -92,8 +92,6 @@ static int uv_domain_alloc(struct irq_domain *domain, unsigned int virq,
|
|||
if (ret >= 0) {
|
||||
if (info->uv.limit == UV_AFFINITY_CPU)
|
||||
irq_set_status_flags(virq, IRQ_NO_BALANCING);
|
||||
else
|
||||
irq_set_status_flags(virq, IRQ_MOVE_PCNTXT);
|
||||
|
||||
chip_data->pnode = uv_blade_to_pnode(info->uv.blade);
|
||||
chip_data->offset = info->uv.offset;
|
||||
|
|
@ -113,7 +111,6 @@ static void uv_domain_free(struct irq_domain *domain, unsigned int virq,
|
|||
|
||||
BUG_ON(nr_irqs != 1);
|
||||
kfree(irq_data->chip_data);
|
||||
irq_clear_status_flags(virq, IRQ_MOVE_PCNTXT);
|
||||
irq_clear_status_flags(virq, IRQ_NO_BALANCING);
|
||||
irq_domain_free_irqs_top(domain, virq, nr_irqs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@ menuconfig DRM_ACCEL
|
|||
different device files, called accel/accel* (in /dev, sysfs
|
||||
and debugfs).
|
||||
|
||||
source "drivers/accel/amdxdna/Kconfig"
|
||||
source "drivers/accel/habanalabs/Kconfig"
|
||||
source "drivers/accel/ivpu/Kconfig"
|
||||
source "drivers/accel/qaic/Kconfig"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) += amdxdna/
|
||||
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
|
||||
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
|
||||
obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
|
||||
|
|
|
|||
18
drivers/accel/amdxdna/Kconfig
Normal file
18
drivers/accel/amdxdna/Kconfig
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
config DRM_ACCEL_AMDXDNA
|
||||
tristate "AMD AI Engine"
|
||||
depends on AMD_IOMMU
|
||||
depends on DRM_ACCEL
|
||||
depends on PCI && HAS_IOMEM
|
||||
depends on X86_64
|
||||
select DRM_SCHED
|
||||
select DRM_GEM_SHMEM_HELPER
|
||||
select FW_LOADER
|
||||
select HMM_MIRROR
|
||||
help
|
||||
Choose this option to enable support for NPU integrated into AMD
|
||||
client CPUs like AMD Ryzen AI 300 Series. AMD NPU can be used to
|
||||
accelerate machine learning applications.
|
||||
|
||||
If "M" is selected, the driver module will be amdxdna.
|
||||
23
drivers/accel/amdxdna/Makefile
Normal file
23
drivers/accel/amdxdna/Makefile
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
amdxdna-y := \
|
||||
aie2_ctx.o \
|
||||
aie2_error.o \
|
||||
aie2_message.o \
|
||||
aie2_pci.o \
|
||||
aie2_pm.o \
|
||||
aie2_psp.o \
|
||||
aie2_smu.o \
|
||||
aie2_solver.o \
|
||||
amdxdna_ctx.o \
|
||||
amdxdna_gem.o \
|
||||
amdxdna_mailbox.o \
|
||||
amdxdna_mailbox_helper.o \
|
||||
amdxdna_pci_drv.o \
|
||||
amdxdna_sysfs.o \
|
||||
npu1_regs.o \
|
||||
npu2_regs.o \
|
||||
npu4_regs.o \
|
||||
npu5_regs.o \
|
||||
npu6_regs.o
|
||||
obj-$(CONFIG_DRM_ACCEL_AMDXDNA) = amdxdna.o
|
||||
3
drivers/accel/amdxdna/TODO
Normal file
3
drivers/accel/amdxdna/TODO
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
- Add import and export BO support
|
||||
- Add debugfs support
|
||||
- Add debug BO support
|
||||
910
drivers/accel/amdxdna/aie2_ctx.c
Normal file
910
drivers/accel/amdxdna/aie2_ctx.c
Normal file
|
|
@ -0,0 +1,910 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/drm_syncobj.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <trace/events/amdxdna.h>
|
||||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "aie2_solver.h"
|
||||
#include "amdxdna_ctx.h"
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
static bool force_cmdlist;
|
||||
module_param(force_cmdlist, bool, 0600);
|
||||
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
|
||||
|
||||
#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
|
||||
|
||||
static void aie2_job_release(struct kref *ref)
|
||||
{
|
||||
struct amdxdna_sched_job *job;
|
||||
|
||||
job = container_of(ref, struct amdxdna_sched_job, refcnt);
|
||||
amdxdna_sched_job_cleanup(job);
|
||||
if (job->out_fence)
|
||||
dma_fence_put(job->out_fence);
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
static void aie2_job_put(struct amdxdna_sched_job *job)
|
||||
{
|
||||
kref_put(&job->refcnt, aie2_job_release);
|
||||
}
|
||||
|
||||
/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
|
||||
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
|
||||
struct drm_sched_job *bad_job)
|
||||
{
|
||||
drm_sched_stop(&hwctx->priv->sched, bad_job);
|
||||
aie2_destroy_context(xdna->dev_handle, hwctx);
|
||||
}
|
||||
|
||||
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_gem_obj *heap = hwctx->priv->heap;
|
||||
int ret;
|
||||
|
||||
ret = aie2_create_context(xdna->dev_handle, hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
|
||||
heap->mem.userptr, heap->mem.size);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (hwctx->status != HWCTX_STAT_READY) {
|
||||
XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = aie2_config_cu(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
drm_sched_start(&hwctx->priv->sched, 0);
|
||||
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void aie2_restart_ctx(struct amdxdna_client *client)
|
||||
{
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
unsigned long hwctx_id;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
mutex_lock(&client->hwctx_lock);
|
||||
amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
|
||||
if (hwctx->status != HWCTX_STAT_STOP)
|
||||
continue;
|
||||
|
||||
hwctx->status = hwctx->old_status;
|
||||
XDNA_DBG(xdna, "Resetting %s", hwctx->name);
|
||||
aie2_hwctx_restart(xdna, hwctx);
|
||||
}
|
||||
mutex_unlock(&client->hwctx_lock);
|
||||
}
|
||||
|
||||
/*
 * Look up the fence for sequence number @seq on the syncobj of @hwctx.
 *
 * Return: a new reference to the fence contained in the matching chain
 * node, or NULL when the syncobj has no fence or the lookup of @seq fails.
 * The caller must release a non-NULL result with dma_fence_put().
 */
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *chain, *found = NULL;

	chain = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!chain)
		return NULL;

	if (!dma_fence_chain_find_seqno(&chain, seq))
		found = dma_fence_get(dma_fence_chain_contained(chain));

	/* Drop the reference held on the chain node itself. */
	dma_fence_put(chain);
	return found;
}
|
||||
|
||||
static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
|
||||
fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
|
||||
if (!fence)
|
||||
return;
|
||||
|
||||
dma_fence_wait(fence, false);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
|
||||
/*
|
||||
* Command timeout is unlikely. But if it happens, it doesn't
|
||||
* break the system. aie2_hwctx_stop() will destroy mailbox
|
||||
* and abort all commands.
|
||||
*/
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
aie2_hwctx_wait_for_idle(hwctx);
|
||||
aie2_hwctx_stop(xdna, hwctx, NULL);
|
||||
hwctx->old_status = hwctx->status;
|
||||
hwctx->status = HWCTX_STAT_STOP;
|
||||
}
|
||||
|
||||
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
|
||||
/*
|
||||
* The resume path cannot guarantee that mailbox channel can be
|
||||
* regenerated. If this happen, when submit message to this
|
||||
* mailbox channel, error will return.
|
||||
*/
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
hwctx->status = hwctx->old_status;
|
||||
aie2_hwctx_restart(xdna, hwctx);
|
||||
}
|
||||
|
||||
static void
|
||||
aie2_sched_notify(struct amdxdna_sched_job *job)
|
||||
{
|
||||
struct dma_fence *fence = job->fence;
|
||||
|
||||
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
|
||||
job->hwctx->priv->completed++;
|
||||
dma_fence_signal(fence);
|
||||
|
||||
up(&job->hwctx->priv->job_sem);
|
||||
job->job_done = true;
|
||||
dma_fence_put(fence);
|
||||
mmput_async(job->mm);
|
||||
aie2_job_put(job);
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
struct amdxdna_gem_obj *cmd_abo;
|
||||
u32 ret = 0;
|
||||
u32 status;
|
||||
|
||||
cmd_abo = job->cmd_bo;
|
||||
|
||||
if (unlikely(!data))
|
||||
goto out;
|
||||
|
||||
if (unlikely(size != sizeof(u32))) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = *data;
|
||||
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
|
||||
if (status == AIE2_STATUS_SUCCESS)
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
|
||||
else
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
|
||||
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
u32 ret = 0;
|
||||
u32 status;
|
||||
|
||||
if (unlikely(!data))
|
||||
goto out;
|
||||
|
||||
if (unlikely(size != sizeof(u32))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = *data;
|
||||
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
|
||||
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
struct amdxdna_gem_obj *cmd_abo;
|
||||
struct cmd_chain_resp *resp;
|
||||
struct amdxdna_dev *xdna;
|
||||
u32 fail_cmd_status;
|
||||
u32 fail_cmd_idx;
|
||||
u32 ret = 0;
|
||||
|
||||
cmd_abo = job->cmd_bo;
|
||||
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
resp = (struct cmd_chain_resp *)data;
|
||||
xdna = job->hwctx->client->xdna;
|
||||
XDNA_DBG(xdna, "Status 0x%x", resp->status);
|
||||
if (resp->status == AIE2_STATUS_SUCCESS) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Slow path to handle error, read from ringbuf on BAR */
|
||||
fail_cmd_idx = resp->fail_cmd_idx;
|
||||
fail_cmd_status = resp->fail_cmd_status;
|
||||
XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
|
||||
fail_cmd_idx, fail_cmd_status);
|
||||
|
||||
if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
|
||||
|
||||
if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
|
||||
struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
|
||||
|
||||
cc->error_index = fail_cmd_idx;
|
||||
if (cc->error_index >= cc->command_count)
|
||||
cc->error_index = 0;
|
||||
}
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct dma_fence *
|
||||
aie2_sched_job_run(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
struct dma_fence *fence;
|
||||
int ret;
|
||||
|
||||
if (!mmget_not_zero(job->mm))
|
||||
return ERR_PTR(-ESRCH);
|
||||
|
||||
kref_get(&job->refcnt);
|
||||
fence = dma_fence_get(job->fence);
|
||||
|
||||
if (unlikely(!cmd_abo)) {
|
||||
ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
|
||||
goto out;
|
||||
}
|
||||
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
|
||||
|
||||
if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
|
||||
ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
|
||||
else if (force_cmdlist)
|
||||
ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
|
||||
else
|
||||
ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
dma_fence_put(job->fence);
|
||||
aie2_job_put(job);
|
||||
mmput(job->mm);
|
||||
fence = ERR_PTR(ret);
|
||||
}
|
||||
trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
static void aie2_sched_job_free(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
|
||||
trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
|
||||
if (!job->job_done)
|
||||
up(&hwctx->priv->job_sem);
|
||||
|
||||
drm_sched_job_cleanup(sched_job);
|
||||
aie2_job_put(job);
|
||||
}
|
||||
|
||||
static enum drm_gpu_sched_stat
|
||||
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
struct amdxdna_dev *xdna;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
aie2_hwctx_stop(xdna, hwctx, sched_job);
|
||||
|
||||
aie2_hwctx_restart(xdna, hwctx);
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
|
||||
return DRM_GPU_SCHED_STAT_NOMINAL;
|
||||
}
|
||||
|
||||
const struct drm_sched_backend_ops sched_ops = {
|
||||
.run_job = aie2_sched_job_run,
|
||||
.free_job = aie2_sched_job_free,
|
||||
.timedout_job = aie2_sched_job_timedout,
|
||||
};
|
||||
|
||||
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
int start, end, first, last;
|
||||
u32 width = 1, entries = 0;
|
||||
int i;
|
||||
|
||||
if (!hwctx->num_tiles) {
|
||||
XDNA_ERR(xdna, "Number of tiles is zero");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
if (unlikely(!ndev->metadata.core.row_count)) {
|
||||
XDNA_WARN(xdna, "Core tile row count is zero");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
|
||||
if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
|
||||
XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (ndev->priv->col_align == COL_ALIGN_NATURE)
|
||||
width = hwctx->num_col;
|
||||
|
||||
/*
|
||||
* In range [start, end], find out columns that is multiple of width.
|
||||
* 'first' is the first column,
|
||||
* 'last' is the last column,
|
||||
* 'entries' is the total number of columns.
|
||||
*/
|
||||
start = xdna->dev_info->first_col;
|
||||
end = ndev->total_col - hwctx->num_col;
|
||||
if (start > 0 && end == 0) {
|
||||
XDNA_DBG(xdna, "Force start from col 0");
|
||||
start = 0;
|
||||
}
|
||||
first = start + (width - start % width) % width;
|
||||
last = end - end % width;
|
||||
if (last >= first)
|
||||
entries = (last - first) / width + 1;
|
||||
XDNA_DBG(xdna, "start %d end %d first %d last %d",
|
||||
start, end, first, last);
|
||||
|
||||
if (unlikely(!entries)) {
|
||||
XDNA_ERR(xdna, "Start %d end %d width %d",
|
||||
start, end, width);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
|
||||
if (!hwctx->col_list)
|
||||
return -ENOMEM;
|
||||
|
||||
hwctx->col_list_len = entries;
|
||||
hwctx->col_list[0] = first;
|
||||
for (i = 1; i < entries; i++)
|
||||
hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
|
||||
|
||||
print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
|
||||
entries * sizeof(*hwctx->col_list), false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct alloc_requests *xrs_req;
|
||||
int ret;
|
||||
|
||||
xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
|
||||
if (!xrs_req)
|
||||
return -ENOMEM;
|
||||
|
||||
xrs_req->cdo.start_cols = hwctx->col_list;
|
||||
xrs_req->cdo.cols_len = hwctx->col_list_len;
|
||||
xrs_req->cdo.ncols = hwctx->num_col;
|
||||
xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
|
||||
|
||||
xrs_req->rqos.gops = hwctx->qos.gops;
|
||||
xrs_req->rqos.fps = hwctx->qos.fps;
|
||||
xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
|
||||
xrs_req->rqos.latency = hwctx->qos.latency;
|
||||
xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
|
||||
xrs_req->rqos.priority = hwctx->qos.priority;
|
||||
|
||||
xrs_req->rid = (uintptr_t)hwctx;
|
||||
|
||||
ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
|
||||
if (ret)
|
||||
XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
|
||||
|
||||
kfree(xrs_req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
int ret;
|
||||
|
||||
ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
|
||||
if (ret)
|
||||
XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
|
||||
}
|
||||
|
||||
static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct drm_file *filp = hwctx->client->filp;
|
||||
struct drm_syncobj *syncobj;
|
||||
u32 hdl;
|
||||
int ret;
|
||||
|
||||
hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
|
||||
|
||||
ret = drm_syncobj_create(&syncobj, 0, NULL);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
|
||||
if (ret) {
|
||||
drm_syncobj_put(syncobj);
|
||||
XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
hwctx->priv->syncobj = syncobj;
|
||||
hwctx->syncobj_hdl = hdl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
/*
|
||||
* The syncobj_hdl is owned by user space and will be cleaned up
|
||||
* separately.
|
||||
*/
|
||||
drm_syncobj_put(hwctx->priv->syncobj);
|
||||
}
|
||||
|
||||
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_client *client = hwctx->client;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct drm_gpu_scheduler *sched;
|
||||
struct amdxdna_hwctx_priv *priv;
|
||||
struct amdxdna_gem_obj *heap;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
int i, ret;
|
||||
|
||||
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
return -ENOMEM;
|
||||
hwctx->priv = priv;
|
||||
|
||||
mutex_lock(&client->mm_lock);
|
||||
heap = client->dev_heap;
|
||||
if (!heap) {
|
||||
XDNA_ERR(xdna, "The client dev heap object not exist");
|
||||
mutex_unlock(&client->mm_lock);
|
||||
ret = -ENOENT;
|
||||
goto free_priv;
|
||||
}
|
||||
drm_gem_object_get(to_gobj(heap));
|
||||
mutex_unlock(&client->mm_lock);
|
||||
priv->heap = heap;
|
||||
sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
|
||||
|
||||
ret = amdxdna_gem_pin(heap);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Dev heap pin failed, ret %d", ret);
|
||||
goto put_heap;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
|
||||
struct amdxdna_gem_obj *abo;
|
||||
struct amdxdna_drm_create_bo args = {
|
||||
.flags = 0,
|
||||
.type = AMDXDNA_BO_DEV,
|
||||
.vaddr = 0,
|
||||
.size = MAX_CHAIN_CMDBUF_SIZE,
|
||||
};
|
||||
|
||||
abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
|
||||
if (IS_ERR(abo)) {
|
||||
ret = PTR_ERR(abo);
|
||||
goto free_cmd_bufs;
|
||||
}
|
||||
|
||||
XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
|
||||
i, abo->mem.dev_addr, abo->mem.size);
|
||||
priv->cmd_buf[i] = abo;
|
||||
}
|
||||
|
||||
sched = &priv->sched;
|
||||
mutex_init(&priv->io_lock);
|
||||
|
||||
fs_reclaim_acquire(GFP_KERNEL);
|
||||
might_lock(&priv->io_lock);
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
|
||||
ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
|
||||
HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
|
||||
NULL, NULL, hwctx->name, xdna->ddev.dev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
|
||||
goto free_cmd_bufs;
|
||||
}
|
||||
|
||||
ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
|
||||
&sched, 1, NULL);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
|
||||
goto free_sched;
|
||||
}
|
||||
|
||||
ret = aie2_hwctx_col_list(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
|
||||
goto free_entity;
|
||||
}
|
||||
|
||||
ret = aie2_alloc_resource(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
|
||||
goto free_col_list;
|
||||
}
|
||||
|
||||
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
|
||||
heap->mem.userptr, heap->mem.size);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
|
||||
goto release_resource;
|
||||
}
|
||||
|
||||
ret = aie2_ctx_syncobj_create(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
|
||||
goto release_resource;
|
||||
}
|
||||
|
||||
hwctx->status = HWCTX_STAT_INIT;
|
||||
ndev = xdna->dev_handle;
|
||||
ndev->hwctx_num++;
|
||||
|
||||
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
|
||||
|
||||
return 0;
|
||||
|
||||
release_resource:
|
||||
aie2_release_resource(hwctx);
|
||||
free_col_list:
|
||||
kfree(hwctx->col_list);
|
||||
free_entity:
|
||||
drm_sched_entity_destroy(&priv->entity);
|
||||
free_sched:
|
||||
drm_sched_fini(&priv->sched);
|
||||
free_cmd_bufs:
|
||||
for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
|
||||
if (!priv->cmd_buf[i])
|
||||
continue;
|
||||
drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
|
||||
}
|
||||
amdxdna_gem_unpin(heap);
|
||||
put_heap:
|
||||
drm_gem_object_put(to_gobj(heap));
|
||||
free_priv:
|
||||
kfree(priv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
struct amdxdna_dev *xdna;
|
||||
int idx;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
ndev = xdna->dev_handle;
|
||||
ndev->hwctx_num--;
|
||||
drm_sched_wqueue_stop(&hwctx->priv->sched);
|
||||
|
||||
/* Now, scheduler will not send command to device. */
|
||||
aie2_release_resource(hwctx);
|
||||
|
||||
/*
|
||||
* All submitted commands are aborted.
|
||||
* Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run()
|
||||
* will return NODEV if it is called.
|
||||
*/
|
||||
drm_sched_wqueue_start(&hwctx->priv->sched);
|
||||
|
||||
aie2_hwctx_wait_for_idle(hwctx);
|
||||
drm_sched_entity_destroy(&hwctx->priv->entity);
|
||||
drm_sched_fini(&hwctx->priv->sched);
|
||||
aie2_ctx_syncobj_destroy(hwctx);
|
||||
|
||||
XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
|
||||
|
||||
for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
|
||||
drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
|
||||
amdxdna_gem_unpin(hwctx->priv->heap);
|
||||
drm_gem_object_put(to_gobj(hwctx->priv->heap));
|
||||
|
||||
mutex_destroy(&hwctx->priv->io_lock);
|
||||
kfree(hwctx->col_list);
|
||||
kfree(hwctx->priv);
|
||||
kfree(hwctx->cus);
|
||||
}
|
||||
|
||||
/*
 * Apply a CU (compute unit) configuration to a hardware context.
 *
 * @hwctx: context to configure; must still be in HWCTX_STAT_INIT
 * @buf:   struct amdxdna_hwctx_param_config_cu followed by num_cus entries
 * @size:  number of valid bytes in @buf
 *
 * The configuration is validated, duplicated into hwctx->cus and sent to
 * the firmware through aie2_config_cu(). On success the context moves to
 * HWCTX_STAT_READY.
 *
 * Return: 0 on success, -EINVAL for a malformed or repeated configuration,
 * -ENOMEM if the copy cannot be allocated, or the aie2_config_cu() error.
 */
static int aie2_hwctx_cu_config(struct amdxdna_hwctx *hwctx, void *buf, u32 size)
{
	struct amdxdna_hwctx_param_config_cu *config = buf;
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	size_t total_size;
	int ret;

	/*
	 * The fixed header has to be fully present before any of its fields
	 * (num_cus, pad) is read; otherwise the accesses below would run past
	 * the end of the caller's buffer.
	 */
	if (size < sizeof(*config)) {
		XDNA_ERR(xdna, "CU config smaller than header");
		return -EINVAL;
	}

	XDNA_DBG(xdna, "Config %d CU to %s", config->num_cus, hwctx->name);
	if (XDNA_MBZ_DBG(xdna, config->pad, sizeof(config->pad)))
		return -EINVAL;

	if (hwctx->status != HWCTX_STAT_INIT) {
		XDNA_ERR(xdna, "Not support re-config CU");
		return -EINVAL;
	}

	if (!config->num_cus) {
		XDNA_ERR(xdna, "Number of CU is zero");
		return -EINVAL;
	}

	/*
	 * Keep the full size_t result: struct_size() saturates on overflow and
	 * must not be truncated before it is compared against size.
	 */
	total_size = struct_size(config, cu_configs, config->num_cus);
	if (total_size > size) {
		XDNA_ERR(xdna, "CU config larger than size");
		return -EINVAL;
	}

	hwctx->cus = kmemdup(config, total_size, GFP_KERNEL);
	if (!hwctx->cus)
		return -ENOMEM;

	ret = aie2_config_cu(hwctx);
	if (ret) {
		XDNA_ERR(xdna, "Config CU to firmware failed, ret %d", ret);
		goto free_cus;
	}

	wmb(); /* To avoid locking in command submit when check status */
	hwctx->status = HWCTX_STAT_READY;

	return 0;

free_cus:
	kfree(hwctx->cus);
	hwctx->cus = NULL;
	return ret;
}
|
||||
|
||||
/*
 * Dispatch a context configuration request by type.
 *
 * Only DRM_AMDXDNA_HWCTX_CONFIG_CU is handled; the debug-buffer types and
 * any unknown type return -EOPNOTSUPP (unknown types are also logged).
 * A warning is raised if the device lock is not held on entry.
 */
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	switch (type) {
	case DRM_AMDXDNA_HWCTX_CONFIG_CU:
		return aie2_hwctx_cu_config(hwctx, buf, size);
	case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
	case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
		return -EOPNOTSUPP;
	default:
		XDNA_DBG(xdna, "Not supported type %d", type);
		return -EOPNOTSUPP;
	}
}
|
||||
|
||||
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
|
||||
struct mm_struct *mm = abo->mem.notifier.mm;
|
||||
struct hmm_range range = { 0 };
|
||||
unsigned long timeout;
|
||||
int ret;
|
||||
|
||||
XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
|
||||
abo->mem.userptr, abo->mem.size);
|
||||
range.notifier = &abo->mem.notifier;
|
||||
range.start = abo->mem.userptr;
|
||||
range.end = abo->mem.userptr + abo->mem.size;
|
||||
range.hmm_pfns = abo->mem.pfns;
|
||||
range.default_flags = HMM_PFN_REQ_FAULT;
|
||||
|
||||
if (!mmget_not_zero(mm))
|
||||
return -EFAULT;
|
||||
|
||||
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
again:
|
||||
range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
|
||||
mmap_read_lock(mm);
|
||||
ret = hmm_range_fault(&range);
|
||||
mmap_read_unlock(mm);
|
||||
if (ret) {
|
||||
if (time_after(jiffies, timeout)) {
|
||||
ret = -ETIME;
|
||||
goto put_mm;
|
||||
}
|
||||
|
||||
if (ret == -EBUSY)
|
||||
goto again;
|
||||
|
||||
goto put_mm;
|
||||
}
|
||||
|
||||
down_read(&xdna->notifier_lock);
|
||||
if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
|
||||
up_read(&xdna->notifier_lock);
|
||||
goto again;
|
||||
}
|
||||
abo->mem.map_invalid = false;
|
||||
up_read(&xdna->notifier_lock);
|
||||
|
||||
put_mm:
|
||||
mmput(mm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Submit a job to the hardware context's scheduler entity.
 *
 * @hwctx: target hardware context
 * @job:   job to submit; job->bos / job->bo_cnt list the BOs it uses
 * @seq:   output, sequence number assigned to the job
 *
 * Takes a slot of the context's job semaphore, initialises the scheduler
 * job, locks the BO reservations and reserves a fence slot on each. Any BO
 * whose mapping is flagged invalid is repopulated and the locking sequence
 * is retried, bounded by HMM_RANGE_DEFAULT_TIMEOUT. The job is then armed,
 * its finished fence is added to every BO and to the context syncobj
 * timeline, and the job is pushed to the entity.
 *
 * Return: 0 on success or a negative errno. On failure the semaphore slot
 * is released and job->job_done is set.
 */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx acquire_ctx;
	struct dma_fence_chain *fence_chain;
	struct amdxdna_gem_obj *bo;
	unsigned long deadline = 0;
	int ret, i;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	fence_chain = dma_fence_chain_alloc();
	if (!fence_chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto cleanup_job;
	}

	for (i = 0; i < job->bo_cnt; i++) {
		ret = dma_resv_reserve_fences(job->bos[i]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			goto cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (i = 0; i < job->bo_cnt; i++) {
		bo = to_xdna_obj(job->bos[i]);
		if (bo->mem.map_invalid) {
			/* Drop all locks before faulting the pages back in */
			up_read(&xdna->notifier_lock);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);
			if (!deadline) {
				/* First invalid mapping seen: start the clock */
				deadline = jiffies +
					msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
			} else if (time_after(jiffies, deadline)) {
				ret = -ETIME;
				goto cleanup_job;
			}

			ret = aie2_populate_range(bo);
			if (ret)
				goto cleanup_job;
			goto retry;
		}
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (i = 0; i < job->bo_cnt; i++)
		dma_resv_add_fence(job->bos[i]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	/* Extra reference, taken before the job is handed to the scheduler */
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, fence_chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &acquire_ctx);

	aie2_job_put(job);

	return 0;

cleanup_job:
	drm_sched_job_cleanup(&job->base);
free_chain:
	dma_fence_chain_free(fence_chain);
up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}
|
||||
|
||||
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
|
||||
struct drm_gem_object *gobj = to_gobj(abo);
|
||||
long ret;
|
||||
|
||||
down_write(&xdna->notifier_lock);
|
||||
abo->mem.map_invalid = true;
|
||||
mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
|
||||
up_write(&xdna->notifier_lock);
|
||||
ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
|
||||
true, MAX_SCHEDULE_TIMEOUT);
|
||||
if (!ret || ret == -ERESTARTSYS)
|
||||
XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
|
||||
}
|
||||
360
drivers/accel/amdxdna/aie2_error.c
Normal file
360
drivers/accel/amdxdna/aie2_error.c
Normal file
|
|
@ -0,0 +1,360 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/drm_cache.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
struct async_event {
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
struct async_event_msg_resp resp;
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
u8 *buf;
|
||||
dma_addr_t addr;
|
||||
u32 size;
|
||||
};
|
||||
|
||||
struct async_events {
|
||||
struct workqueue_struct *wq;
|
||||
u8 *buf;
|
||||
dma_addr_t addr;
|
||||
u32 size;
|
||||
u32 event_cnt;
|
||||
struct async_event event[] __counted_by(event_cnt);
|
||||
};
|
||||
|
||||
/*
 * The enums, structs and lookup tables below are ported from the XAIE util
 * header file.
 *
 * The data is defined by the AIE device and is used to decode error
 * messages from the device.
 */
|
||||
|
||||
/* Module that reported an error; selects the event lookup table. */
enum aie_module_type {
	AIE_MEM_MOD = 0,
	AIE_CORE_MOD,
	AIE_PL_MOD,
};
|
||||
|
||||
/* Error categories that event IDs are mapped to by the lookup tables. */
enum aie_error_category {
	AIE_ERROR_SATURATION = 0,
	AIE_ERROR_FP,
	AIE_ERROR_STREAM,
	AIE_ERROR_ACCESS,
	AIE_ERROR_BUS,
	AIE_ERROR_INSTRUCTION,
	AIE_ERROR_ECC,
	AIE_ERROR_LOCK,
	AIE_ERROR_DMA,
	AIE_ERROR_MEM_PARITY,
	/* Not part of XAIE: fallback for events with no known category */
	AIE_ERROR_UNKNOWN,
};
|
||||
|
||||
/* Don't pack, unless XAIE side changed */
|
||||
struct aie_error {
|
||||
__u8 row;
|
||||
__u8 col;
|
||||
__u32 mod_type;
|
||||
__u8 event_id;
|
||||
};
|
||||
|
||||
struct aie_err_info {
|
||||
u32 err_cnt;
|
||||
u32 ret_code;
|
||||
u32 rsvd;
|
||||
struct aie_error payload[] __counted_by(err_cnt);
|
||||
};
|
||||
|
||||
struct aie_event_category {
|
||||
u8 event_id;
|
||||
enum aie_error_category category;
|
||||
};
|
||||
|
||||
#define EVENT_CATEGORY(id, cat) { id, cat }
|
||||
static const struct aie_event_category aie_ml_mem_event_cat[] = {
|
||||
EVENT_CATEGORY(88U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(90U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(91U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(92U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(93U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(94U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(95U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(96U, AIE_ERROR_MEM_PARITY),
|
||||
EVENT_CATEGORY(97U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(98U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(99U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(100U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(101U, AIE_ERROR_LOCK),
|
||||
};
|
||||
|
||||
static const struct aie_event_category aie_ml_core_event_cat[] = {
|
||||
EVENT_CATEGORY(55U, AIE_ERROR_ACCESS),
|
||||
EVENT_CATEGORY(56U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(57U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(58U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(59U, AIE_ERROR_INSTRUCTION),
|
||||
EVENT_CATEGORY(60U, AIE_ERROR_ACCESS),
|
||||
EVENT_CATEGORY(62U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(64U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(65U, AIE_ERROR_ACCESS),
|
||||
EVENT_CATEGORY(66U, AIE_ERROR_ACCESS),
|
||||
EVENT_CATEGORY(67U, AIE_ERROR_LOCK),
|
||||
EVENT_CATEGORY(70U, AIE_ERROR_INSTRUCTION),
|
||||
EVENT_CATEGORY(71U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(72U, AIE_ERROR_BUS),
|
||||
};
|
||||
|
||||
static const struct aie_event_category aie_ml_mem_tile_event_cat[] = {
|
||||
EVENT_CATEGORY(130U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(132U, AIE_ERROR_ECC),
|
||||
EVENT_CATEGORY(133U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(134U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(135U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(136U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(137U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(138U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(139U, AIE_ERROR_LOCK),
|
||||
};
|
||||
|
||||
static const struct aie_event_category aie_ml_shim_tile_event_cat[] = {
|
||||
EVENT_CATEGORY(64U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(65U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(66U, AIE_ERROR_STREAM),
|
||||
EVENT_CATEGORY(67U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(68U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(69U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(70U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(71U, AIE_ERROR_BUS),
|
||||
EVENT_CATEGORY(72U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(73U, AIE_ERROR_DMA),
|
||||
EVENT_CATEGORY(74U, AIE_ERROR_LOCK),
|
||||
};
|
||||
|
||||
static enum aie_error_category
|
||||
aie_get_error_category(u8 row, u8 event_id, enum aie_module_type mod_type)
|
||||
{
|
||||
const struct aie_event_category *lut;
|
||||
int num_entry;
|
||||
int i;
|
||||
|
||||
switch (mod_type) {
|
||||
case AIE_PL_MOD:
|
||||
lut = aie_ml_shim_tile_event_cat;
|
||||
num_entry = ARRAY_SIZE(aie_ml_shim_tile_event_cat);
|
||||
break;
|
||||
case AIE_CORE_MOD:
|
||||
lut = aie_ml_core_event_cat;
|
||||
num_entry = ARRAY_SIZE(aie_ml_core_event_cat);
|
||||
break;
|
||||
case AIE_MEM_MOD:
|
||||
if (row == 1) {
|
||||
lut = aie_ml_mem_tile_event_cat;
|
||||
num_entry = ARRAY_SIZE(aie_ml_mem_tile_event_cat);
|
||||
} else {
|
||||
lut = aie_ml_mem_event_cat;
|
||||
num_entry = ARRAY_SIZE(aie_ml_mem_event_cat);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return AIE_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_entry; i++) {
|
||||
if (event_id != lut[i].event_id)
|
||||
continue;
|
||||
|
||||
return lut[i].category;
|
||||
}
|
||||
|
||||
return AIE_ERROR_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
 * Log every reported error and collect the affected columns.
 *
 * @ndev:     device handle, used for logging
 * @err_info: array of struct aie_error records
 * @num_err:  number of records in @err_info
 *
 * Processing stops at the first record whose column does not fit into the
 * 32-bit bitmap.
 *
 * Return: bitmap with bit N set for every column N that reported an error.
 */
static u32 aie2_error_backtrack(struct amdxdna_dev_hdl *ndev, void *err_info, u32 num_err)
{
	struct aie_error *errs = err_info;
	u32 err_col = 0; /* assume that AIE has less than 32 columns */
	u32 i;

	/* Get err column bitmap */
	for (i = 0; i < num_err; i++) {
		struct aie_error *err = &errs[i];
		enum aie_error_category cat;

		cat = aie_get_error_category(err->row, err->event_id, err->mod_type);
		XDNA_ERR(ndev->xdna, "Row: %d, Col: %d, module %d, event ID %d, category %d",
			 err->row, err->col, err->mod_type,
			 err->event_id, cat);

		if (err->col >= 32) {
			XDNA_WARN(ndev->xdna, "Invalid column number");
			break;
		}

		/*
		 * Shift an unsigned one: column 31 is allowed by the check
		 * above and "1 << 31" on a signed int is undefined behaviour.
		 */
		err_col |= 1U << err->col;
	}

	return err_col;
}
|
||||
|
||||
/*
 * Callback invoked for an async event message.
 *
 * When response data is supplied, its type and then its status are copied
 * into the event slot; the worker is queued in every case.
 *
 * Return: always 0.
 */
static int aie2_error_async_cb(void *handle, const u32 *data, size_t size)
{
	struct async_event *event = handle;
	struct async_event_msg_resp *msg_resp;

	if (data) {
		msg_resp = (struct async_event_msg_resp *)data;
		event->resp.type = msg_resp->type;
		wmb(); /* Update status in the end, so that no lock for here */
		event->resp.status = msg_resp->status;
	}

	queue_work(event->wq, &event->work);
	return 0;
}
|
||||
|
||||
static int aie2_error_event_send(struct async_event *e)
|
||||
{
|
||||
drm_clflush_virt_range(e->buf, e->size); /* device can access */
|
||||
return aie2_register_asyn_event_msg(e->ndev, e->addr, e->size, e,
|
||||
aie2_error_async_cb);
|
||||
}
|
||||
|
||||
static void aie2_error_worker(struct work_struct *err_work)
|
||||
{
|
||||
struct aie_err_info *info;
|
||||
struct amdxdna_dev *xdna;
|
||||
struct async_event *e;
|
||||
u32 max_err;
|
||||
u32 err_col;
|
||||
|
||||
e = container_of(err_work, struct async_event, work);
|
||||
|
||||
xdna = e->ndev->xdna;
|
||||
|
||||
if (e->resp.status == MAX_AIE2_STATUS_CODE)
|
||||
return;
|
||||
|
||||
e->resp.status = MAX_AIE2_STATUS_CODE;
|
||||
|
||||
print_hex_dump_debug("AIE error: ", DUMP_PREFIX_OFFSET, 16, 4,
|
||||
e->buf, 0x100, false);
|
||||
|
||||
info = (struct aie_err_info *)e->buf;
|
||||
XDNA_DBG(xdna, "Error count %d return code %d", info->err_cnt, info->ret_code);
|
||||
|
||||
max_err = (e->size - sizeof(*info)) / sizeof(struct aie_error);
|
||||
if (unlikely(info->err_cnt > max_err)) {
|
||||
WARN_ONCE(1, "Error count too large %d\n", info->err_cnt);
|
||||
return;
|
||||
}
|
||||
err_col = aie2_error_backtrack(e->ndev, info->payload, info->err_cnt);
|
||||
if (!err_col) {
|
||||
XDNA_WARN(xdna, "Did not get error column");
|
||||
return;
|
||||
}
|
||||
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
/* Re-sent this event to firmware */
|
||||
if (aie2_error_event_send(e))
|
||||
XDNA_WARN(xdna, "Unable to register async event");
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
}
|
||||
|
||||
int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
struct async_event *e;
|
||||
int i, ret;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
for (i = 0; i < ndev->async_events->event_cnt; i++) {
|
||||
e = &ndev->async_events->event[i];
|
||||
ret = aie2_error_event_send(e);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
struct async_events *events;
|
||||
|
||||
events = ndev->async_events;
|
||||
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
destroy_workqueue(events->wq);
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
|
||||
dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
|
||||
events->addr, DMA_FROM_DEVICE);
|
||||
kfree(events);
|
||||
}
|
||||
|
||||
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
u32 total_col = ndev->total_col;
|
||||
u32 total_size = ASYNC_BUF_SIZE * total_col;
|
||||
struct async_events *events;
|
||||
int i, ret;
|
||||
|
||||
events = kzalloc(struct_size(events, event, total_col), GFP_KERNEL);
|
||||
if (!events)
|
||||
return -ENOMEM;
|
||||
|
||||
events->buf = dma_alloc_noncoherent(xdna->ddev.dev, total_size, &events->addr,
|
||||
DMA_FROM_DEVICE, GFP_KERNEL);
|
||||
if (!events->buf) {
|
||||
ret = -ENOMEM;
|
||||
goto free_events;
|
||||
}
|
||||
events->size = total_size;
|
||||
events->event_cnt = total_col;
|
||||
|
||||
events->wq = alloc_ordered_workqueue("async_wq", 0);
|
||||
if (!events->wq) {
|
||||
ret = -ENOMEM;
|
||||
goto free_buf;
|
||||
}
|
||||
|
||||
for (i = 0; i < events->event_cnt; i++) {
|
||||
struct async_event *e = &events->event[i];
|
||||
u32 offset = i * ASYNC_BUF_SIZE;
|
||||
|
||||
e->ndev = ndev;
|
||||
e->wq = events->wq;
|
||||
e->buf = &events->buf[offset];
|
||||
e->addr = events->addr + offset;
|
||||
e->size = ASYNC_BUF_SIZE;
|
||||
e->resp.status = MAX_AIE2_STATUS_CODE;
|
||||
INIT_WORK(&e->work, aie2_error_worker);
|
||||
}
|
||||
|
||||
ndev->async_events = events;
|
||||
|
||||
XDNA_DBG(xdna, "Async event count %d, buf total size 0x%x",
|
||||
events->event_cnt, events->size);
|
||||
return 0;
|
||||
|
||||
free_buf:
|
||||
dma_free_noncoherent(xdna->ddev.dev, events->size, events->buf,
|
||||
events->addr, DMA_FROM_DEVICE);
|
||||
free_events:
|
||||
kfree(events);
|
||||
return ret;
|
||||
}
|
||||
776
drivers/accel/amdxdna/aie2_message.c
Normal file
776
drivers/accel/amdxdna/aie2_message.c
Normal file
|
|
@ -0,0 +1,776 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_cache.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "amdxdna_ctx.h"
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_mailbox_helper.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
/*
 * Declare the locals for an AIE2 management message via
 * DECLARE_XDNA_MSG_COMMON(), using MAX_AIE2_STATUS_CODE as its third
 * argument. Functions in this file use the resulting 'req', 'resp' and
 * 'msg' variables.
 */
#define DECLARE_AIE2_MSG(name, op) \
	DECLARE_XDNA_MSG_COMMON(name, op, MAX_AIE2_STATUS_CODE)
|
||||
|
||||
static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
|
||||
struct xdna_mailbox_msg *msg)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
struct xdna_notify *hdl = msg->handle;
|
||||
int ret;
|
||||
|
||||
if (!ndev->mgmt_chann)
|
||||
return -ENODEV;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
|
||||
if (ret == -ETIME) {
|
||||
xdna_mailbox_stop_channel(ndev->mgmt_chann);
|
||||
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
|
||||
ndev->mgmt_chann = NULL;
|
||||
}
|
||||
|
||||
if (!ret && *hdl->data != AIE2_STATUS_SUCCESS) {
|
||||
XDNA_ERR(xdna, "command opcode 0x%x failed, status 0x%x",
|
||||
msg->opcode, *hdl->data);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
|
||||
|
||||
return aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
}
|
||||
|
||||
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
DECLARE_AIE2_MSG(suspend, MSG_OP_RESUME);
|
||||
|
||||
return aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
}
|
||||
|
||||
/*
 * Set one runtime configuration value in the firmware.
 *
 * @type:  configuration item
 * @value: value to set
 *
 * Return: 0 on success; otherwise the aie2_send_mgmt_msg_wait() error,
 * which is also logged.
 */
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value)
{
	DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
	int ret;

	req.type = type;
	req.value = value;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret)
		XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", ret);

	return ret;
}
|
||||
|
||||
/*
 * Read one runtime configuration value from the firmware.
 *
 * @type:  configuration item
 * @value: output; written only on success
 *
 * Return: 0 on success; otherwise the aie2_send_mgmt_msg_wait() error,
 * which is also logged.
 */
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value)
{
	DECLARE_AIE2_MSG(get_runtime_cfg, MSG_OP_GET_RUNTIME_CONFIG);
	int ret;

	req.type = type;

	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (!ret) {
		*value = resp.value;
		return 0;
	}

	XDNA_ERR(ndev->xdna, "Failed to get runtime config, ret %d", ret);
	return ret;
}
|
||||
|
||||
/*
 * Send MSG_OP_ASSIGN_MGMT_PASID carrying @pasid to the firmware.
 *
 * Return: result of aie2_send_mgmt_msg_wait().
 */
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid)
{
	DECLARE_AIE2_MSG(assign_mgmt_pasid, MSG_OP_ASSIGN_MGMT_PASID);
	int ret;

	req.pasid = pasid;
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);

	return ret;
}
|
||||
|
||||
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version)
|
||||
{
|
||||
DECLARE_AIE2_MSG(aie_version_info, MSG_OP_QUERY_AIE_VERSION);
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
int ret;
|
||||
|
||||
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
XDNA_DBG(xdna, "Query AIE version - major: %u minor: %u completed",
|
||||
resp.major, resp.minor);
|
||||
|
||||
version->major = resp.major;
|
||||
version->minor = resp.minor;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata)
|
||||
{
|
||||
DECLARE_AIE2_MSG(aie_tile_info, MSG_OP_QUERY_AIE_TILE_INFO);
|
||||
int ret;
|
||||
|
||||
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
metadata->size = resp.info.size;
|
||||
metadata->cols = resp.info.cols;
|
||||
metadata->rows = resp.info.rows;
|
||||
|
||||
metadata->version.major = resp.info.major;
|
||||
metadata->version.minor = resp.info.minor;
|
||||
|
||||
metadata->core.row_count = resp.info.core_rows;
|
||||
metadata->core.row_start = resp.info.core_row_start;
|
||||
metadata->core.dma_channel_count = resp.info.core_dma_channels;
|
||||
metadata->core.lock_count = resp.info.core_locks;
|
||||
metadata->core.event_reg_count = resp.info.core_events;
|
||||
|
||||
metadata->mem.row_count = resp.info.mem_rows;
|
||||
metadata->mem.row_start = resp.info.mem_row_start;
|
||||
metadata->mem.dma_channel_count = resp.info.mem_dma_channels;
|
||||
metadata->mem.lock_count = resp.info.mem_locks;
|
||||
metadata->mem.event_reg_count = resp.info.mem_events;
|
||||
|
||||
metadata->shim.row_count = resp.info.shim_rows;
|
||||
metadata->shim.row_start = resp.info.shim_row_start;
|
||||
metadata->shim.dma_channel_count = resp.info.shim_dma_channels;
|
||||
metadata->shim.lock_count = resp.info.shim_locks;
|
||||
metadata->shim.event_reg_count = resp.info.shim_events;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
|
||||
struct amdxdna_fw_ver *fw_ver)
|
||||
{
|
||||
DECLARE_AIE2_MSG(firmware_version, MSG_OP_GET_FIRMWARE_VERSION);
|
||||
int ret;
|
||||
|
||||
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
fw_ver->major = resp.major;
|
||||
fw_ver->minor = resp.minor;
|
||||
fw_ver->sub = resp.sub;
|
||||
fw_ver->build = resp.build;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
struct xdna_mailbox_chann_res x2i;
|
||||
struct xdna_mailbox_chann_res i2x;
|
||||
struct cq_pair *cq_pair;
|
||||
u32 intr_reg;
|
||||
int ret;
|
||||
|
||||
req.aie_type = 1;
|
||||
req.start_col = hwctx->start_col;
|
||||
req.num_col = hwctx->num_col;
|
||||
req.num_cq_pairs_requested = 1;
|
||||
req.pasid = hwctx->client->pasid;
|
||||
req.context_priority = 2;
|
||||
|
||||
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
hwctx->fw_ctx_id = resp.context_id;
|
||||
WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
|
||||
|
||||
cq_pair = &resp.cq_pair[0];
|
||||
x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
|
||||
x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
|
||||
x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
|
||||
x2i.rb_size = cq_pair->x2i_q.buf_size;
|
||||
|
||||
i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
|
||||
i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
|
||||
i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
|
||||
i2x.rb_size = cq_pair->i2x_q.buf_size;
|
||||
|
||||
ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
|
||||
if (ret == -EINVAL) {
|
||||
XDNA_ERR(xdna, "not able to create channel");
|
||||
goto out_destroy_context;
|
||||
}
|
||||
|
||||
intr_reg = i2x.mb_head_ptr_reg + 4;
|
||||
hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
|
||||
intr_reg, ret);
|
||||
if (!hwctx->priv->mbox_chann) {
|
||||
XDNA_ERR(xdna, "not able to create channel");
|
||||
ret = -EINVAL;
|
||||
goto out_destroy_context;
|
||||
}
|
||||
|
||||
XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
|
||||
hwctx->name, ret, resp.msix_id);
|
||||
XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
|
||||
hwctx->fw_ctx_id, hwctx->client->pasid);
|
||||
|
||||
return 0;
|
||||
|
||||
out_destroy_context:
|
||||
aie2_destroy_context(ndev, hwctx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
int ret;
|
||||
|
||||
if (hwctx->fw_ctx_id == -1)
|
||||
return 0;
|
||||
|
||||
xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
|
||||
|
||||
req.context_id = hwctx->fw_ctx_id;
|
||||
ret = aie2_send_mgmt_msg_wait(ndev, &msg);
|
||||
if (ret)
|
||||
XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
|
||||
|
||||
xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
|
||||
XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
|
||||
hwctx->fw_ctx_id);
|
||||
hwctx->priv->mbox_chann = NULL;
|
||||
hwctx->fw_ctx_id = -1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Send a MSG_OP_MAP_HOST_BUFFER request describing the buffer at @addr of
 * @size bytes for firmware context @context_id.
 *
 * Returns 0 on success or the error from aie2_send_mgmt_msg_wait().
 */
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size)
{
	DECLARE_AIE2_MSG(map_host_buffer, MSG_OP_MAP_HOST_BUFFER);
	int rc;

	req.buf_size = size;
	req.buf_addr = addr;
	req.context_id = context_id;

	rc = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (!rc)
		XDNA_DBG(ndev->xdna, "fw ctx %d map host buf addr 0x%llx size 0x%llx",
			 context_id, addr, size);

	return rc;
}
|
||||
|
||||
/*
 * Query the column status from the firmware and copy it to user space.
 *
 * A temporary DMA buffer of @size bytes is allocated, the column bitmap of
 * every hardware context of every client is OR-ed together, and a
 * MSG_OP_QUERY_COL_STATUS request is sent with that bitmap. When the firmware
 * reports success and the reported size fits in @size, the data is copied to
 * @buf and *@cols_filled is set to the bitmap; in every other case
 * *@cols_filled is left at 0 (it is not written at all if the allocation
 * fails).
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, -EINVAL
 * on a firmware failure status or a too-small @size, -EFAULT if the copy to
 * user space fails, or the error from aie2_send_mgmt_msg_wait().
 */
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf,
		      u32 size, u32 *cols_filled)
{
	DECLARE_AIE2_MSG(aie_column_info, MSG_OP_QUERY_COL_STATUS);
	struct amdxdna_dev *xdna = ndev->xdna;
	struct amdxdna_client *client;
	struct amdxdna_hwctx *hwctx;
	unsigned long hwctx_id;
	dma_addr_t dump_dma;
	u32 active_cols = 0;
	int ret, srcu_idx;
	u8 *dump;

	dump = dma_alloc_noncoherent(xdna->ddev.dev, size, &dump_dma,
				     DMA_FROM_DEVICE, GFP_KERNEL);
	if (!dump)
		return -ENOMEM;

	/* Collect the columns used by every hardware context of every client */
	list_for_each_entry(client, &xdna->client_list, node) {
		srcu_idx = srcu_read_lock(&client->hwctx_srcu);
		amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
			active_cols |= amdxdna_hwctx_col_map(hwctx);
		srcu_read_unlock(&client->hwctx_srcu, srcu_idx);
	}

	*cols_filled = 0;
	req.aie_bitmap = active_cols;
	req.num_cols = hweight32(active_cols);
	req.dump_buff_size = size;
	req.dump_buff_addr = dump_dma;

	drm_clflush_virt_range(dump, size); /* device can access */
	ret = aie2_send_mgmt_msg_wait(ndev, &msg);
	if (ret) {
		XDNA_ERR(xdna, "Error during NPU query, status %d", ret);
		goto out_free;
	}

	if (resp.status != AIE2_STATUS_SUCCESS) {
		XDNA_ERR(xdna, "Query NPU status failed, status 0x%x", resp.status);
		ret = -EINVAL;
		goto out_free;
	}
	XDNA_DBG(xdna, "Query NPU status completed");

	/* The firmware reports how many bytes it wrote; they must fit */
	if (resp.size > size) {
		XDNA_ERR(xdna, "Bad buffer size. Available: %u. Needs: %u", size, resp.size);
		ret = -EINVAL;
		goto out_free;
	}

	if (copy_to_user(buf, dump, resp.size)) {
		XDNA_ERR(xdna, "Failed to copy NPU status to user space");
		ret = -EFAULT;
		goto out_free;
	}

	*cols_filled = active_cols;

out_free:
	dma_free_noncoherent(xdna->ddev.dev, size, dump, dump_dma, DMA_FROM_DEVICE);
	return ret;
}
|
||||
|
||||
/*
 * Post a MSG_OP_REGISTER_ASYNC_EVENT_MSG request on ndev->mgmt_chann that
 * describes the buffer at @addr of @size bytes. @handle and @cb are stored
 * in the message as its handle and notify callback.
 *
 * Returns the value returned by xdna_mailbox_send_msg().
 */
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
				 void *handle, int (*cb)(void*, const u32 *, size_t))
{
	struct async_event_msg_req req = {
		.buf_addr = addr,
		.buf_size = size,
	};
	struct xdna_mailbox_msg msg = {
		.opcode = MSG_OP_REGISTER_ASYNC_EVENT_MSG,
		.handle = handle,
		.notify_cb = cb,
		.send_data = (u8 *)&req,
		.send_size = sizeof(req),
	};

	XDNA_DBG(ndev->xdna, "Register addr 0x%llx size 0x%x", addr, size);
	return xdna_mailbox_send_msg(ndev->mgmt_chann, &msg, TX_TIMEOUT);
}
|
||||
|
||||
int aie2_config_cu(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
u32 shift = xdna->dev_info->dev_mem_buf_shift;
|
||||
DECLARE_AIE2_MSG(config_cu, MSG_OP_CONFIG_CU);
|
||||
struct drm_gem_object *gobj;
|
||||
struct amdxdna_gem_obj *abo;
|
||||
int ret, i;
|
||||
|
||||
if (!chann)
|
||||
return -ENODEV;
|
||||
|
||||
if (hwctx->cus->num_cus > MAX_NUM_CUS) {
|
||||
XDNA_DBG(xdna, "Exceed maximum CU %d", MAX_NUM_CUS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i < hwctx->cus->num_cus; i++) {
|
||||
struct amdxdna_cu_config *cu = &hwctx->cus->cu_configs[i];
|
||||
|
||||
if (XDNA_MBZ_DBG(xdna, cu->pad, sizeof(cu->pad)))
|
||||
return -EINVAL;
|
||||
|
||||
gobj = drm_gem_object_lookup(hwctx->client->filp, cu->cu_bo);
|
||||
if (!gobj) {
|
||||
XDNA_ERR(xdna, "Lookup GEM object failed");
|
||||
return -EINVAL;
|
||||
}
|
||||
abo = to_xdna_obj(gobj);
|
||||
|
||||
if (abo->type != AMDXDNA_BO_DEV) {
|
||||
drm_gem_object_put(gobj);
|
||||
XDNA_ERR(xdna, "Invalid BO type");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
req.cfgs[i] = FIELD_PREP(AIE2_MSG_CFG_CU_PDI_ADDR,
|
||||
abo->mem.dev_addr >> shift);
|
||||
req.cfgs[i] |= FIELD_PREP(AIE2_MSG_CFG_CU_FUNC, cu->cu_func);
|
||||
XDNA_DBG(xdna, "CU %d full addr 0x%llx, cfg 0x%x", i,
|
||||
abo->mem.dev_addr, req.cfgs[i]);
|
||||
drm_gem_object_put(gobj);
|
||||
}
|
||||
req.num_cus = hwctx->cus->num_cus;
|
||||
|
||||
ret = xdna_send_msg_wait(xdna, chann, &msg);
|
||||
if (ret == -ETIME)
|
||||
aie2_destroy_context(xdna->dev_handle, hwctx);
|
||||
|
||||
if (resp.status == AIE2_STATUS_SUCCESS) {
|
||||
XDNA_DBG(xdna, "Configure %d CUs, ret %d", req.num_cus, ret);
|
||||
return 0;
|
||||
}
|
||||
|
||||
XDNA_ERR(xdna, "Command opcode 0x%x failed, status 0x%x ret %d",
|
||||
msg.opcode, resp.status, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
union {
|
||||
struct execute_buffer_req ebuf;
|
||||
struct exec_dpu_req dpu;
|
||||
} req;
|
||||
struct xdna_mailbox_msg msg;
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
int cu_idx;
|
||||
int ret;
|
||||
u32 op;
|
||||
|
||||
if (!chann)
|
||||
return -ENODEV;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
|
||||
if (!payload) {
|
||||
XDNA_ERR(xdna, "Invalid command, cannot get payload");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
|
||||
if (cu_idx < 0) {
|
||||
XDNA_DBG(xdna, "Invalid cu idx");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
if (unlikely(payload_len > sizeof(req.ebuf.payload)))
|
||||
XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
|
||||
req.ebuf.cu_idx = cu_idx;
|
||||
memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
|
||||
msg.send_size = sizeof(req.ebuf);
|
||||
msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
|
||||
break;
|
||||
case ERT_START_NPU: {
|
||||
struct amdxdna_cmd_start_npu *sn = payload;
|
||||
|
||||
if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
|
||||
XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
|
||||
req.dpu.inst_buf_addr = sn->buffer;
|
||||
req.dpu.inst_size = sn->buffer_size;
|
||||
req.dpu.inst_prop_cnt = sn->prop_count;
|
||||
req.dpu.cu_idx = cu_idx;
|
||||
memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
|
||||
msg.send_size = sizeof(req.dpu);
|
||||
msg.opcode = MSG_OP_EXEC_DPU;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
|
||||
return -EINVAL;
|
||||
}
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
|
||||
0x40, false);
|
||||
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
|
||||
int cu_idx = amdxdna_cmd_get_cu_idx(abo);
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
|
||||
if (cu_idx < 0)
|
||||
return -EINVAL;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(abo, &payload_len);
|
||||
if (!payload)
|
||||
return -EINVAL;
|
||||
|
||||
if (!slot_cf_has_space(offset, payload_len))
|
||||
return -ENOSPC;
|
||||
|
||||
buf->cu_idx = cu_idx;
|
||||
buf->arg_cnt = payload_len / sizeof(u32);
|
||||
memcpy(buf->args, payload, payload_len);
|
||||
/* Accurate buf size to hint firmware to do necessary copy */
|
||||
*size = sizeof(*buf) + payload_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
|
||||
int cu_idx = amdxdna_cmd_get_cu_idx(abo);
|
||||
struct amdxdna_cmd_start_npu *sn;
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
u32 arg_sz;
|
||||
|
||||
if (cu_idx < 0)
|
||||
return -EINVAL;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(abo, &payload_len);
|
||||
if (!payload)
|
||||
return -EINVAL;
|
||||
sn = payload;
|
||||
arg_sz = payload_len - sizeof(*sn);
|
||||
if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!slot_dpu_has_space(offset, arg_sz))
|
||||
return -ENOSPC;
|
||||
|
||||
buf->inst_buf_addr = sn->buffer;
|
||||
buf->inst_size = sn->buffer_size;
|
||||
buf->inst_prop_cnt = sn->prop_count;
|
||||
buf->cu_idx = cu_idx;
|
||||
buf->arg_cnt = arg_sz / sizeof(u32);
|
||||
memcpy(buf->args, sn->prop_args, arg_sz);
|
||||
|
||||
/* Accurate buf size to hint firmware to do necessary copy */
|
||||
*size += sizeof(*buf) + arg_sz;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
u32 this_op = amdxdna_cmd_get_op(abo);
|
||||
void *cmd_buf = cmdbuf_abo->mem.kva;
|
||||
int ret;
|
||||
|
||||
if (this_op != op) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
|
||||
break;
|
||||
case ERT_START_NPU:
|
||||
ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
|
||||
break;
|
||||
default:
|
||||
ret = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
done:
|
||||
if (ret) {
|
||||
XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
|
||||
op, ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct amdxdna_gem_obj *
|
||||
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
|
||||
{
|
||||
int idx = get_job_idx(job->seq);
|
||||
|
||||
return job->hwctx->priv->cmd_buf[idx];
|
||||
}
|
||||
|
||||
static void
|
||||
aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
|
||||
struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
|
||||
{
|
||||
req->buf_addr = cmdbuf_abo->mem.dev_addr;
|
||||
req->buf_size = size;
|
||||
req->count = cnt;
|
||||
drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
|
||||
XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
|
||||
req->buf_addr, size, cnt);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
aie2_cmd_op_to_msg_op(u32 op)
|
||||
{
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
return MSG_OP_CHAIN_EXEC_BUFFER_CF;
|
||||
case ERT_START_NPU:
|
||||
return MSG_OP_CHAIN_EXEC_DPU;
|
||||
default:
|
||||
return MSG_OP_MAX_OPCODE;
|
||||
}
|
||||
}
|
||||
|
||||
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_client *client = hwctx->client;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct amdxdna_cmd_chain *payload;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct cmd_chain_req req;
|
||||
u32 payload_len;
|
||||
u32 offset = 0;
|
||||
u32 size;
|
||||
int ret;
|
||||
u32 op;
|
||||
u32 i;
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
|
||||
if (op != ERT_CMD_CHAIN || !payload ||
|
||||
payload_len < struct_size(payload, data, payload->command_count))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < payload->command_count; i++) {
|
||||
u32 boh = (u32)(payload->data[i]);
|
||||
struct amdxdna_gem_obj *abo;
|
||||
|
||||
abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
|
||||
if (!abo) {
|
||||
XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* All sub-cmd should have same op, use the first one. */
|
||||
if (i == 0)
|
||||
op = amdxdna_cmd_get_op(abo);
|
||||
|
||||
ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
|
||||
amdxdna_gem_put_obj(abo);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
offset += size;
|
||||
}
|
||||
|
||||
/* The offset is the accumulated total size of the cmd buffer */
|
||||
aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
|
||||
|
||||
msg.opcode = aie2_cmd_op_to_msg_op(op);
|
||||
if (msg.opcode == MSG_OP_MAX_OPCODE)
|
||||
return -EOPNOTSUPP;
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(hwctx->client->xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct cmd_chain_req req;
|
||||
u32 size;
|
||||
int ret;
|
||||
u32 op;
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
|
||||
|
||||
msg.opcode = aie2_cmd_op_to_msg_op(op);
|
||||
if (msg.opcode == MSG_OP_MAX_OPCODE)
|
||||
return -EOPNOTSUPP;
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(hwctx->client->xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct sync_bo_req req;
|
||||
int ret = 0;
|
||||
|
||||
req.src_addr = 0;
|
||||
req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
|
||||
req.size = abo->mem.size;
|
||||
|
||||
/* Device to Host */
|
||||
req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
|
||||
FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
|
||||
|
||||
XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
|
||||
req.size, req.src_addr, req.dst_addr);
|
||||
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
msg.opcode = MSG_OP_SYNC_BO;
|
||||
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
370
drivers/accel/amdxdna/aie2_msg_priv.h
Normal file
370
drivers/accel/amdxdna/aie2_msg_priv.h
Normal file
|
|
@ -0,0 +1,370 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _AIE2_MSG_PRIV_H_
|
||||
#define _AIE2_MSG_PRIV_H_
|
||||
|
||||
/*
 * Mailbox message opcodes. The values fall into three numeric ranges, each
 * closed by a MSG_OP_MAX_* marker that takes the value following the last
 * explicit entry before it.
 */
enum aie2_msg_opcode {
	/* 0x2 - 0x13 */
	MSG_OP_CREATE_CONTEXT              = 0x2,
	MSG_OP_DESTROY_CONTEXT             = 0x3,
	MSG_OP_SYNC_BO			   = 0x7,
	MSG_OP_EXECUTE_BUFFER_CF           = 0xC,
	MSG_OP_QUERY_COL_STATUS            = 0xD,
	MSG_OP_QUERY_AIE_TILE_INFO         = 0xE,
	MSG_OP_QUERY_AIE_VERSION           = 0xF,
	MSG_OP_EXEC_DPU                    = 0x10,
	MSG_OP_CONFIG_CU                   = 0x11,
	MSG_OP_CHAIN_EXEC_BUFFER_CF        = 0x12,
	MSG_OP_CHAIN_EXEC_DPU              = 0x13,
	MSG_OP_MAX_XRT_OPCODE,		/* 0x14 */
	/* 0x101 - 0x10C */
	MSG_OP_SUSPEND                     = 0x101,
	MSG_OP_RESUME                      = 0x102,
	MSG_OP_ASSIGN_MGMT_PASID           = 0x103,
	MSG_OP_INVOKE_SELF_TEST            = 0x104,
	MSG_OP_MAP_HOST_BUFFER             = 0x106,
	MSG_OP_GET_FIRMWARE_VERSION        = 0x108,
	MSG_OP_SET_RUNTIME_CONFIG          = 0x10A,
	MSG_OP_GET_RUNTIME_CONFIG          = 0x10B,
	MSG_OP_REGISTER_ASYNC_EVENT_MSG    = 0x10C,
	MSG_OP_MAX_DRV_OPCODE,		/* 0x10D */
	/* 0x301 */
	MSG_OP_GET_PROTOCOL_VERSION        = 0x301,
	MSG_OP_MAX_OPCODE		/* 0x302, also used as "no such opcode" */
};
|
||||
|
||||
/*
 * Status codes carried in the first word of every response structure.
 * AIE2_STATUS_SUCCESS is the only success value; the error codes are grouped
 * by their upper bits (0x1......, 0x2......, 0x3......, 0x4......). Entries
 * without an explicit value continue from the previous one.
 */
enum aie2_msg_status {
	AIE2_STATUS_SUCCESS				= 0x0,
	/* AIE Error codes */
	AIE2_STATUS_AIE_SATURATION_ERROR		= 0x1000001,
	AIE2_STATUS_AIE_FP_ERROR			= 0x1000002,
	AIE2_STATUS_AIE_STREAM_ERROR			= 0x1000003,
	AIE2_STATUS_AIE_ACCESS_ERROR			= 0x1000004,
	AIE2_STATUS_AIE_BUS_ERROR			= 0x1000005,
	AIE2_STATUS_AIE_INSTRUCTION_ERROR		= 0x1000006,
	AIE2_STATUS_AIE_ECC_ERROR			= 0x1000007,
	AIE2_STATUS_AIE_LOCK_ERROR			= 0x1000008,
	AIE2_STATUS_AIE_DMA_ERROR			= 0x1000009,
	AIE2_STATUS_AIE_MEM_PARITY_ERROR		= 0x100000a,
	AIE2_STATUS_AIE_PWR_CFG_ERROR			= 0x100000b,
	AIE2_STATUS_AIE_BACKTRACK_ERROR			= 0x100000c,
	AIE2_STATUS_MAX_AIE_STATUS_CODE,
	/* MGMT ERT Error codes */
	AIE2_STATUS_MGMT_ERT_SELF_TEST_FAILURE		= 0x2000001,
	AIE2_STATUS_MGMT_ERT_HASH_MISMATCH,
	AIE2_STATUS_MGMT_ERT_NOAVAIL,
	AIE2_STATUS_MGMT_ERT_INVALID_PARAM,
	AIE2_STATUS_MGMT_ERT_ENTER_SUSPEND_FAILURE,
	AIE2_STATUS_MGMT_ERT_BUSY,
	AIE2_STATUS_MGMT_ERT_APPLICATION_ACTIVE,
	MAX_MGMT_ERT_STATUS_CODE,
	/* APP ERT Error codes */
	AIE2_STATUS_APP_ERT_FIRST_ERROR			= 0x3000001,
	AIE2_STATUS_APP_INVALID_INSTR,
	AIE2_STATUS_APP_LOAD_PDI_FAIL,
	MAX_APP_ERT_STATUS_CODE,
	/* NPU RTOS Error Codes */
	AIE2_STATUS_INVALID_INPUT_BUFFER		= 0x4000001,
	AIE2_STATUS_INVALID_COMMAND,
	AIE2_STATUS_INVALID_PARAM,
	AIE2_STATUS_INVALID_OPERATION			= 0x4000006,
	AIE2_STATUS_ASYNC_EVENT_MSGS_FULL,
	AIE2_STATUS_MAX_RTOS_STATUS_CODE,
	/* One past the last RTOS code; not a status any entry above uses */
	MAX_AIE2_STATUS_CODE
};
|
||||
|
||||
struct assign_mgmt_pasid_req {
|
||||
__u16 pasid;
|
||||
__u16 reserved;
|
||||
} __packed;
|
||||
|
||||
struct assign_mgmt_pasid_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct map_host_buffer_req {
|
||||
__u32 context_id;
|
||||
__u64 buf_addr;
|
||||
__u64 buf_size;
|
||||
} __packed;
|
||||
|
||||
struct map_host_buffer_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
#define MAX_CQ_PAIRS 2
|
||||
struct cq_info {
|
||||
__u32 head_addr;
|
||||
__u32 tail_addr;
|
||||
__u32 buf_addr;
|
||||
__u32 buf_size;
|
||||
};
|
||||
|
||||
struct cq_pair {
|
||||
struct cq_info x2i_q;
|
||||
struct cq_info i2x_q;
|
||||
};
|
||||
|
||||
struct create_ctx_req {
|
||||
__u32 aie_type;
|
||||
__u8 start_col;
|
||||
__u8 num_col;
|
||||
__u16 reserved;
|
||||
__u8 num_cq_pairs_requested;
|
||||
__u8 reserved1;
|
||||
__u16 pasid;
|
||||
__u32 pad[2];
|
||||
__u32 sec_comm_target_type;
|
||||
__u32 context_priority;
|
||||
} __packed;
|
||||
|
||||
struct create_ctx_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 context_id;
|
||||
__u16 msix_id;
|
||||
__u8 num_cq_pairs_allocated;
|
||||
__u8 reserved;
|
||||
struct cq_pair cq_pair[MAX_CQ_PAIRS];
|
||||
} __packed;
|
||||
|
||||
struct destroy_ctx_req {
|
||||
__u32 context_id;
|
||||
} __packed;
|
||||
|
||||
struct destroy_ctx_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct execute_buffer_req {
|
||||
__u32 cu_idx;
|
||||
__u32 payload[19];
|
||||
} __packed;
|
||||
|
||||
struct exec_dpu_req {
|
||||
__u64 inst_buf_addr;
|
||||
__u32 inst_size;
|
||||
__u32 inst_prop_cnt;
|
||||
__u32 cu_idx;
|
||||
__u32 payload[35];
|
||||
} __packed;
|
||||
|
||||
struct execute_buffer_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct aie_tile_info {
|
||||
__u32 size;
|
||||
__u16 major;
|
||||
__u16 minor;
|
||||
__u16 cols;
|
||||
__u16 rows;
|
||||
__u16 core_rows;
|
||||
__u16 mem_rows;
|
||||
__u16 shim_rows;
|
||||
__u16 core_row_start;
|
||||
__u16 mem_row_start;
|
||||
__u16 shim_row_start;
|
||||
__u16 core_dma_channels;
|
||||
__u16 mem_dma_channels;
|
||||
__u16 shim_dma_channels;
|
||||
__u16 core_locks;
|
||||
__u16 mem_locks;
|
||||
__u16 shim_locks;
|
||||
__u16 core_events;
|
||||
__u16 mem_events;
|
||||
__u16 shim_events;
|
||||
__u16 reserved;
|
||||
};
|
||||
|
||||
struct aie_tile_info_req {
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct aie_tile_info_resp {
|
||||
enum aie2_msg_status status;
|
||||
struct aie_tile_info info;
|
||||
} __packed;
|
||||
|
||||
struct aie_version_info_req {
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct aie_version_info_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u16 major;
|
||||
__u16 minor;
|
||||
} __packed;
|
||||
|
||||
struct aie_column_info_req {
|
||||
__u64 dump_buff_addr;
|
||||
__u32 dump_buff_size;
|
||||
__u32 num_cols;
|
||||
__u32 aie_bitmap;
|
||||
} __packed;
|
||||
|
||||
struct aie_column_info_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 size;
|
||||
} __packed;
|
||||
|
||||
struct suspend_req {
|
||||
__u32 place_holder;
|
||||
} __packed;
|
||||
|
||||
struct suspend_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct resume_req {
|
||||
__u32 place_holder;
|
||||
} __packed;
|
||||
|
||||
struct resume_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct check_header_hash_req {
|
||||
__u64 hash_high;
|
||||
__u64 hash_low;
|
||||
} __packed;
|
||||
|
||||
struct check_header_hash_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct query_error_req {
|
||||
__u64 buf_addr;
|
||||
__u32 buf_size;
|
||||
__u32 next_row;
|
||||
__u32 next_column;
|
||||
__u32 next_module;
|
||||
} __packed;
|
||||
|
||||
struct query_error_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 num_err;
|
||||
__u32 has_next_err;
|
||||
__u32 next_row;
|
||||
__u32 next_column;
|
||||
__u32 next_module;
|
||||
} __packed;
|
||||
|
||||
struct protocol_version_req {
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct protocol_version_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 major;
|
||||
__u32 minor;
|
||||
} __packed;
|
||||
|
||||
struct firmware_version_req {
|
||||
__u32 reserved;
|
||||
} __packed;
|
||||
|
||||
struct firmware_version_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 major;
|
||||
__u32 minor;
|
||||
__u32 sub;
|
||||
__u32 build;
|
||||
} __packed;
|
||||
|
||||
#define MAX_NUM_CUS 32
|
||||
#define AIE2_MSG_CFG_CU_PDI_ADDR GENMASK(16, 0)
|
||||
#define AIE2_MSG_CFG_CU_FUNC GENMASK(24, 17)
|
||||
struct config_cu_req {
|
||||
__u32 num_cus;
|
||||
__u32 cfgs[MAX_NUM_CUS];
|
||||
} __packed;
|
||||
|
||||
struct config_cu_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct set_runtime_cfg_req {
|
||||
__u32 type;
|
||||
__u64 value;
|
||||
} __packed;
|
||||
|
||||
struct set_runtime_cfg_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
|
||||
struct get_runtime_cfg_req {
|
||||
__u32 type;
|
||||
} __packed;
|
||||
|
||||
struct get_runtime_cfg_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u64 value;
|
||||
} __packed;
|
||||
|
||||
enum async_event_type {
|
||||
ASYNC_EVENT_TYPE_AIE_ERROR,
|
||||
ASYNC_EVENT_TYPE_EXCEPTION,
|
||||
MAX_ASYNC_EVENT_TYPE
|
||||
};
|
||||
|
||||
#define ASYNC_BUF_SIZE SZ_8K
|
||||
struct async_event_msg_req {
|
||||
__u64 buf_addr;
|
||||
__u32 buf_size;
|
||||
} __packed;
|
||||
|
||||
struct async_event_msg_resp {
|
||||
enum aie2_msg_status status;
|
||||
enum async_event_type type;
|
||||
} __packed;
|
||||
|
||||
#define MAX_CHAIN_CMDBUF_SIZE SZ_4K
|
||||
#define slot_cf_has_space(offset, payload_size) \
|
||||
(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
|
||||
offsetof(struct cmd_chain_slot_execbuf_cf, args[0]))
|
||||
struct cmd_chain_slot_execbuf_cf {
|
||||
__u32 cu_idx;
|
||||
__u32 arg_cnt;
|
||||
__u32 args[] __counted_by(arg_cnt);
|
||||
};
|
||||
|
||||
#define slot_dpu_has_space(offset, payload_size) \
|
||||
(MAX_CHAIN_CMDBUF_SIZE - ((offset) + (payload_size)) > \
|
||||
offsetof(struct cmd_chain_slot_dpu, args[0]))
|
||||
struct cmd_chain_slot_dpu {
|
||||
__u64 inst_buf_addr;
|
||||
__u32 inst_size;
|
||||
__u32 inst_prop_cnt;
|
||||
__u32 cu_idx;
|
||||
__u32 arg_cnt;
|
||||
#define MAX_DPU_ARGS_SIZE (34 * sizeof(__u32))
|
||||
__u32 args[] __counted_by(arg_cnt);
|
||||
};
|
||||
|
||||
struct cmd_chain_req {
|
||||
__u64 buf_addr;
|
||||
__u32 buf_size;
|
||||
__u32 count;
|
||||
} __packed;
|
||||
|
||||
struct cmd_chain_resp {
|
||||
enum aie2_msg_status status;
|
||||
__u32 fail_cmd_idx;
|
||||
enum aie2_msg_status fail_cmd_status;
|
||||
} __packed;
|
||||
|
||||
#define AIE2_MSG_SYNC_BO_SRC_TYPE GENMASK(3, 0)
|
||||
#define AIE2_MSG_SYNC_BO_DST_TYPE GENMASK(7, 4)
|
||||
struct sync_bo_req {
|
||||
__u64 src_addr;
|
||||
__u64 dst_addr;
|
||||
__u32 size;
|
||||
#define SYNC_BO_DEV_MEM 0
|
||||
#define SYNC_BO_HOST_MEM 2
|
||||
__u32 type;
|
||||
} __packed;
|
||||
|
||||
struct sync_bo_resp {
|
||||
enum aie2_msg_status status;
|
||||
} __packed;
|
||||
#endif /* _AIE2_MSG_PRIV_H_ */
|
||||
928
drivers/accel/amdxdna/aie2_pci.c
Normal file
928
drivers/accel/amdxdna/aie2_pci.c
Normal file
|
|
@ -0,0 +1,928 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/iopoll.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "aie2_solver.h"
|
||||
#include "amdxdna_ctx.h"
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
/*
 * Module parameter (mode 0600), initialised to XRS_MAX_COL.
 * aie2_init() uses min(aie2_max_col, metadata.cols) as the total column count.
 * NOTE(review): the variable is declared int but registered with the "uint"
 * parameter type - confirm which signedness is intended.
 */
static int aie2_max_col = XRS_MAX_COL;
|
||||
module_param(aie2_max_col, uint, 0600);
|
||||
MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
|
||||
|
||||
/*
|
||||
* The management mailbox channel is allocated by firmware.
|
||||
* The related register and ring buffer information is on SRAM BAR.
|
||||
* This struct is the register layout.
|
||||
*/
|
||||
/* Value expected in mgmt_mbox_chann_info.magic; aie2_get_mgmt_chann_info() rejects anything else. */
#define MGMT_MBOX_MAGIC 0x55504e5f /* _NPU */
|
||||
/*
 * Read word by word from SRAM in aie2_get_mgmt_chann_info().
 * x2i_* and i2x_* give tail/head register addresses, ring buffer address and
 * ring buffer size for the two directions; msi_id becomes the management
 * channel index; prot_major/prot_minor are checked by aie2_check_protocol().
 */
struct mgmt_mbox_chann_info {
|
||||
__u32 x2i_tail;
|
||||
__u32 x2i_head;
|
||||
__u32 x2i_buf;
|
||||
__u32 x2i_buf_sz;
|
||||
__u32 i2x_tail;
|
||||
__u32 i2x_head;
|
||||
__u32 i2x_buf;
|
||||
__u32 i2x_buf_sz;
|
||||
__u32 magic;
|
||||
__u32 msi_id;
|
||||
__u32 prot_major;
|
||||
__u32 prot_minor;
|
||||
__u32 rsvd[4];
|
||||
};
|
||||
|
||||
/*
 * Compare the mailbox protocol version reported by firmware with the version
 * the driver supports (ndev->priv->protocol_major / protocol_minor).
 *
 * Return: 0 when compatible, -EINVAL otherwise.
 */
static int aie2_check_protocol(struct amdxdna_dev_hdl *ndev, u32 fw_major, u32 fw_minor)
{
	const struct amdxdna_dev_priv *priv = ndev->priv;
	struct amdxdna_dev *xdna = ndev->xdna;
	int ret = 0;

	if (fw_major != priv->protocol_major) {
		/* A different major number means driver and firmware are incompatible. */
		XDNA_ERR(xdna, "Incompatible firmware protocol major %d minor %d",
			 fw_major, fw_minor);
		ret = -EINVAL;
	} else if (fw_minor < priv->protocol_minor) {
		/*
		 * The driver's minor is greater than the firmware's: the driver
		 * relies on an operation the installed firmware does not support.
		 */
		XDNA_ERR(xdna, "Firmware minor version smaller than supported");
		ret = -EINVAL;
	}

	return ret;
}
|
||||
|
||||
static void aie2_dump_chann_info_debug(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
|
||||
XDNA_DBG(xdna, "i2x tail 0x%x", ndev->mgmt_i2x.mb_tail_ptr_reg);
|
||||
XDNA_DBG(xdna, "i2x head 0x%x", ndev->mgmt_i2x.mb_head_ptr_reg);
|
||||
XDNA_DBG(xdna, "i2x ringbuf 0x%x", ndev->mgmt_i2x.rb_start_addr);
|
||||
XDNA_DBG(xdna, "i2x rsize 0x%x", ndev->mgmt_i2x.rb_size);
|
||||
XDNA_DBG(xdna, "x2i tail 0x%x", ndev->mgmt_x2i.mb_tail_ptr_reg);
|
||||
XDNA_DBG(xdna, "x2i head 0x%x", ndev->mgmt_x2i.mb_head_ptr_reg);
|
||||
XDNA_DBG(xdna, "x2i ringbuf 0x%x", ndev->mgmt_x2i.rb_start_addr);
|
||||
XDNA_DBG(xdna, "x2i rsize 0x%x", ndev->mgmt_x2i.rb_size);
|
||||
XDNA_DBG(xdna, "x2i chann index 0x%x", ndev->mgmt_chan_idx);
|
||||
XDNA_DBG(xdna, "mailbox protocol major 0x%x", ndev->mgmt_prot_major);
|
||||
XDNA_DBG(xdna, "mailbox protocol minor 0x%x", ndev->mgmt_prot_minor);
|
||||
}
|
||||
|
||||
static int aie2_get_mgmt_chann_info(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
struct mgmt_mbox_chann_info info_regs;
|
||||
struct xdna_mailbox_chann_res *i2x;
|
||||
struct xdna_mailbox_chann_res *x2i;
|
||||
u32 addr, off;
|
||||
u32 *reg;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Once firmware is alive, it will write management channel
|
||||
* information in SRAM BAR and write the address of that information
|
||||
* at FW_ALIVE_OFF offset in SRMA BAR.
|
||||
*
|
||||
* Read a non-zero value from FW_ALIVE_OFF implies that firmware
|
||||
* is alive.
|
||||
*/
|
||||
ret = readx_poll_timeout(readl, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF),
|
||||
addr, addr, AIE2_INTERVAL, AIE2_TIMEOUT);
|
||||
if (ret || !addr)
|
||||
return -ETIME;
|
||||
|
||||
off = AIE2_SRAM_OFF(ndev, addr);
|
||||
reg = (u32 *)&info_regs;
|
||||
for (i = 0; i < sizeof(info_regs) / sizeof(u32); i++)
|
||||
reg[i] = readl(ndev->sram_base + off + i * sizeof(u32));
|
||||
|
||||
if (info_regs.magic != MGMT_MBOX_MAGIC) {
|
||||
XDNA_ERR(ndev->xdna, "Invalid mbox magic 0x%x", info_regs.magic);
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
i2x = &ndev->mgmt_i2x;
|
||||
x2i = &ndev->mgmt_x2i;
|
||||
|
||||
i2x->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_head);
|
||||
i2x->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.i2x_tail);
|
||||
i2x->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.i2x_buf);
|
||||
i2x->rb_size = info_regs.i2x_buf_sz;
|
||||
|
||||
x2i->mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_head);
|
||||
x2i->mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, info_regs.x2i_tail);
|
||||
x2i->rb_start_addr = AIE2_SRAM_OFF(ndev, info_regs.x2i_buf);
|
||||
x2i->rb_size = info_regs.x2i_buf_sz;
|
||||
|
||||
ndev->mgmt_chan_idx = info_regs.msi_id;
|
||||
ndev->mgmt_prot_major = info_regs.prot_major;
|
||||
ndev->mgmt_prot_minor = info_regs.prot_minor;
|
||||
|
||||
ret = aie2_check_protocol(ndev, ndev->mgmt_prot_major, ndev->mgmt_prot_minor);
|
||||
|
||||
done:
|
||||
aie2_dump_chann_info_debug(ndev);
|
||||
|
||||
/* Must clear address at FW_ALIVE_OFF */
|
||||
writel(0, SRAM_GET_ADDR(ndev, FW_ALIVE_OFF));
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Apply every runtime configuration entry of @category from the device's
 * rt_config table (the table ends at the first entry whose type is 0).
 *
 * @val: optional override; when non-NULL, *val is sent instead of the
 *       table's own value for each matching entry.
 *
 * Return: 0 on success, or the error of the first entry that failed.
 */
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
		     enum rt_config_category category, u32 *val)
{
	const struct rt_config *entry = ndev->priv->rt_config;
	int ret;

	while (entry->type) {
		if (entry->category == category) {
			u32 value = val ? *val : entry->value;

			ret = aie2_set_runtime_cfg(ndev, entry->type, value);
			if (ret) {
				XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
					 entry->type, value);
				return ret;
			}
		}
		entry++;
	}

	return 0;
}
|
||||
|
||||
static int aie2_xdna_reset(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = aie2_suspend_fw(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Suspend firmware failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = aie2_resume_fw(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Resume firmware failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_mgmt_fw_init(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Runtime config failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = aie2_assign_mgmt_pasid(ndev, 0);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Can not assign PASID");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = aie2_xdna_reset(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Reset firmware failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!ndev->async_events)
|
||||
return 0;
|
||||
|
||||
ret = aie2_error_async_events_send(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Send async events failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_mgmt_fw_query(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "query firmware version failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = aie2_query_aie_version(ndev, &ndev->version);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Query AIE version failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = aie2_query_aie_metadata(ndev, &ndev->metadata);
|
||||
if (ret) {
|
||||
XDNA_ERR(ndev->xdna, "Query AIE metadata failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
if (aie2_suspend_fw(ndev))
|
||||
XDNA_ERR(ndev->xdna, "Suspend_fw failed");
|
||||
XDNA_DBG(ndev->xdna, "Firmware suspended");
|
||||
}
|
||||
|
||||
static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
|
||||
{
|
||||
struct amdxdna_hwctx *hwctx = cb_arg;
|
||||
struct amdxdna_dev *xdna;
|
||||
int ret;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
|
||||
hwctx->start_col = action->part.start_col;
|
||||
hwctx->num_col = action->part.ncols;
|
||||
ret = aie2_create_context(xdna->dev_handle, hwctx);
|
||||
if (ret)
|
||||
XDNA_ERR(xdna, "create context failed, ret %d", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_xrs_unload(void *cb_arg)
|
||||
{
|
||||
struct amdxdna_hwctx *hwctx = cb_arg;
|
||||
struct amdxdna_dev *xdna;
|
||||
int ret;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
|
||||
ret = aie2_destroy_context(xdna->dev_handle, hwctx);
|
||||
if (ret)
|
||||
XDNA_ERR(xdna, "destroy context failed, ret %d", ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Resolver callback: remember @dpm_level as the default DPM level and apply
 * it through hw_ops.set_dpm() when the power mode is POWER_MODE_DEFAULT and
 * the current level differs. Warns if xdna->dev_lock is not held.
 *
 * Return: 0 when nothing has to change, otherwise the result of set_dpm().
 */
static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 dpm_level)
{
	struct amdxdna_dev *xdna = to_xdna_dev(ddev);
	struct amdxdna_dev_hdl *ndev;

	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));

	ndev = xdna->dev_handle;
	ndev->dft_dpm_level = dpm_level;

	if (ndev->pw_mode == POWER_MODE_DEFAULT && ndev->dpm_level != dpm_level)
		return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);

	return 0;
}
|
||||
|
||||
static struct xrs_action_ops aie2_xrs_actions = {
|
||||
.load = aie2_xrs_load,
|
||||
.unload = aie2_xrs_unload,
|
||||
.set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
|
||||
};
|
||||
|
||||
static void aie2_hw_stop(struct amdxdna_dev *xdna)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
|
||||
struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
|
||||
|
||||
if (ndev->dev_status <= AIE2_DEV_INIT) {
|
||||
XDNA_ERR(xdna, "device is already stopped");
|
||||
return;
|
||||
}
|
||||
|
||||
aie2_mgmt_fw_fini(ndev);
|
||||
xdna_mailbox_stop_channel(ndev->mgmt_chann);
|
||||
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
|
||||
ndev->mgmt_chann = NULL;
|
||||
drmm_kfree(&xdna->ddev, ndev->mbox);
|
||||
ndev->mbox = NULL;
|
||||
aie2_psp_stop(ndev->psp_hdl);
|
||||
aie2_smu_fini(ndev);
|
||||
pci_disable_device(pdev);
|
||||
|
||||
ndev->dev_status = AIE2_DEV_INIT;
|
||||
}
|
||||
|
||||
static int aie2_hw_start(struct amdxdna_dev *xdna)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
|
||||
struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
|
||||
struct xdna_mailbox_res mbox_res;
|
||||
u32 xdna_mailbox_intr_reg;
|
||||
int mgmt_mb_irq, ret;
|
||||
|
||||
if (ndev->dev_status >= AIE2_DEV_START) {
|
||||
XDNA_INFO(xdna, "device is already started");
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = pci_enable_device(pdev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "failed to enable device, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
pci_set_master(pdev);
|
||||
|
||||
ret = aie2_smu_init(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "failed to init smu, ret %d", ret);
|
||||
goto disable_dev;
|
||||
}
|
||||
|
||||
ret = aie2_psp_start(ndev->psp_hdl);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "failed to start psp, ret %d", ret);
|
||||
goto fini_smu;
|
||||
}
|
||||
|
||||
ret = aie2_get_mgmt_chann_info(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "firmware is not alive");
|
||||
goto stop_psp;
|
||||
}
|
||||
|
||||
mbox_res.ringbuf_base = ndev->sram_base;
|
||||
mbox_res.ringbuf_size = pci_resource_len(pdev, xdna->dev_info->sram_bar);
|
||||
mbox_res.mbox_base = ndev->mbox_base;
|
||||
mbox_res.mbox_size = MBOX_SIZE(ndev);
|
||||
mbox_res.name = "xdna_mailbox";
|
||||
ndev->mbox = xdnam_mailbox_create(&xdna->ddev, &mbox_res);
|
||||
if (!ndev->mbox) {
|
||||
XDNA_ERR(xdna, "failed to create mailbox device");
|
||||
ret = -ENODEV;
|
||||
goto stop_psp;
|
||||
}
|
||||
|
||||
mgmt_mb_irq = pci_irq_vector(pdev, ndev->mgmt_chan_idx);
|
||||
if (mgmt_mb_irq < 0) {
|
||||
ret = mgmt_mb_irq;
|
||||
XDNA_ERR(xdna, "failed to alloc irq vector, ret %d", ret);
|
||||
goto stop_psp;
|
||||
}
|
||||
|
||||
xdna_mailbox_intr_reg = ndev->mgmt_i2x.mb_head_ptr_reg + 4;
|
||||
ndev->mgmt_chann = xdna_mailbox_create_channel(ndev->mbox,
|
||||
&ndev->mgmt_x2i,
|
||||
&ndev->mgmt_i2x,
|
||||
xdna_mailbox_intr_reg,
|
||||
mgmt_mb_irq);
|
||||
if (!ndev->mgmt_chann) {
|
||||
XDNA_ERR(xdna, "failed to create management mailbox channel");
|
||||
ret = -EINVAL;
|
||||
goto stop_psp;
|
||||
}
|
||||
|
||||
ret = aie2_pm_init(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
|
||||
goto destroy_mgmt_chann;
|
||||
}
|
||||
|
||||
ret = aie2_mgmt_fw_init(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
|
||||
goto destroy_mgmt_chann;
|
||||
}
|
||||
|
||||
ndev->dev_status = AIE2_DEV_START;
|
||||
|
||||
return 0;
|
||||
|
||||
destroy_mgmt_chann:
|
||||
xdna_mailbox_stop_channel(ndev->mgmt_chann);
|
||||
xdna_mailbox_destroy_channel(ndev->mgmt_chann);
|
||||
stop_psp:
|
||||
aie2_psp_stop(ndev->psp_hdl);
|
||||
fini_smu:
|
||||
aie2_smu_fini(ndev);
|
||||
disable_dev:
|
||||
pci_disable_device(pdev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_init(struct amdxdna_dev *xdna)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
|
||||
void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
|
||||
struct init_config xrs_cfg = { 0 };
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
struct psp_config psp_conf;
|
||||
const struct firmware *fw;
|
||||
unsigned long bars = 0;
|
||||
int i, nvec, ret;
|
||||
|
||||
ndev = drmm_kzalloc(&xdna->ddev, sizeof(*ndev), GFP_KERNEL);
|
||||
if (!ndev)
|
||||
return -ENOMEM;
|
||||
|
||||
ndev->priv = xdna->dev_info->dev_priv;
|
||||
ndev->xdna = xdna;
|
||||
|
||||
ret = request_firmware(&fw, ndev->priv->fw_path, &pdev->dev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "failed to request_firmware %s, ret %d",
|
||||
ndev->priv->fw_path, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = pcim_enable_device(pdev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "pcim enable device failed, ret %d", ret);
|
||||
goto release_fw;
|
||||
}
|
||||
|
||||
for (i = 0; i < PSP_MAX_REGS; i++)
|
||||
set_bit(PSP_REG_BAR(ndev, i), &bars);
|
||||
|
||||
set_bit(xdna->dev_info->sram_bar, &bars);
|
||||
set_bit(xdna->dev_info->smu_bar, &bars);
|
||||
set_bit(xdna->dev_info->mbox_bar, &bars);
|
||||
|
||||
for (i = 0; i < PCI_NUM_RESOURCES; i++) {
|
||||
if (!test_bit(i, &bars))
|
||||
continue;
|
||||
tbl[i] = pcim_iomap(pdev, i, 0);
|
||||
if (!tbl[i]) {
|
||||
XDNA_ERR(xdna, "map bar %d failed", i);
|
||||
ret = -ENOMEM;
|
||||
goto release_fw;
|
||||
}
|
||||
}
|
||||
|
||||
ndev->sram_base = tbl[xdna->dev_info->sram_bar];
|
||||
ndev->smu_base = tbl[xdna->dev_info->smu_bar];
|
||||
ndev->mbox_base = tbl[xdna->dev_info->mbox_bar];
|
||||
|
||||
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to set DMA mask: %d", ret);
|
||||
goto release_fw;
|
||||
}
|
||||
|
||||
nvec = pci_msix_vec_count(pdev);
|
||||
if (nvec <= 0) {
|
||||
XDNA_ERR(xdna, "does not get number of interrupt vector");
|
||||
ret = -EINVAL;
|
||||
goto release_fw;
|
||||
}
|
||||
|
||||
ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
|
||||
if (ret < 0) {
|
||||
XDNA_ERR(xdna, "failed to alloc irq vectors, ret %d", ret);
|
||||
goto release_fw;
|
||||
}
|
||||
|
||||
ret = iommu_dev_enable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Enable PASID failed, ret %d", ret);
|
||||
goto free_irq;
|
||||
}
|
||||
|
||||
psp_conf.fw_size = fw->size;
|
||||
psp_conf.fw_buf = fw->data;
|
||||
for (i = 0; i < PSP_MAX_REGS; i++)
|
||||
psp_conf.psp_regs[i] = tbl[PSP_REG_BAR(ndev, i)] + PSP_REG_OFF(ndev, i);
|
||||
ndev->psp_hdl = aie2m_psp_create(&xdna->ddev, &psp_conf);
|
||||
if (!ndev->psp_hdl) {
|
||||
XDNA_ERR(xdna, "failed to create psp");
|
||||
ret = -ENOMEM;
|
||||
goto disable_sva;
|
||||
}
|
||||
xdna->dev_handle = ndev;
|
||||
|
||||
ret = aie2_hw_start(xdna);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "start npu failed, ret %d", ret);
|
||||
goto disable_sva;
|
||||
}
|
||||
|
||||
ret = aie2_mgmt_fw_query(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
|
||||
goto stop_hw;
|
||||
}
|
||||
ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
|
||||
|
||||
xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
|
||||
for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
|
||||
xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv->dpm_clk_tbl[i].hclk;
|
||||
xrs_cfg.sys_eff_factor = 1;
|
||||
xrs_cfg.ddev = &xdna->ddev;
|
||||
xrs_cfg.actions = &aie2_xrs_actions;
|
||||
xrs_cfg.total_col = ndev->total_col;
|
||||
|
||||
xdna->xrs_hdl = xrsm_init(&xrs_cfg);
|
||||
if (!xdna->xrs_hdl) {
|
||||
XDNA_ERR(xdna, "Initialize resolver failed");
|
||||
ret = -EINVAL;
|
||||
goto stop_hw;
|
||||
}
|
||||
|
||||
ret = aie2_error_async_events_alloc(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Allocate async events failed, ret %d", ret);
|
||||
goto stop_hw;
|
||||
}
|
||||
|
||||
ret = aie2_error_async_events_send(ndev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send async events failed, ret %d", ret);
|
||||
goto async_event_free;
|
||||
}
|
||||
|
||||
/* Issue a command to make sure firmware handled async events */
|
||||
ret = aie2_query_firmware_version(ndev, &ndev->xdna->fw_ver);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Re-query firmware version failed");
|
||||
goto async_event_free;
|
||||
}
|
||||
|
||||
release_firmware(fw);
|
||||
return 0;
|
||||
|
||||
async_event_free:
|
||||
aie2_error_async_events_free(ndev);
|
||||
stop_hw:
|
||||
aie2_hw_stop(xdna);
|
||||
disable_sva:
|
||||
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
free_irq:
|
||||
pci_free_irq_vectors(pdev);
|
||||
release_fw:
|
||||
release_firmware(fw);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void aie2_fini(struct amdxdna_dev *xdna)
|
||||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
|
||||
struct amdxdna_dev_hdl *ndev = xdna->dev_handle;
|
||||
|
||||
aie2_hw_stop(xdna);
|
||||
aie2_error_async_events_free(ndev);
|
||||
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
pci_free_irq_vectors(pdev);
|
||||
}
|
||||
|
||||
static int aie2_get_aie_status(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_aie_status status;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
int ret;
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
if (copy_from_user(&status, u64_to_user_ptr(args->buffer), sizeof(status))) {
|
||||
XDNA_ERR(xdna, "Failed to copy AIE request into kernel");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (ndev->metadata.cols * ndev->metadata.size < status.buffer_size) {
|
||||
XDNA_ERR(xdna, "Invalid buffer size. Given Size: %u. Need Size: %u.",
|
||||
status.buffer_size, ndev->metadata.cols * ndev->metadata.size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = aie2_query_status(ndev, u64_to_user_ptr(status.buffer),
|
||||
status.buffer_size, &status.cols_filled);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to get AIE status info. Ret: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), &status, sizeof(status))) {
|
||||
XDNA_ERR(xdna, "Failed to copy AIE request info to user space");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_get_aie_metadata(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_aie_metadata *meta;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
int ret = 0;
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
|
||||
if (!meta)
|
||||
return -ENOMEM;
|
||||
|
||||
meta->col_size = ndev->metadata.size;
|
||||
meta->cols = ndev->metadata.cols;
|
||||
meta->rows = ndev->metadata.rows;
|
||||
|
||||
meta->version.major = ndev->metadata.version.major;
|
||||
meta->version.minor = ndev->metadata.version.minor;
|
||||
|
||||
meta->core.row_count = ndev->metadata.core.row_count;
|
||||
meta->core.row_start = ndev->metadata.core.row_start;
|
||||
meta->core.dma_channel_count = ndev->metadata.core.dma_channel_count;
|
||||
meta->core.lock_count = ndev->metadata.core.lock_count;
|
||||
meta->core.event_reg_count = ndev->metadata.core.event_reg_count;
|
||||
|
||||
meta->mem.row_count = ndev->metadata.mem.row_count;
|
||||
meta->mem.row_start = ndev->metadata.mem.row_start;
|
||||
meta->mem.dma_channel_count = ndev->metadata.mem.dma_channel_count;
|
||||
meta->mem.lock_count = ndev->metadata.mem.lock_count;
|
||||
meta->mem.event_reg_count = ndev->metadata.mem.event_reg_count;
|
||||
|
||||
meta->shim.row_count = ndev->metadata.shim.row_count;
|
||||
meta->shim.row_start = ndev->metadata.shim.row_start;
|
||||
meta->shim.dma_channel_count = ndev->metadata.shim.dma_channel_count;
|
||||
meta->shim.lock_count = ndev->metadata.shim.lock_count;
|
||||
meta->shim.event_reg_count = ndev->metadata.shim.event_reg_count;
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), meta, sizeof(*meta)))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(meta);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_get_aie_version(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_aie_version version;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
version.major = ndev->version.major;
|
||||
version.minor = ndev->version.minor;
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_get_firmware_version(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_firmware_version version;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
|
||||
version.major = xdna->fw_ver.major;
|
||||
version.minor = xdna->fw_ver.minor;
|
||||
version.patch = xdna->fw_ver.sub;
|
||||
version.build = xdna->fw_ver.build;
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), &version, sizeof(version)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_get_power_mode(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_get_power_mode mode = {};
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
mode.power_mode = ndev->pw_mode;
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, sizeof(mode)))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aie2_get_clock_metadata(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_clock_metadata *clock;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
int ret = 0;
|
||||
|
||||
ndev = xdna->dev_handle;
|
||||
clock = kzalloc(sizeof(*clock), GFP_KERNEL);
|
||||
if (!clock)
|
||||
return -ENOMEM;
|
||||
|
||||
snprintf(clock->mp_npu_clock.name, sizeof(clock->mp_npu_clock.name),
|
||||
"MP-NPU Clock");
|
||||
clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
|
||||
snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H Clock");
|
||||
clock->h_clock.freq_mhz = ndev->hclk_freq;
|
||||
|
||||
if (copy_to_user(u64_to_user_ptr(args->buffer), clock, sizeof(*clock)))
|
||||
ret = -EFAULT;
|
||||
|
||||
kfree(clock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_get_hwctx_status(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_drm_query_hwctx __user *buf;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_drm_query_hwctx *tmp;
|
||||
struct amdxdna_client *tmp_client;
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
unsigned long hwctx_id;
|
||||
bool overflow = false;
|
||||
u32 req_bytes = 0;
|
||||
u32 hw_i = 0;
|
||||
int ret = 0;
|
||||
int idx;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
|
||||
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
buf = u64_to_user_ptr(args->buffer);
|
||||
list_for_each_entry(tmp_client, &xdna->client_list, node) {
|
||||
idx = srcu_read_lock(&tmp_client->hwctx_srcu);
|
||||
amdxdna_for_each_hwctx(tmp_client, hwctx_id, hwctx) {
|
||||
req_bytes += sizeof(*tmp);
|
||||
if (args->buffer_size < req_bytes) {
|
||||
/* Continue iterating to get the required size */
|
||||
overflow = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
memset(tmp, 0, sizeof(*tmp));
|
||||
tmp->pid = tmp_client->pid;
|
||||
tmp->context_id = hwctx->id;
|
||||
tmp->start_col = hwctx->start_col;
|
||||
tmp->num_col = hwctx->num_col;
|
||||
tmp->command_submissions = hwctx->priv->seq;
|
||||
tmp->command_completions = hwctx->priv->completed;
|
||||
|
||||
if (copy_to_user(&buf[hw_i], tmp, sizeof(*tmp))) {
|
||||
ret = -EFAULT;
|
||||
srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
|
||||
goto out;
|
||||
}
|
||||
hw_i++;
|
||||
}
|
||||
srcu_read_unlock(&tmp_client->hwctx_srcu, idx);
|
||||
}
|
||||
|
||||
if (overflow) {
|
||||
XDNA_ERR(xdna, "Invalid buffer size. Given: %u Need: %u.",
|
||||
args->buffer_size, req_bytes);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(tmp);
|
||||
args->buffer_size = req_bytes;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_info *args)
|
||||
{
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
int ret, idx;
|
||||
|
||||
if (!drm_dev_enter(&xdna->ddev, &idx))
|
||||
return -ENODEV;
|
||||
|
||||
switch (args->param) {
|
||||
case DRM_AMDXDNA_QUERY_AIE_STATUS:
|
||||
ret = aie2_get_aie_status(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_QUERY_AIE_METADATA:
|
||||
ret = aie2_get_aie_metadata(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_QUERY_AIE_VERSION:
|
||||
ret = aie2_get_aie_version(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
|
||||
ret = aie2_get_clock_metadata(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
|
||||
ret = aie2_get_hwctx_status(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
|
||||
ret = aie2_get_firmware_version(client, args);
|
||||
break;
|
||||
case DRM_AMDXDNA_GET_POWER_MODE:
|
||||
ret = aie2_get_power_mode(client, args);
|
||||
break;
|
||||
default:
|
||||
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
|
||||
ret = -EOPNOTSUPP;
|
||||
}
|
||||
XDNA_DBG(xdna, "Got param %d", args->param);
|
||||
|
||||
drm_dev_exit(idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int aie2_set_power_mode(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_set_state *args)
|
||||
{
|
||||
struct amdxdna_drm_set_power_mode power_state;
|
||||
enum amdxdna_power_mode_type power_mode;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
|
||||
if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
|
||||
sizeof(power_state))) {
|
||||
XDNA_ERR(xdna, "Failed to copy power mode request into kernel");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (XDNA_MBZ_DBG(xdna, power_state.pad, sizeof(power_state.pad)))
|
||||
return -EINVAL;
|
||||
|
||||
power_mode = power_state.power_mode;
|
||||
if (power_mode > POWER_MODE_TURBO) {
|
||||
XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return aie2_pm_set_mode(xdna->dev_handle, power_mode);
|
||||
}
|
||||
|
||||
static int aie2_set_state(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_set_state *args)
|
||||
{
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
int ret, idx;
|
||||
|
||||
if (!drm_dev_enter(&xdna->ddev, &idx))
|
||||
return -ENODEV;
|
||||
|
||||
switch (args->param) {
|
||||
case DRM_AMDXDNA_SET_POWER_MODE:
|
||||
ret = aie2_set_power_mode(client, args);
|
||||
break;
|
||||
default:
|
||||
XDNA_ERR(xdna, "Not supported request parameter %u", args->param);
|
||||
ret = -EOPNOTSUPP;
|
||||
break;
|
||||
}
|
||||
|
||||
drm_dev_exit(idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct amdxdna_dev_ops aie2_ops = {
|
||||
.init = aie2_init,
|
||||
.fini = aie2_fini,
|
||||
.resume = aie2_hw_start,
|
||||
.suspend = aie2_hw_stop,
|
||||
.get_aie_info = aie2_get_info,
|
||||
.set_aie_state = aie2_set_state,
|
||||
.hwctx_init = aie2_hwctx_init,
|
||||
.hwctx_fini = aie2_hwctx_fini,
|
||||
.hwctx_config = aie2_hwctx_config,
|
||||
.cmd_submit = aie2_cmd_submit,
|
||||
.hmm_invalidate = aie2_hmm_invalidate,
|
||||
.hwctx_suspend = aie2_hwctx_suspend,
|
||||
.hwctx_resume = aie2_hwctx_resume,
|
||||
};
|
||||
297
drivers/accel/amdxdna/aie2_pci.h
Normal file
297
drivers/accel/amdxdna/aie2_pci.h
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _AIE2_PCI_H_
|
||||
#define _AIE2_PCI_H_
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#include "amdxdna_mailbox.h"
|
||||
|
||||
/* Poll interval and timeout, both in microseconds. */
#define AIE2_INTERVAL 20000 /* us */
|
||||
#define AIE2_TIMEOUT 1000000 /* us */
|
||||
|
||||
/* Firmware determines device memory base address and size */
|
||||
#define AIE2_DEVM_BASE 0x4000000
|
||||
#define AIE2_DEVM_SIZE SZ_64M
|
||||
|
||||
/* PCI device that backs a device handle. */
#define NDEV2PDEV(ndev) (to_pci_dev((ndev)->xdna->ddev.dev))
|
||||
|
||||
/* Subtract the device-side base address to turn a device address into an offset. */
#define AIE2_SRAM_OFF(ndev, addr) ((addr) - (ndev)->priv->sram_dev_addr)
|
||||
#define AIE2_MBOX_OFF(ndev, addr) ((addr) - (ndev)->priv->mbox_dev_addr)
|
||||
|
||||
/* Per-device lookup of a register's BAR index / offset by register index. */
#define PSP_REG_BAR(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].bar_idx)
|
||||
#define PSP_REG_OFF(ndev, idx) ((ndev)->priv->psp_regs_off[(idx)].offset)
|
||||
#define SRAM_REG_OFF(ndev, idx) ((ndev)->priv->sram_offs[(idx)].offset)
|
||||
|
||||
/* Mapped address of SMU register @idx: smu_base plus the per-device offset. @ndev is evaluated once. */
#define SMU_REG(ndev, idx) \
({ \
	typeof(ndev) _hdl = (ndev); \
	((_hdl)->smu_base + (_hdl)->priv->smu_regs_off[(idx)].offset); \
})

/* Mapped address of SRAM register @idx: sram_base plus the per-device offset. @ndev is evaluated once. */
#define SRAM_GET_ADDR(ndev, idx) \
({ \
	typeof(ndev) _hdl = (ndev); \
	((_hdl)->sram_base + SRAM_REG_OFF((_hdl), (idx))); \
})
|
||||
|
||||
#define CHAN_SLOT_SZ SZ_8K

/*
 * Size of the mailbox region: the per-device mbox_size when it is non-zero,
 * otherwise the length of the mailbox BAR. @ndev is evaluated once.
 */
#define MBOX_SIZE(ndev) \
({ \
	typeof(ndev) _hdl = (ndev); \
	((_hdl)->priv->mbox_size) ? (_hdl)->priv->mbox_size : \
	pci_resource_len(NDEV2PDEV(_hdl), (_hdl)->xdna->dev_info->mbox_bar); \
})
|
||||
|
||||
/* Indices of SMU registers; SMU_MAX_REGS is the number of entries. */
enum aie2_smu_reg_idx {
|
||||
SMU_CMD_REG = 0,
|
||||
SMU_ARG_REG,
|
||||
SMU_INTR_REG,
|
||||
SMU_RESP_REG,
|
||||
SMU_OUT_REG,
|
||||
SMU_MAX_REGS /* Keep this at the end */
|
||||
};
|
||||
|
||||
/* Indices of SRAM locations; FW_ALIVE_OFF is where the driver polls for the firmware-published address. */
enum aie2_sram_reg_idx {
|
||||
MBOX_CHANN_OFF = 0,
|
||||
FW_ALIVE_OFF,
|
||||
SRAM_MAX_INDEX /* Keep this at the end */
|
||||
};
|
||||
|
||||
/* Indices of PSP registers: the input registers come first, then interrupt, status and response. */
enum psp_reg_idx {
|
||||
PSP_CMD_REG = 0,
|
||||
PSP_ARG0_REG,
|
||||
PSP_ARG1_REG,
|
||||
PSP_ARG2_REG,
|
||||
PSP_NUM_IN_REGS, /* number of input registers */
|
||||
/* Shares its value with PSP_NUM_IN_REGS: the first register after the inputs. */
PSP_INTR_REG = PSP_NUM_IN_REGS,
|
||||
PSP_STATUS_REG,
|
||||
PSP_RESP_REG,
|
||||
PSP_MAX_REGS /* Keep this at the end */
|
||||
};
|
||||
|
||||
struct amdxdna_client;
|
||||
struct amdxdna_fw_ver;
|
||||
struct amdxdna_hwctx;
|
||||
struct amdxdna_sched_job;
|
||||
|
||||
/* Parameters for creating the PSP handle: firmware image (buffer and size) and mapped PSP register addresses. */
struct psp_config {
|
||||
const void *fw_buf;
|
||||
u32 fw_size;
|
||||
/* Indexed by enum psp_reg_idx. */
void __iomem *psp_regs[PSP_MAX_REGS];
|
||||
};
|
||||
|
||||
struct aie_version {
|
||||
u16 major;
|
||||
u16 minor;
|
||||
};
|
||||
|
||||
struct aie_tile_metadata {
|
||||
u16 row_count;
|
||||
u16 row_start;
|
||||
u16 dma_channel_count;
|
||||
u16 lock_count;
|
||||
u16 event_reg_count;
|
||||
};
|
||||
|
||||
struct aie_metadata {
|
||||
u32 size;
|
||||
u16 cols;
|
||||
u16 rows;
|
||||
struct aie_version version;
|
||||
struct aie_tile_metadata core;
|
||||
struct aie_tile_metadata mem;
|
||||
struct aie_tile_metadata shim;
|
||||
};
|
||||
|
||||
/* Category selector passed to aie2_runtime_cfg(). */
enum rt_config_category {
	AIE2_RT_CFG_INIT,
	AIE2_RT_CFG_CLK_GATING,
};
|
||||
|
||||
struct rt_config {
|
||||
u32 type;
|
||||
u32 value;
|
||||
u32 category;
|
||||
};
|
||||
|
||||
struct dpm_clk_freq {
|
||||
u32 npuclk;
|
||||
u32 hclk;
|
||||
};
|
||||
|
||||
/*
 * Maximum number of commands that may be pending in one hardware context.
 * get_job_idx() masks with (HWCTX_MAX_CMDS - 1), so this value must be a
 * power of 2.
 */
#define HWCTX_MAX_CMDS		4
/* Map a sequence number onto a slot in [0, HWCTX_MAX_CMDS). */
#define get_job_idx(seq)	((seq) & (HWCTX_MAX_CMDS - 1))
|
||||
struct amdxdna_hwctx_priv {
|
||||
struct amdxdna_gem_obj *heap;
|
||||
void *mbox_chann;
|
||||
|
||||
struct drm_gpu_scheduler sched;
|
||||
struct drm_sched_entity entity;
|
||||
|
||||
struct mutex io_lock; /* protect seq and cmd order */
|
||||
struct wait_queue_head job_free_wq;
|
||||
u32 num_pending;
|
||||
u64 seq;
|
||||
struct semaphore job_sem;
|
||||
bool job_done;
|
||||
|
||||
/* Completed job counter */
|
||||
u64 completed;
|
||||
|
||||
struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
|
||||
struct drm_syncobj *syncobj;
|
||||
};
|
||||
|
||||
/*
 * Device life-cycle state. aie2_pm_init() does first-time setup only while
 * the state is AIE2_DEV_UNINIT and otherwise re-applies the cached settings.
 */
enum aie2_dev_status {
	AIE2_DEV_UNINIT,
	AIE2_DEV_INIT,
	AIE2_DEV_START,
};
|
||||
|
||||
struct amdxdna_dev_hdl {
|
||||
struct amdxdna_dev *xdna;
|
||||
const struct amdxdna_dev_priv *priv;
|
||||
void __iomem *sram_base;
|
||||
void __iomem *smu_base;
|
||||
void __iomem *mbox_base;
|
||||
struct psp_device *psp_hdl;
|
||||
|
||||
struct xdna_mailbox_chann_res mgmt_x2i;
|
||||
struct xdna_mailbox_chann_res mgmt_i2x;
|
||||
u32 mgmt_chan_idx;
|
||||
u32 mgmt_prot_major;
|
||||
u32 mgmt_prot_minor;
|
||||
|
||||
u32 total_col;
|
||||
struct aie_version version;
|
||||
struct aie_metadata metadata;
|
||||
|
||||
/* power management and clock*/
|
||||
enum amdxdna_power_mode_type pw_mode;
|
||||
u32 dpm_level;
|
||||
u32 dft_dpm_level;
|
||||
u32 max_dpm_level;
|
||||
u32 clk_gating;
|
||||
u32 npuclk_freq;
|
||||
u32 hclk_freq;
|
||||
|
||||
/* Mailbox and the management channel */
|
||||
struct mailbox *mbox;
|
||||
struct mailbox_channel *mgmt_chann;
|
||||
struct async_events *async_events;
|
||||
|
||||
enum aie2_dev_status dev_status;
|
||||
u32 hwctx_num;
|
||||
};
|
||||
|
||||
/*
 * Designated initializer for one struct aie2_bar_off_pair table entry:
 * slot @reg_name gets the index of BAR @bar (<bar>_BAR_INDEX) and the
 * offset of @reg_addr from that BAR's base (<bar>_BAR_BASE).
 */
#define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr)	\
	[reg_name] = { bar##_BAR_INDEX, (reg_addr) - bar##_BAR_BASE }
|
||||
|
||||
struct aie2_bar_off_pair {
|
||||
int bar_idx;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
struct aie2_hw_ops {
|
||||
int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
|
||||
};
|
||||
|
||||
struct amdxdna_dev_priv {
|
||||
const char *fw_path;
|
||||
u64 protocol_major;
|
||||
u64 protocol_minor;
|
||||
const struct rt_config *rt_config;
|
||||
const struct dpm_clk_freq *dpm_clk_tbl;
|
||||
|
||||
#define COL_ALIGN_NONE 0
|
||||
#define COL_ALIGN_NATURE 1
|
||||
u32 col_align;
|
||||
u32 mbox_dev_addr;
|
||||
/* If mbox_size is 0, use BAR size. See MBOX_SIZE macro */
|
||||
u32 mbox_size;
|
||||
u32 sram_dev_addr;
|
||||
struct aie2_bar_off_pair sram_offs[SRAM_MAX_INDEX];
|
||||
struct aie2_bar_off_pair psp_regs_off[PSP_MAX_REGS];
|
||||
struct aie2_bar_off_pair smu_regs_off[SMU_MAX_REGS];
|
||||
struct aie2_hw_ops hw_ops;
|
||||
};
|
||||
|
||||
extern const struct amdxdna_dev_ops aie2_ops;
|
||||
|
||||
int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
|
||||
enum rt_config_category category, u32 *val);
|
||||
|
||||
/* aie2 npu hw config */
|
||||
extern const struct dpm_clk_freq npu1_dpm_clk_table[];
|
||||
extern const struct dpm_clk_freq npu4_dpm_clk_table[];
|
||||
extern const struct rt_config npu1_default_rt_cfg[];
|
||||
extern const struct rt_config npu4_default_rt_cfg[];
|
||||
|
||||
/* aie2_smu.c */
|
||||
int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
|
||||
void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
|
||||
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
|
||||
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
|
||||
|
||||
/* aie2_pm.c */
|
||||
int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
|
||||
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target);
|
||||
|
||||
/* aie2_psp.c */
|
||||
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
|
||||
int aie2_psp_start(struct psp_device *psp);
|
||||
void aie2_psp_stop(struct psp_device *psp);
|
||||
|
||||
/* aie2_error.c */
|
||||
int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
|
||||
void aie2_error_async_events_free(struct amdxdna_dev_hdl *ndev);
|
||||
int aie2_error_async_events_send(struct amdxdna_dev_hdl *ndev);
|
||||
int aie2_error_async_msg_thread(void *data);
|
||||
|
||||
/* aie2_message.c */
|
||||
int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev);
|
||||
int aie2_resume_fw(struct amdxdna_dev_hdl *ndev);
|
||||
int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 value);
|
||||
int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, u64 *value);
|
||||
int aie2_assign_mgmt_pasid(struct amdxdna_dev_hdl *ndev, u16 pasid);
|
||||
int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *version);
|
||||
int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
|
||||
int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
|
||||
struct amdxdna_fw_ver *fw_ver);
|
||||
int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
|
||||
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
|
||||
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
|
||||
int aie2_query_status(struct amdxdna_dev_hdl *ndev, char __user *buf, u32 size, u32 *cols_filled);
|
||||
int aie2_register_asyn_event_msg(struct amdxdna_dev_hdl *ndev, dma_addr_t addr, u32 size,
|
||||
void *handle, int (*cb)(void*, const u32 *, size_t));
|
||||
int aie2_config_cu(struct amdxdna_hwctx *hwctx);
|
||||
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
|
||||
/* aie2_hwctx.c */
|
||||
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
|
||||
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
|
||||
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
|
||||
void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
|
||||
void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
|
||||
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
|
||||
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
|
||||
void aie2_restart_ctx(struct amdxdna_client *client);
|
||||
|
||||
#endif /* _AIE2_PCI_H_ */
|
||||
108
drivers/accel/amdxdna/aie2_pm.c
Normal file
108
drivers/accel/amdxdna/aie2_pm.c
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
/* Values handed to aie2_pm_set_clk_gating(). */
#define AIE2_CLK_GATING_ENABLE		1
#define AIE2_CLK_GATING_DISABLE		0
|
||||
|
||||
/*
 * Apply a clock-gating setting through the AIE2_RT_CFG_CLK_GATING runtime
 * configuration category and, on success, cache it in ndev->clk_gating.
 *
 * @val is handed to aie2_runtime_cfg() by address, so the cached value is
 * whatever @val holds once that call has returned.
 *
 * Returns 0 on success or the error from aie2_runtime_cfg().
 */
static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 val)
{
	int err = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);

	if (!err)
		ndev->clk_gating = val;

	return err;
}
|
||||
|
||||
int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (ndev->dev_status != AIE2_DEV_UNINIT) {
|
||||
/* Resume device */
|
||||
ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
|
||||
ndev->max_dpm_level++;
|
||||
ndev->max_dpm_level--;
|
||||
|
||||
ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ndev->pw_mode = POWER_MODE_DEFAULT;
|
||||
ndev->dft_dpm_level = ndev->max_dpm_level;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type target)
|
||||
{
|
||||
struct amdxdna_dev *xdna = ndev->xdna;
|
||||
u32 clk_gating, dpm_level;
|
||||
int ret;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
|
||||
if (ndev->pw_mode == target)
|
||||
return 0;
|
||||
|
||||
switch (target) {
|
||||
case POWER_MODE_TURBO:
|
||||
if (ndev->hwctx_num) {
|
||||
XDNA_ERR(xdna, "Can not set turbo when there is active hwctx");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
clk_gating = AIE2_CLK_GATING_DISABLE;
|
||||
dpm_level = ndev->max_dpm_level;
|
||||
break;
|
||||
case POWER_MODE_HIGH:
|
||||
clk_gating = AIE2_CLK_GATING_ENABLE;
|
||||
dpm_level = ndev->max_dpm_level;
|
||||
break;
|
||||
case POWER_MODE_DEFAULT:
|
||||
clk_gating = AIE2_CLK_GATING_ENABLE;
|
||||
dpm_level = ndev->dft_dpm_level;
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = aie2_pm_set_clk_gating(ndev, clk_gating);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ndev->pw_mode = target;
|
||||
|
||||
return 0;
|
||||
}
|
||||
146
drivers/accel/amdxdna/aie2_psp.c
Normal file
146
drivers/accel/amdxdna/aie2_psp.c
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/iopoll.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
/* Bit polled in the PSP status register to learn that a command finished. */
#define PSP_STATUS_READY	BIT(31)

/* PSP commands */
#define PSP_VALIDATE		1
#define PSP_START		2
#define PSP_RELEASE_TMR		3

/* PSP special arguments */
#define PSP_START_COPY_FW	1

/* PSP response error code */
#define PSP_ERROR_CANCEL	0xFFFF0002
#define PSP_ERROR_BAD_STATE	0xFFFF0007

#define PSP_FW_ALIGN		0x10000
#define PSP_POLL_INTERVAL	20000	/* us */
#define PSP_POLL_TIMEOUT	1000000	/* us */

/* Mapped address of PSP register @reg (an enum psp_reg_idx value). */
#define PSP_REG(p, reg)		((p)->psp_regs[reg])
|
||||
|
||||
struct psp_device {
|
||||
struct drm_device *ddev;
|
||||
struct psp_config conf;
|
||||
u32 fw_buf_sz;
|
||||
u64 fw_paddr;
|
||||
void *fw_buffer;
|
||||
void __iomem *psp_regs[PSP_MAX_REGS];
|
||||
};
|
||||
|
||||
/*
 * Issue one command to the PSP and wait for it to finish.
 *
 * @psp:      PSP handle holding the mapped register addresses
 * @reg_vals: PSP_NUM_IN_REGS values, written in order to the command and
 *            argument registers
 *
 * After the input registers are written, the interrupt register is cleared
 * and then set to start the command. The status register is polled every
 * PSP_POLL_INTERVAL us, for at most PSP_POLL_TIMEOUT us, until
 * PSP_STATUS_READY is set; the response register is then read.
 *
 * Returns 0 on success, the polling error if the PSP never became ready,
 * or -EIO when the response register holds a non-zero code.
 */
static int psp_exec(struct psp_device *psp, u32 *reg_vals)
{
	u32 resp_code;
	int ret, i;
	u32 ready;

	/* Write command and argument registers */
	for (i = 0; i < PSP_NUM_IN_REGS; i++)
		writel(reg_vals[i], PSP_REG(psp, i));

	/* clear and set PSP INTR register to kick off */
	writel(0, PSP_REG(psp, PSP_INTR_REG));
	writel(1, PSP_REG(psp, PSP_INTR_REG));

	/* PSP should be busy. Wait for ready, so we know task is done. */
	ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_STATUS_REG), ready,
				 FIELD_GET(PSP_STATUS_READY, ready),
				 PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
	if (ret) {
		/* ret is a negative errno: print it as %d like the other messages here */
		drm_err(psp->ddev, "PSP is not ready, ret %d", ret);
		return ret;
	}

	resp_code = readl(PSP_REG(psp, PSP_RESP_REG));
	if (resp_code) {
		drm_err(psp->ddev, "fw return error 0x%x", resp_code);
		return -EIO;
	}

	return 0;
}
|
||||
|
||||
void aie2_psp_stop(struct psp_device *psp)
|
||||
{
|
||||
u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
|
||||
int ret;
|
||||
|
||||
ret = psp_exec(psp, reg_vals);
|
||||
if (ret)
|
||||
drm_err(psp->ddev, "release tmr failed, ret %d", ret);
|
||||
}
|
||||
|
||||
int aie2_psp_start(struct psp_device *psp)
|
||||
{
|
||||
u32 reg_vals[PSP_NUM_IN_REGS];
|
||||
int ret;
|
||||
|
||||
reg_vals[0] = PSP_VALIDATE;
|
||||
reg_vals[1] = lower_32_bits(psp->fw_paddr);
|
||||
reg_vals[2] = upper_32_bits(psp->fw_paddr);
|
||||
reg_vals[3] = psp->fw_buf_sz;
|
||||
|
||||
ret = psp_exec(psp, reg_vals);
|
||||
if (ret) {
|
||||
drm_err(psp->ddev, "failed to validate fw, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
memset(reg_vals, 0, sizeof(reg_vals));
|
||||
reg_vals[0] = PSP_START;
|
||||
reg_vals[1] = PSP_START_COPY_FW;
|
||||
ret = psp_exec(psp, reg_vals);
|
||||
if (ret) {
|
||||
drm_err(psp->ddev, "failed to start fw, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf)
|
||||
{
|
||||
struct psp_device *psp;
|
||||
u64 offset;
|
||||
|
||||
psp = drmm_kzalloc(ddev, sizeof(*psp), GFP_KERNEL);
|
||||
if (!psp)
|
||||
return NULL;
|
||||
|
||||
psp->ddev = ddev;
|
||||
memcpy(psp->psp_regs, conf->psp_regs, sizeof(psp->psp_regs));
|
||||
|
||||
psp->fw_buf_sz = ALIGN(conf->fw_size, PSP_FW_ALIGN) + PSP_FW_ALIGN;
|
||||
psp->fw_buffer = drmm_kmalloc(ddev, psp->fw_buf_sz, GFP_KERNEL);
|
||||
if (!psp->fw_buffer) {
|
||||
drm_err(ddev, "no memory for fw buffer");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD Platform Security Processor(PSP) requires host physical
|
||||
* address to load NPU firmware.
|
||||
*/
|
||||
psp->fw_paddr = virt_to_phys(psp->fw_buffer);
|
||||
offset = ALIGN(psp->fw_paddr, PSP_FW_ALIGN) - psp->fw_paddr;
|
||||
psp->fw_paddr += offset;
|
||||
memcpy(psp->fw_buffer + offset, conf->fw_buf, conf->fw_size);
|
||||
|
||||
return psp;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user