mirror of
https://github.com/torvalds/linux.git
synced 2026-06-08 22:52:35 +02:00
Merge 9ccce092fc Merge tag 'for-linus-5.13-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux into android-mainline
A little step en route to v5.13-rc1 Signed-off-by: Lee Jones <lee.jones@linaro.org> Change-Id: Ib6cc40149287da7aa1626104b295999e4ab0b53a
This commit is contained in:
commit
e11858ac17
14
Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
Normal file
14
Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
What: /sys/bus/coresight/devices/trbe<cpu>/align
|
||||
Date: March 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
Description: (Read) Shows the TRBE write pointer alignment. This value
|
||||
is fetched from the TRBIDR register.
|
||||
|
||||
What: /sys/bus/coresight/devices/trbe<cpu>/flag
|
||||
Date: March 2021
|
||||
KernelVersion: 5.13
|
||||
Contact: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
Description: (Read) Shows if TRBE updates in the memory are with access
|
||||
and dirty flag updates as well. This value is fetched from
|
||||
the TRBIDR register.
|
||||
|
|
@ -34,6 +34,9 @@ Description: Multipath policy specifies which path should be selected on each IO
|
|||
min-inflight (1):
|
||||
select path with minimum inflights.
|
||||
|
||||
min-latency (2):
|
||||
select path with minimum latency.
|
||||
|
||||
What: /sys/class/rtrs-client/<session-name>/paths/
|
||||
Date: Feb 2020
|
||||
KernelVersion: 5.7
|
||||
|
|
@ -95,6 +98,15 @@ KernelVersion: 5.7
|
|||
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
||||
Description: RO, Contains the destination address of the path
|
||||
|
||||
What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/cur_latency
|
||||
Date: Feb 2020
|
||||
KernelVersion: 5.7
|
||||
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
||||
Description: RO, Contains the latency time calculated by the heart-beat messages.
|
||||
Whenever the client sends heart-beat message, it checks the time gap
|
||||
between sending the heart-beat message and receiving the ACK.
|
||||
This value can be changed regularly.
|
||||
|
||||
What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/stats/reset_all
|
||||
Date: Feb 2020
|
||||
KernelVersion: 5.7
|
||||
|
|
|
|||
|
|
@ -1881,13 +1881,6 @@
|
|||
bypassed by not enabling DMAR with this option. In
|
||||
this case, gfx device will use physical address for
|
||||
DMA.
|
||||
forcedac [X86-64]
|
||||
With this option iommu will not optimize to look
|
||||
for io virtual address below 32-bit forcing dual
|
||||
address cycle on pci bus for cards supporting greater
|
||||
than 32-bit addressing. The default is to look
|
||||
for translation below 32-bit and if not available
|
||||
then look in the higher range.
|
||||
strict [Default Off]
|
||||
With this option on every unmap_single operation will
|
||||
result in a hardware IOTLB flush operation as opposed
|
||||
|
|
@ -1976,6 +1969,14 @@
|
|||
nobypass [PPC/POWERNV]
|
||||
Disable IOMMU bypass, using IOMMU for PCI devices.
|
||||
|
||||
iommu.forcedac= [ARM64, X86] Control IOVA allocation for PCI devices.
|
||||
Format: { "0" | "1" }
|
||||
0 - Try to allocate a 32-bit DMA address first, before
|
||||
falling back to the full range if needed.
|
||||
1 - Allocate directly from the full usable range,
|
||||
forcing Dual Address Cycle for PCI cards supporting
|
||||
greater than 32-bit addressing.
|
||||
|
||||
iommu.strict= [ARM64] Configure TLB invalidation behaviour
|
||||
Format: { "0" | "1" }
|
||||
0 - Lazy mode.
|
||||
|
|
|
|||
75
Documentation/devicetree/bindings/arm/ete.yaml
Normal file
75
Documentation/devicetree/bindings/arm/ete.yaml
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
|
||||
# Copyright 2021, Arm Ltd
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: "http://devicetree.org/schemas/arm/ete.yaml#"
|
||||
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
|
||||
|
||||
title: ARM Embedded Trace Extensions
|
||||
|
||||
maintainers:
|
||||
- Suzuki K Poulose <suzuki.poulose@arm.com>
|
||||
- Mathieu Poirier <mathieu.poirier@linaro.org>
|
||||
|
||||
description: |
|
||||
Arm Embedded Trace Extension(ETE) is a per CPU trace component that
|
||||
allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
|
||||
architecture and has extended support for future architecture changes.
|
||||
The trace generated by the ETE could be stored via legacy CoreSight
|
||||
components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
|
||||
Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
|
||||
legacy CoreSight components, a node must be listed per instance, along
|
||||
with any optional connection graph as per the coresight bindings.
|
||||
See bindings/arm/coresight.txt.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
pattern: "^ete([0-9a-f]+)$"
|
||||
compatible:
|
||||
items:
|
||||
- const: arm,embedded-trace-extension
|
||||
|
||||
cpu:
|
||||
description: |
|
||||
Handle to the cpu this ETE is bound to.
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
|
||||
out-ports:
|
||||
description: |
|
||||
Output connections from the ETE to legacy CoreSight trace bus.
|
||||
$ref: /schemas/graph.yaml#/properties/ports
|
||||
properties:
|
||||
port:
|
||||
description: Output connection from the ETE to legacy CoreSight Trace bus.
|
||||
$ref: /schemas/graph.yaml#/properties/port
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- cpu
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
|
||||
# An ETE node without legacy CoreSight connections
|
||||
- |
|
||||
ete0 {
|
||||
compatible = "arm,embedded-trace-extension";
|
||||
cpu = <&cpu_0>;
|
||||
};
|
||||
# An ETE node with legacy CoreSight connections
|
||||
- |
|
||||
ete1 {
|
||||
compatible = "arm,embedded-trace-extension";
|
||||
cpu = <&cpu_1>;
|
||||
|
||||
out-ports { /* legacy coresight connection */
|
||||
port {
|
||||
ete1_out_port: endpoint {
|
||||
remote-endpoint = <&funnel_in_port0>;
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
...
|
||||
49
Documentation/devicetree/bindings/arm/trbe.yaml
Normal file
49
Documentation/devicetree/bindings/arm/trbe.yaml
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
|
||||
# Copyright 2021, Arm Ltd
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: "http://devicetree.org/schemas/arm/trbe.yaml#"
|
||||
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
|
||||
|
||||
title: ARM Trace Buffer Extensions
|
||||
|
||||
maintainers:
|
||||
- Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
|
||||
description: |
|
||||
Arm Trace Buffer Extension (TRBE) is a per CPU component
|
||||
for storing trace generated on the CPU to memory. It is
|
||||
accessed via CPU system registers. The software can verify
|
||||
if it is permitted to use the component by checking the
|
||||
TRBIDR register.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: "trbe"
|
||||
compatible:
|
||||
items:
|
||||
- const: arm,trace-buffer-extension
|
||||
|
||||
interrupts:
|
||||
description: |
|
||||
Exactly 1 PPI must be listed. For heterogeneous systems where
|
||||
TRBE is only supported on a subset of the CPUs, please consult
|
||||
the arm,gic-v3 binding for details on describing a PPI partition.
|
||||
maxItems: 1
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- interrupts
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
|
||||
trbe {
|
||||
compatible = "arm,trace-buffer-extension";
|
||||
interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
...
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
Hisilicon RoCE DT description
|
||||
|
||||
Hisilicon RoCE engine is a part of network subsystem.
|
||||
It works depending on other part of network wubsytem, such as, gmac and
|
||||
It works depending on other part of network subsystem, such as gmac and
|
||||
dsa fabric.
|
||||
|
||||
Additional properties are described here:
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ properties:
|
|||
items:
|
||||
- enum:
|
||||
- qcom,sc7180-smmu-500
|
||||
- qcom,sc7280-smmu-500
|
||||
- qcom,sc8180x-smmu-500
|
||||
- qcom,sdm845-smmu-500
|
||||
- qcom,sm8150-smmu-500
|
||||
|
|
|
|||
57
Documentation/devicetree/bindings/iommu/sprd,iommu.yaml
Normal file
57
Documentation/devicetree/bindings/iommu/sprd,iommu.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
# Copyright 2020 Unisoc Inc.
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/iommu/sprd,iommu.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Unisoc IOMMU and Multi-media MMU
|
||||
|
||||
maintainers:
|
||||
- Chunyan Zhang <zhang.lyra@gmail.com>
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- sprd,iommu-v1
|
||||
|
||||
"#iommu-cells":
|
||||
const: 0
|
||||
description:
|
||||
Unisoc IOMMUs are all single-master IOMMU devices, therefore no
|
||||
additional information needs to associate with its master device.
|
||||
Please refer to the generic bindings document for more details,
|
||||
Documentation/devicetree/bindings/iommu/iommu.txt
|
||||
|
||||
reg:
|
||||
maxItems: 1
|
||||
|
||||
clocks:
|
||||
description:
|
||||
Reference to a gate clock phandle, since access to some of IOMMUs are
|
||||
controlled by gate clock, but this is not required.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- "#iommu-cells"
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
iommu_disp: iommu@63000800 {
|
||||
compatible = "sprd,iommu-v1";
|
||||
reg = <0x63000800 0x80>;
|
||||
#iommu-cells = <0>;
|
||||
};
|
||||
|
||||
- |
|
||||
iommu_jpg: iommu@62300300 {
|
||||
compatible = "sprd,iommu-v1";
|
||||
reg = <0x62300300 0x80>;
|
||||
#iommu-cells = <0>;
|
||||
clocks = <&mm_gate 1>;
|
||||
};
|
||||
|
||||
...
|
||||
|
|
@ -16,3 +16,4 @@ Security Documentation
|
|||
siphash
|
||||
tpm/index
|
||||
digsig
|
||||
landlock
|
||||
|
|
|
|||
85
Documentation/security/landlock.rst
Normal file
85
Documentation/security/landlock.rst
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
|
||||
.. Copyright © 2019-2020 ANSSI
|
||||
|
||||
==================================
|
||||
Landlock LSM: kernel documentation
|
||||
==================================
|
||||
|
||||
:Author: Mickaël Salaün
|
||||
:Date: March 2021
|
||||
|
||||
Landlock's goal is to create scoped access-control (i.e. sandboxing). To
|
||||
harden a whole system, this feature should be available to any process,
|
||||
including unprivileged ones. Because such process may be compromised or
|
||||
backdoored (i.e. untrusted), Landlock's features must be safe to use from the
|
||||
kernel and other processes point of view. Landlock's interface must therefore
|
||||
expose a minimal attack surface.
|
||||
|
||||
Landlock is designed to be usable by unprivileged processes while following the
|
||||
system security policy enforced by other access control mechanisms (e.g. DAC,
|
||||
LSM). Indeed, a Landlock rule shall not interfere with other access-controls
|
||||
enforced on the system, only add more restrictions.
|
||||
|
||||
Any user can enforce Landlock rulesets on their processes. They are merged and
|
||||
evaluated according to the inherited ones in a way that ensures that only more
|
||||
constraints can be added.
|
||||
|
||||
User space documentation can be found here: :doc:`/userspace-api/landlock`.
|
||||
|
||||
Guiding principles for safe access controls
|
||||
===========================================
|
||||
|
||||
* A Landlock rule shall be focused on access control on kernel objects instead
|
||||
of syscall filtering (i.e. syscall arguments), which is the purpose of
|
||||
seccomp-bpf.
|
||||
* To avoid multiple kinds of side-channel attacks (e.g. leak of security
|
||||
policies, CPU-based attacks), Landlock rules shall not be able to
|
||||
programmatically communicate with user space.
|
||||
* Kernel access check shall not slow down access request from unsandboxed
|
||||
processes.
|
||||
* Computation related to Landlock operations (e.g. enforcing a ruleset) shall
|
||||
only impact the processes requesting them.
|
||||
|
||||
Tests
|
||||
=====
|
||||
|
||||
Userspace tests for backward compatibility, ptrace restrictions and filesystem
|
||||
support can be found here: `tools/testing/selftests/landlock/`_.
|
||||
|
||||
Kernel structures
|
||||
=================
|
||||
|
||||
Object
|
||||
------
|
||||
|
||||
.. kernel-doc:: security/landlock/object.h
|
||||
:identifiers:
|
||||
|
||||
Filesystem
|
||||
----------
|
||||
|
||||
.. kernel-doc:: security/landlock/fs.h
|
||||
:identifiers:
|
||||
|
||||
Ruleset and domain
|
||||
------------------
|
||||
|
||||
A domain is a read-only ruleset tied to a set of subjects (i.e. tasks'
|
||||
credentials). Each time a ruleset is enforced on a task, the current domain is
|
||||
duplicated and the ruleset is imported as a new layer of rules in the new
|
||||
domain. Indeed, once in a domain, each rule is tied to a layer level. To
|
||||
grant access to an object, at least one rule of each layer must allow the
|
||||
requested action on the object. A task can then only transit to a new domain
|
||||
that is the intersection of the constraints from the current domain and those
|
||||
of a ruleset provided by the task.
|
||||
|
||||
The definition of a subject is implicit for a task sandboxing itself, which
|
||||
makes the reasoning much easier and helps avoid pitfalls.
|
||||
|
||||
.. kernel-doc:: security/landlock/ruleset.h
|
||||
:identifiers:
|
||||
|
||||
.. Links
|
||||
.. _tools/testing/selftests/landlock/:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/landlock/
|
||||
38
Documentation/trace/coresight/coresight-trbe.rst
Normal file
38
Documentation/trace/coresight/coresight-trbe.rst
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================
|
||||
Trace Buffer Extension (TRBE).
|
||||
==============================
|
||||
|
||||
:Author: Anshuman Khandual <anshuman.khandual@arm.com>
|
||||
:Date: November 2020
|
||||
|
||||
Hardware Description
|
||||
--------------------
|
||||
|
||||
Trace Buffer Extension (TRBE) is a percpu hardware which captures in system
|
||||
memory, CPU traces generated from a corresponding percpu tracing unit. This
|
||||
gets plugged in as a coresight sink device because the corresponding trace
|
||||
generators (ETE), are plugged in as source device.
|
||||
|
||||
The TRBE is not compliant to CoreSight architecture specifications, but is
|
||||
driven via the CoreSight driver framework to support the ETE (which is
|
||||
CoreSight compliant) integration.
|
||||
|
||||
Sysfs files and directories
|
||||
---------------------------
|
||||
|
||||
The TRBE devices appear on the existing coresight bus alongside the other
|
||||
coresight devices::
|
||||
|
||||
>$ ls /sys/bus/coresight/devices
|
||||
trbe0 trbe1 trbe2 trbe3
|
||||
|
||||
The ``trbe<N>`` named TRBEs are associated with a CPU.::
|
||||
|
||||
>$ ls /sys/bus/coresight/devices/trbe0/
|
||||
align flag
|
||||
|
||||
*Key file items are:-*
|
||||
* ``align``: TRBE write pointer alignment
|
||||
* ``flag``: TRBE updates memory with access and dirty flags
|
||||
|
|
@ -18,6 +18,7 @@ place where this information is gathered.
|
|||
|
||||
no_new_privs
|
||||
seccomp_filter
|
||||
landlock
|
||||
unshare
|
||||
spec_ctrl
|
||||
accelerators/ocxl
|
||||
|
|
|
|||
311
Documentation/userspace-api/landlock.rst
Normal file
311
Documentation/userspace-api/landlock.rst
Normal file
|
|
@ -0,0 +1,311 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
|
||||
.. Copyright © 2019-2020 ANSSI
|
||||
.. Copyright © 2021 Microsoft Corporation
|
||||
|
||||
=====================================
|
||||
Landlock: unprivileged access control
|
||||
=====================================
|
||||
|
||||
:Author: Mickaël Salaün
|
||||
:Date: March 2021
|
||||
|
||||
The goal of Landlock is to enable to restrict ambient rights (e.g. global
|
||||
filesystem access) for a set of processes. Because Landlock is a stackable
|
||||
LSM, it makes possible to create safe security sandboxes as new security layers
|
||||
in addition to the existing system-wide access-controls. This kind of sandbox
|
||||
is expected to help mitigate the security impact of bugs or
|
||||
unexpected/malicious behaviors in user space applications. Landlock empowers
|
||||
any process, including unprivileged ones, to securely restrict themselves.
|
||||
|
||||
Landlock rules
|
||||
==============
|
||||
|
||||
A Landlock rule describes an action on an object. An object is currently a
|
||||
file hierarchy, and the related filesystem actions are defined with `access
|
||||
rights`_. A set of rules is aggregated in a ruleset, which can then restrict
|
||||
the thread enforcing it, and its future children.
|
||||
|
||||
Defining and enforcing a security policy
|
||||
----------------------------------------
|
||||
|
||||
We first need to create the ruleset that will contain our rules. For this
|
||||
example, the ruleset will contain rules that only allow read actions, but write
|
||||
actions will be denied. The ruleset then needs to handle both of these kind of
|
||||
actions.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int ruleset_fd;
|
||||
struct landlock_ruleset_attr ruleset_attr = {
|
||||
.handled_access_fs =
|
||||
LANDLOCK_ACCESS_FS_EXECUTE |
|
||||
LANDLOCK_ACCESS_FS_WRITE_FILE |
|
||||
LANDLOCK_ACCESS_FS_READ_FILE |
|
||||
LANDLOCK_ACCESS_FS_READ_DIR |
|
||||
LANDLOCK_ACCESS_FS_REMOVE_DIR |
|
||||
LANDLOCK_ACCESS_FS_REMOVE_FILE |
|
||||
LANDLOCK_ACCESS_FS_MAKE_CHAR |
|
||||
LANDLOCK_ACCESS_FS_MAKE_DIR |
|
||||
LANDLOCK_ACCESS_FS_MAKE_REG |
|
||||
LANDLOCK_ACCESS_FS_MAKE_SOCK |
|
||||
LANDLOCK_ACCESS_FS_MAKE_FIFO |
|
||||
LANDLOCK_ACCESS_FS_MAKE_BLOCK |
|
||||
LANDLOCK_ACCESS_FS_MAKE_SYM,
|
||||
};
|
||||
|
||||
ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
|
||||
if (ruleset_fd < 0) {
|
||||
perror("Failed to create a ruleset");
|
||||
return 1;
|
||||
}
|
||||
|
||||
We can now add a new rule to this ruleset thanks to the returned file
|
||||
descriptor referring to this ruleset. The rule will only allow reading the
|
||||
file hierarchy ``/usr``. Without another rule, write actions would then be
|
||||
denied by the ruleset. To add ``/usr`` to the ruleset, we open it with the
|
||||
``O_PATH`` flag and fill the &struct landlock_path_beneath_attr with this file
|
||||
descriptor.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int err;
|
||||
struct landlock_path_beneath_attr path_beneath = {
|
||||
.allowed_access =
|
||||
LANDLOCK_ACCESS_FS_EXECUTE |
|
||||
LANDLOCK_ACCESS_FS_READ_FILE |
|
||||
LANDLOCK_ACCESS_FS_READ_DIR,
|
||||
};
|
||||
|
||||
path_beneath.parent_fd = open("/usr", O_PATH | O_CLOEXEC);
|
||||
if (path_beneath.parent_fd < 0) {
|
||||
perror("Failed to open file");
|
||||
close(ruleset_fd);
|
||||
return 1;
|
||||
}
|
||||
err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
|
||||
&path_beneath, 0);
|
||||
close(path_beneath.parent_fd);
|
||||
if (err) {
|
||||
perror("Failed to update ruleset");
|
||||
close(ruleset_fd);
|
||||
return 1;
|
||||
}
|
||||
|
||||
We now have a ruleset with one rule allowing read access to ``/usr`` while
|
||||
denying all other handled accesses for the filesystem. The next step is to
|
||||
restrict the current thread from gaining more privileges (e.g. thanks to a SUID
|
||||
binary).
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
|
||||
perror("Failed to restrict privileges");
|
||||
close(ruleset_fd);
|
||||
return 1;
|
||||
}
|
||||
|
||||
The current thread is now ready to sandbox itself with the ruleset.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
if (landlock_restrict_self(ruleset_fd, 0)) {
|
||||
perror("Failed to enforce ruleset");
|
||||
close(ruleset_fd);
|
||||
return 1;
|
||||
}
|
||||
close(ruleset_fd);
|
||||
|
||||
If the `landlock_restrict_self` system call succeeds, the current thread is now
|
||||
restricted and this policy will be enforced on all its subsequently created
|
||||
children as well. Once a thread is landlocked, there is no way to remove its
|
||||
security policy; only adding more restrictions is allowed. These threads are
|
||||
now in a new Landlock domain, merge of their parent one (if any) with the new
|
||||
ruleset.
|
||||
|
||||
Full working code can be found in `samples/landlock/sandboxer.c`_.
|
||||
|
||||
Layers of file path access rights
|
||||
---------------------------------
|
||||
|
||||
Each time a thread enforces a ruleset on itself, it updates its Landlock domain
|
||||
with a new layer of policy. Indeed, this complementary policy is stacked with
|
||||
the potentially other rulesets already restricting this thread. A sandboxed
|
||||
thread can then safely add more constraints to itself with a new enforced
|
||||
ruleset.
|
||||
|
||||
One policy layer grants access to a file path if at least one of its rules
|
||||
encountered on the path grants the access. A sandboxed thread can only access
|
||||
a file path if all its enforced policy layers grant the access as well as all
|
||||
the other system access controls (e.g. filesystem DAC, other LSM policies,
|
||||
etc.).
|
||||
|
||||
Bind mounts and OverlayFS
|
||||
-------------------------
|
||||
|
||||
Landlock enables to restrict access to file hierarchies, which means that these
|
||||
access rights can be propagated with bind mounts (cf.
|
||||
:doc:`/filesystems/sharedsubtree`) but not with :doc:`/filesystems/overlayfs`.
|
||||
|
||||
A bind mount mirrors a source file hierarchy to a destination. The destination
|
||||
hierarchy is then composed of the exact same files, on which Landlock rules can
|
||||
be tied, either via the source or the destination path. These rules restrict
|
||||
access when they are encountered on a path, which means that they can restrict
|
||||
access to multiple file hierarchies at the same time, whether these hierarchies
|
||||
are the result of bind mounts or not.
|
||||
|
||||
An OverlayFS mount point consists of upper and lower layers. These layers are
|
||||
combined in a merge directory, result of the mount point. This merge hierarchy
|
||||
may include files from the upper and lower layers, but modifications performed
|
||||
on the merge hierarchy only reflects on the upper layer. From a Landlock
|
||||
policy point of view, each OverlayFS layers and merge hierarchies are
|
||||
standalone and contains their own set of files and directories, which is
|
||||
different from bind mounts. A policy restricting an OverlayFS layer will not
|
||||
restrict the resulted merged hierarchy, and vice versa. Landlock users should
|
||||
then only think about file hierarchies they want to allow access to, regardless
|
||||
of the underlying filesystem.
|
||||
|
||||
Inheritance
|
||||
-----------
|
||||
|
||||
Every new thread resulting from a :manpage:`clone(2)` inherits Landlock domain
|
||||
restrictions from its parent. This is similar to the seccomp inheritance (cf.
|
||||
:doc:`/userspace-api/seccomp_filter`) or any other LSM dealing with task's
|
||||
:manpage:`credentials(7)`. For instance, one process's thread may apply
|
||||
Landlock rules to itself, but they will not be automatically applied to other
|
||||
sibling threads (unlike POSIX thread credential changes, cf.
|
||||
:manpage:`nptl(7)`).
|
||||
|
||||
When a thread sandboxes itself, we have the guarantee that the related security
|
||||
policy will stay enforced on all this thread's descendants. This allows
|
||||
creating standalone and modular security policies per application, which will
|
||||
automatically be composed between themselves according to their runtime parent
|
||||
policies.
|
||||
|
||||
Ptrace restrictions
|
||||
-------------------
|
||||
|
||||
A sandboxed process has less privileges than a non-sandboxed process and must
|
||||
then be subject to additional restrictions when manipulating another process.
|
||||
To be allowed to use :manpage:`ptrace(2)` and related syscalls on a target
|
||||
process, a sandboxed process should have a subset of the target process rules,
|
||||
which means the tracee must be in a sub-domain of the tracer.
|
||||
|
||||
Kernel interface
|
||||
================
|
||||
|
||||
Access rights
|
||||
-------------
|
||||
|
||||
.. kernel-doc:: include/uapi/linux/landlock.h
|
||||
:identifiers: fs_access
|
||||
|
||||
Creating a new ruleset
|
||||
----------------------
|
||||
|
||||
.. kernel-doc:: security/landlock/syscalls.c
|
||||
:identifiers: sys_landlock_create_ruleset
|
||||
|
||||
.. kernel-doc:: include/uapi/linux/landlock.h
|
||||
:identifiers: landlock_ruleset_attr
|
||||
|
||||
Extending a ruleset
|
||||
-------------------
|
||||
|
||||
.. kernel-doc:: security/landlock/syscalls.c
|
||||
:identifiers: sys_landlock_add_rule
|
||||
|
||||
.. kernel-doc:: include/uapi/linux/landlock.h
|
||||
:identifiers: landlock_rule_type landlock_path_beneath_attr
|
||||
|
||||
Enforcing a ruleset
|
||||
-------------------
|
||||
|
||||
.. kernel-doc:: security/landlock/syscalls.c
|
||||
:identifiers: sys_landlock_restrict_self
|
||||
|
||||
Current limitations
|
||||
===================
|
||||
|
||||
File renaming and linking
|
||||
-------------------------
|
||||
|
||||
Because Landlock targets unprivileged access controls, it is needed to properly
|
||||
handle composition of rules. Such property also implies rules nesting.
|
||||
Properly handling multiple layers of ruleset, each one of them able to restrict
|
||||
access to files, also implies to inherit the ruleset restrictions from a parent
|
||||
to its hierarchy. Because files are identified and restricted by their
|
||||
hierarchy, moving or linking a file from one directory to another implies to
|
||||
propagate the hierarchy constraints. To protect against privilege escalations
|
||||
through renaming or linking, and for the sake of simplicity, Landlock currently
|
||||
limits linking and renaming to the same directory. Future Landlock evolutions
|
||||
will enable more flexibility for renaming and linking, with dedicated ruleset
|
||||
flags.
|
||||
|
||||
Filesystem topology modification
|
||||
--------------------------------
|
||||
|
||||
As for file renaming and linking, a sandboxed thread cannot modify its
|
||||
filesystem topology, whether via :manpage:`mount(2)` or
|
||||
:manpage:`pivot_root(2)`. However, :manpage:`chroot(2)` calls are not denied.
|
||||
|
||||
Special filesystems
|
||||
-------------------
|
||||
|
||||
Access to regular files and directories can be restricted by Landlock,
|
||||
according to the handled accesses of a ruleset. However, files that do not
|
||||
come from a user-visible filesystem (e.g. pipe, socket), but can still be
|
||||
accessed through ``/proc/<pid>/fd/*``, cannot currently be explicitly
|
||||
restricted. Likewise, some special kernel filesystems such as nsfs, which can
|
||||
be accessed through ``/proc/<pid>/ns/*``, cannot currently be explicitly
|
||||
restricted. However, thanks to the `ptrace restrictions`_, access to such
|
||||
sensitive ``/proc`` files are automatically restricted according to domain
|
||||
hierarchies. Future Landlock evolutions could still enable to explicitly
|
||||
restrict such paths with dedicated ruleset flags.
|
||||
|
||||
Ruleset layers
|
||||
--------------
|
||||
|
||||
There is a limit of 64 layers of stacked rulesets. This can be an issue for a
|
||||
task willing to enforce a new ruleset in complement to its 64 inherited
|
||||
rulesets. Once this limit is reached, sys_landlock_restrict_self() returns
|
||||
E2BIG. It is then strongly suggested to carefully build rulesets once in the
|
||||
life of a thread, especially for applications able to launch other applications
|
||||
that may also want to sandbox themselves (e.g. shells, container managers,
|
||||
etc.).
|
||||
|
||||
Memory usage
|
||||
------------
|
||||
|
||||
Kernel memory allocated to create rulesets is accounted and can be restricted
|
||||
by the :doc:`/admin-guide/cgroup-v1/memory`.
|
||||
|
||||
Questions and answers
|
||||
=====================
|
||||
|
||||
What about user space sandbox managers?
|
||||
---------------------------------------
|
||||
|
||||
Using user space process to enforce restrictions on kernel resources can lead
|
||||
to race conditions or inconsistent evaluations (i.e. `Incorrect mirroring of
|
||||
the OS code and state
|
||||
<https://www.ndss-symposium.org/ndss2003/traps-and-pitfalls-practical-problems-system-call-interposition-based-security-tools/>`_).
|
||||
|
||||
What about namespaces and containers?
|
||||
-------------------------------------
|
||||
|
||||
Namespaces can help create sandboxes but they are not designed for
|
||||
access-control and then miss useful features for such use case (e.g. no
|
||||
fine-grained restrictions). Moreover, their complexity can lead to security
|
||||
issues, especially when untrusted processes can manipulate them (cf.
|
||||
`Controlling access to user namespaces <https://lwn.net/Articles/673597/>`_).
|
||||
|
||||
Additional documentation
|
||||
========================
|
||||
|
||||
* :doc:`/security/landlock`
|
||||
* https://landlock.io
|
||||
|
||||
.. Links
|
||||
.. _samples/landlock/sandboxer.c:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/samples/landlock/sandboxer.c
|
||||
|
|
@ -148,6 +148,9 @@ measurement. Since the guest owner knows the initial contents of the guest at
|
|||
boot, the measurement can be verified by comparing it to what the guest owner
|
||||
expects.
|
||||
|
||||
If len is zero on entry, the measurement blob length is written to len and
|
||||
uaddr is unused.
|
||||
|
||||
Parameters (in): struct kvm_sev_launch_measure
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
|
@ -271,6 +274,9 @@ report containing the SHA-256 digest of the guest memory and VMSA passed through
|
|||
commands and signed with the PEK. The digest returned by the command should match the digest
|
||||
used by the guest owner with the KVM_SEV_LAUNCH_MEASURE.
|
||||
|
||||
If len is zero on entry, the measurement blob length is written to len and
|
||||
uaddr is unused.
|
||||
|
||||
Parameters (in): struct kvm_sev_attestation
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
|
@ -284,6 +290,143 @@ Returns: 0 on success, -negative on error
|
|||
__u32 len;
|
||||
};
|
||||
|
||||
11. KVM_SEV_SEND_START
|
||||
----------------------
|
||||
|
||||
The KVM_SEV_SEND_START command can be used by the hypervisor to create an
|
||||
outgoing guest encryption context.
|
||||
|
||||
If session_len is zero on entry, the length of the guest session information is
|
||||
written to session_len and all other fields are not used.
|
||||
|
||||
Parameters (in): struct kvm_sev_send_start
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_sev_send_start {
|
||||
__u32 policy; /* guest policy */
|
||||
|
||||
__u64 pdh_cert_uaddr; /* platform Diffie-Hellman certificate */
|
||||
__u32 pdh_cert_len;
|
||||
|
||||
__u64 plat_certs_uaddr; /* platform certificate chain */
|
||||
__u32 plat_certs_len;
|
||||
|
||||
__u64 amd_certs_uaddr; /* AMD certificate */
|
||||
__u32 amd_certs_len;
|
||||
|
||||
__u64 session_uaddr; /* Guest session information */
|
||||
__u32 session_len;
|
||||
};
|
||||
|
||||
12. KVM_SEV_SEND_UPDATE_DATA
|
||||
----------------------------
|
||||
|
||||
The KVM_SEV_SEND_UPDATE_DATA command can be used by the hypervisor to encrypt the
|
||||
outgoing guest memory region with the encryption context creating using
|
||||
KVM_SEV_SEND_START.
|
||||
|
||||
If hdr_len or trans_len are zero on entry, the length of the packet header and
|
||||
transport region are written to hdr_len and trans_len respectively, and all
|
||||
other fields are not used.
|
||||
|
||||
Parameters (in): struct kvm_sev_send_update_data
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_sev_launch_send_update_data {
|
||||
__u64 hdr_uaddr; /* userspace address containing the packet header */
|
||||
__u32 hdr_len;
|
||||
|
||||
__u64 guest_uaddr; /* the source memory region to be encrypted */
|
||||
__u32 guest_len;
|
||||
|
||||
__u64 trans_uaddr; /* the destination memory region */
|
||||
__u32 trans_len;
|
||||
};
|
||||
|
||||
13. KVM_SEV_SEND_FINISH
|
||||
------------------------
|
||||
|
||||
After completion of the migration flow, the KVM_SEV_SEND_FINISH command can be
|
||||
issued by the hypervisor to delete the encryption context.
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
14. KVM_SEV_SEND_CANCEL
|
||||
------------------------
|
||||
|
||||
After completion of SEND_START, but before SEND_FINISH, the source VMM can issue the
|
||||
SEND_CANCEL command to stop a migration. This is necessary so that a cancelled
|
||||
migration can restart with a new target later.
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
15. KVM_SEV_RECEIVE_START
|
||||
-------------------------
|
||||
|
||||
The KVM_SEV_RECEIVE_START command is used for creating the memory encryption
|
||||
context for an incoming SEV guest. To create the encryption context, the user must
|
||||
provide a guest policy, the platform public Diffie-Hellman (PDH) key and session
|
||||
information.
|
||||
|
||||
Parameters: struct kvm_sev_receive_start (in/out)
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_sev_receive_start {
|
||||
__u32 handle; /* if zero then firmware creates a new handle */
|
||||
__u32 policy; /* guest's policy */
|
||||
|
||||
__u64 pdh_uaddr; /* userspace address pointing to the PDH key */
|
||||
__u32 pdh_len;
|
||||
|
||||
__u64 session_uaddr; /* userspace address which points to the guest session information */
|
||||
__u32 session_len;
|
||||
};
|
||||
|
||||
On success, the 'handle' field contains a new handle and on error, a negative value.
|
||||
|
||||
For more details, see SEV spec Section 6.12.
|
||||
|
||||
16. KVM_SEV_RECEIVE_UPDATE_DATA
|
||||
-------------------------------
|
||||
|
||||
The KVM_SEV_RECEIVE_UPDATE_DATA command can be used by the hypervisor to copy
|
||||
the incoming buffers into the guest memory region with encryption context
|
||||
created during the KVM_SEV_RECEIVE_START.
|
||||
|
||||
Parameters (in): struct kvm_sev_receive_update_data
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
::
|
||||
|
||||
struct kvm_sev_launch_receive_update_data {
|
||||
__u64 hdr_uaddr; /* userspace address containing the packet header */
|
||||
__u32 hdr_len;
|
||||
|
||||
__u64 guest_uaddr; /* the destination guest memory region */
|
||||
__u32 guest_len;
|
||||
|
||||
__u64 trans_uaddr; /* the incoming buffer memory region */
|
||||
__u32 trans_len;
|
||||
};
|
||||
|
||||
17. KVM_SEV_RECEIVE_FINISH
|
||||
--------------------------
|
||||
|
||||
After completion of the migration flow, the KVM_SEV_RECEIVE_FINISH command can be
|
||||
issued by the hypervisor to make the guest ready for execution.
|
||||
|
||||
Returns: 0 on success, -negative on error
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ Errors:
|
|||
|
||||
====== ============================================================
|
||||
EFAULT the msr index list cannot be read from or written to
|
||||
E2BIG the msr index list is to be to fit in the array specified by
|
||||
E2BIG the msr index list is too big to fit in the array specified by
|
||||
the user.
|
||||
====== ============================================================
|
||||
|
||||
|
|
@ -3116,6 +3116,18 @@ optional features it should have. This will cause a reset of the cpu
|
|||
registers to their initial values. If this is not called, KVM_RUN will
|
||||
return ENOEXEC for that vcpu.
|
||||
|
||||
The initial values are defined as:
|
||||
- Processor state:
|
||||
* AArch64: EL1h, D, A, I and F bits set. All other bits
|
||||
are cleared.
|
||||
* AArch32: SVC, A, I and F bits set. All other bits are
|
||||
cleared.
|
||||
- General Purpose registers, including PC and SP: set to 0
|
||||
- FPSIMD/NEON registers: set to 0
|
||||
- SVE registers: set to 0
|
||||
- System registers: Reset to their architecturally defined
|
||||
values as for a warm reset to EL1 (resp. SVC)
|
||||
|
||||
Note that because some registers reflect machine topology, all vcpus
|
||||
should be created before this ioctl is invoked.
|
||||
|
||||
|
|
@ -3335,7 +3347,8 @@ The top 16 bits of the control field are architecture specific control
|
|||
flags which can include the following:
|
||||
|
||||
- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
|
||||
- KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
|
||||
- KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
|
||||
- KVM_GUESTDBG_USE_HW: using hardware debug events [arm64]
|
||||
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
|
||||
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
|
||||
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
|
||||
|
|
@ -3358,6 +3371,9 @@ indicating the number of supported registers.
|
|||
For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
|
||||
the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
|
||||
|
||||
Also when supported, KVM_CAP_SET_GUEST_DEBUG2 capability indicates the
|
||||
supported KVM_GUESTDBG_* bits in the control field.
|
||||
|
||||
When debug events exit the main run loop with the reason
|
||||
KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
|
||||
structure containing architecture specific debug information.
|
||||
|
|
@ -3690,31 +3706,105 @@ which is the maximum number of possibly pending cpu-local interrupts.
|
|||
|
||||
Queues an SMI on the thread's vcpu.
|
||||
|
||||
4.97 KVM_CAP_PPC_MULTITCE
|
||||
-------------------------
|
||||
4.97 KVM_X86_SET_MSR_FILTER
|
||||
----------------------------
|
||||
|
||||
:Capability: KVM_CAP_PPC_MULTITCE
|
||||
:Architectures: ppc
|
||||
:Type: vm
|
||||
:Capability: KVM_X86_SET_MSR_FILTER
|
||||
:Architectures: x86
|
||||
:Type: vm ioctl
|
||||
:Parameters: struct kvm_msr_filter
|
||||
:Returns: 0 on success, < 0 on error
|
||||
|
||||
This capability means the kernel is capable of handling hypercalls
|
||||
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
|
||||
space. This significantly accelerates DMA operations for PPC KVM guests.
|
||||
User space should expect that its handlers for these hypercalls
|
||||
are not going to be called if user space previously registered LIOBN
|
||||
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
|
||||
::
|
||||
|
||||
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
|
||||
user space might have to advertise it for the guest. For example,
|
||||
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
|
||||
present in the "ibm,hypertas-functions" device-tree property.
|
||||
struct kvm_msr_filter_range {
|
||||
#define KVM_MSR_FILTER_READ (1 << 0)
|
||||
#define KVM_MSR_FILTER_WRITE (1 << 1)
|
||||
__u32 flags;
|
||||
__u32 nmsrs; /* number of msrs in bitmap */
|
||||
__u32 base; /* MSR index the bitmap starts at */
|
||||
__u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
|
||||
};
|
||||
|
||||
The hypercalls mentioned above may or may not be processed successfully
|
||||
in the kernel based fast path. If they can not be handled by the kernel,
|
||||
they will get passed on to user space. So user space still has to have
|
||||
an implementation for these despite the in kernel acceleration.
|
||||
#define KVM_MSR_FILTER_MAX_RANGES 16
|
||||
struct kvm_msr_filter {
|
||||
#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
|
||||
#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
|
||||
__u32 flags;
|
||||
struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
|
||||
};
|
||||
|
||||
This capability is always enabled.
|
||||
flags values for ``struct kvm_msr_filter_range``:
|
||||
|
||||
``KVM_MSR_FILTER_READ``
|
||||
|
||||
Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
|
||||
indicates that a read should immediately fail, while a 1 indicates that
|
||||
a read for a particular MSR should be handled regardless of the default
|
||||
filter action.
|
||||
|
||||
``KVM_MSR_FILTER_WRITE``
|
||||
|
||||
Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
|
||||
indicates that a write should immediately fail, while a 1 indicates that
|
||||
a write for a particular MSR should be handled regardless of the default
|
||||
filter action.
|
||||
|
||||
``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
|
||||
|
||||
Filter both read and write accesses to MSRs using the given bitmap. A 0
|
||||
in the bitmap indicates that both reads and writes should immediately fail,
|
||||
while a 1 indicates that reads and writes for a particular MSR are not
|
||||
filtered by this range.
|
||||
|
||||
flags values for ``struct kvm_msr_filter``:
|
||||
|
||||
``KVM_MSR_FILTER_DEFAULT_ALLOW``
|
||||
|
||||
If no filter range matches an MSR index that is getting accessed, KVM will
|
||||
fall back to allowing access to the MSR.
|
||||
|
||||
``KVM_MSR_FILTER_DEFAULT_DENY``
|
||||
|
||||
If no filter range matches an MSR index that is getting accessed, KVM will
|
||||
fall back to rejecting access to the MSR. In this mode, all MSRs that should
|
||||
be processed by KVM need to explicitly be marked as allowed in the bitmaps.
|
||||
|
||||
This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
|
||||
specify whether a certain MSR access should be explicitly filtered for or not.
|
||||
|
||||
If this ioctl has never been invoked, MSR accesses are not guarded and the
|
||||
default KVM in-kernel emulation behavior is fully preserved.
|
||||
|
||||
Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
|
||||
filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
|
||||
an error.
|
||||
|
||||
As soon as the filtering is in place, every MSR access is processed through
|
||||
the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
|
||||
x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
|
||||
and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
|
||||
register.
|
||||
|
||||
If a bit is within one of the defined ranges, read and write accesses are
|
||||
guarded by the bitmap's value for the MSR index if the kind of access
|
||||
is included in the ``struct kvm_msr_filter_range`` flags. If no range
|
||||
cover this particular access, the behavior is determined by the flags
|
||||
field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
|
||||
and ``KVM_MSR_FILTER_DEFAULT_DENY``.
|
||||
|
||||
Each bitmap range specifies a range of MSRs to potentially allow access on.
|
||||
The range goes from MSR index [base .. base+nmsrs]. The flags field
|
||||
indicates whether reads, writes or both reads and writes are filtered
|
||||
by setting a 1 bit in the bitmap for the corresponding MSR index.
|
||||
|
||||
If an MSR access is not permitted through the filtering, it generates a
|
||||
#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
|
||||
allows user space to deflect and potentially handle various MSR accesses
|
||||
into user space.
|
||||
|
||||
If a vCPU is in running state while this ioctl is invoked, the vCPU may
|
||||
experience inconsistent filtering behavior on MSR accesses.
|
||||
|
||||
4.98 KVM_CREATE_SPAPR_TCE_64
|
||||
----------------------------
|
||||
|
|
@ -4855,7 +4945,7 @@ KVM_XEN_ATTR_TYPE_SHARED_INFO
|
|||
KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
||||
Sets the exception vector used to deliver Xen event channel upcalls.
|
||||
|
||||
4.128 KVM_XEN_HVM_GET_ATTR
|
||||
4.127 KVM_XEN_HVM_GET_ATTR
|
||||
--------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
|
|
@ -4867,7 +4957,7 @@ KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
|
|||
Allows Xen VM attributes to be read. For the structure and types,
|
||||
see KVM_XEN_HVM_SET_ATTR above.
|
||||
|
||||
4.129 KVM_XEN_VCPU_SET_ATTR
|
||||
4.128 KVM_XEN_VCPU_SET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
|
|
@ -4929,7 +5019,7 @@ KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
|
|||
or RUNSTATE_offline) to set the current accounted state as of the
|
||||
adjusted state_entry_time.
|
||||
|
||||
4.130 KVM_XEN_VCPU_GET_ATTR
|
||||
4.129 KVM_XEN_VCPU_GET_ATTR
|
||||
---------------------------
|
||||
|
||||
:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
|
||||
|
|
@ -6233,6 +6323,45 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between them.
|
|||
This capability can be used to check / enable 2nd DAWR feature provided
|
||||
by POWER10 processor.
|
||||
|
||||
7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
|
||||
-------------------------------------
|
||||
|
||||
Architectures: x86 SEV enabled
|
||||
Type: vm
|
||||
Parameters: args[0] is the fd of the source vm
|
||||
Returns: 0 on success; ENOTTY on error
|
||||
|
||||
This capability enables userspace to copy encryption context from the vm
|
||||
indicated by the fd to the vm this is called on.
|
||||
|
||||
This is intended to support in-guest workloads scheduled by the host. This
|
||||
allows the in-guest workload to maintain its own NPTs and keeps the two vms
|
||||
from accidentally clobbering each other with interrupts and the like (separate
|
||||
APIC/MSRs/etc).
|
||||
|
||||
7.25 KVM_CAP_SGX_ATTRIBUTE
|
||||
--------------------------
|
||||
|
||||
:Architectures: x86
|
||||
:Target: VM
|
||||
:Parameters: args[0] is a file handle of a SGX attribute file in securityfs
|
||||
:Returns: 0 on success, -EINVAL if the file handle is invalid or if a requested
|
||||
attribute is not supported by KVM.
|
||||
|
||||
KVM_CAP_SGX_ATTRIBUTE enables a userspace VMM to grant a VM access to one or
|
||||
more priveleged enclave attributes. args[0] must hold a file handle to a valid
|
||||
SGX attribute file corresponding to an attribute that is supported/restricted
|
||||
by KVM (currently only PROVISIONKEY).
|
||||
|
||||
The SGX subsystem restricts access to a subset of enclave attributes to provide
|
||||
additional security for an uncompromised kernel, e.g. use of the PROVISIONKEY
|
||||
is restricted to deter malware from using the PROVISIONKEY to obtain a stable
|
||||
system fingerprint. To prevent userspace from circumventing such restrictions
|
||||
by running an enclave in a VM, KVM prevents access to privileged attributes by
|
||||
default.
|
||||
|
||||
See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
|
||||
|
||||
8. Other capabilities.
|
||||
======================
|
||||
|
||||
|
|
@ -6727,3 +6856,38 @@ vcpu_info is set.
|
|||
The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
|
||||
features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
|
||||
supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
|
||||
|
||||
8.31 KVM_CAP_PPC_MULTITCE
|
||||
-------------------------
|
||||
|
||||
:Capability: KVM_CAP_PPC_MULTITCE
|
||||
:Architectures: ppc
|
||||
:Type: vm
|
||||
|
||||
This capability means the kernel is capable of handling hypercalls
|
||||
H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
|
||||
space. This significantly accelerates DMA operations for PPC KVM guests.
|
||||
User space should expect that its handlers for these hypercalls
|
||||
are not going to be called if user space previously registered LIOBN
|
||||
in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
|
||||
|
||||
In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
|
||||
user space might have to advertise it for the guest. For example,
|
||||
IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
|
||||
present in the "ibm,hypertas-functions" device-tree property.
|
||||
|
||||
The hypercalls mentioned above may or may not be processed successfully
|
||||
in the kernel based fast path. If they can not be handled by the kernel,
|
||||
they will get passed on to user space. So user space still has to have
|
||||
an implementation for these despite the in kernel acceleration.
|
||||
|
||||
This capability is always enabled.
|
||||
|
||||
8.32 KVM_CAP_PTP_KVM
|
||||
--------------------
|
||||
|
||||
:Architectures: arm64
|
||||
|
||||
This capability indicates that the KVM virtual PTP service is
|
||||
supported in the host. A VMM can check whether the service is
|
||||
available to the guest on migration.
|
||||
|
|
|
|||
|
|
@ -10,3 +10,4 @@ ARM
|
|||
hyp-abi
|
||||
psci
|
||||
pvtime
|
||||
ptp_kvm
|
||||
|
|
|
|||
25
Documentation/virt/kvm/arm/ptp_kvm.rst
Normal file
25
Documentation/virt/kvm/arm/ptp_kvm.rst
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
PTP_KVM support for arm/arm64
|
||||
=============================
|
||||
|
||||
PTP_KVM is used for high precision time sync between host and guests.
|
||||
It relies on transferring the wall clock and counter value from the
|
||||
host to the guest using a KVM-specific hypercall.
|
||||
|
||||
* ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: 0x86000001
|
||||
|
||||
This hypercall uses the SMC32/HVC32 calling convention:
|
||||
|
||||
ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID
|
||||
============== ======== =====================================
|
||||
Function ID: (uint32) 0x86000001
|
||||
Arguments: (uint32) KVM_PTP_VIRT_COUNTER(0)
|
||||
KVM_PTP_PHYS_COUNTER(1)
|
||||
Return Values: (int32) NOT_SUPPORTED(-1) on error, or
|
||||
(uint32) Upper 32 bits of wall clock time (r0)
|
||||
(uint32) Lower 32 bits of wall clock time (r1)
|
||||
(uint32) Upper 32 bits of counter (r2)
|
||||
(uint32) Lower 32 bits of counter (r3)
|
||||
Endianness: No Restrictions.
|
||||
============== ======== =====================================
|
||||
|
|
@ -80,7 +80,7 @@ KVM_DEV_ARM_VGIC_GRP_CTRL
|
|||
-EFAULT Invalid guest ram access
|
||||
-EBUSY One or more VCPUS are running
|
||||
-EACCES The virtual ITS is backed by a physical GICv4 ITS, and the
|
||||
state is not available
|
||||
state is not available without GICv4.1
|
||||
======= ==========================================================
|
||||
|
||||
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ Groups:
|
|||
|
||||
KVM_DEV_ARM_VGIC_CTRL_INIT
|
||||
request the initialization of the VGIC, no additional parameter in
|
||||
kvm_device_attr.addr.
|
||||
kvm_device_attr.addr. Must be called after all VCPUs have been created.
|
||||
KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
|
||||
save all LPI pending bits into guest RAM pending tables.
|
||||
|
||||
|
|
|
|||
|
|
@ -38,25 +38,24 @@ the mmu-lock on x86. Currently, the page fault can be fast in one of the
|
|||
following two cases:
|
||||
|
||||
1. Access Tracking: The SPTE is not present, but it is marked for access
|
||||
tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
|
||||
restore the saved R/X bits. This is described in more detail later below.
|
||||
tracking. That means we need to restore the saved R/X bits. This is
|
||||
described in more detail later below.
|
||||
|
||||
2. Write-Protection: The SPTE is present and the fault is
|
||||
caused by write-protect. That means we just need to change the W bit of
|
||||
the spte.
|
||||
2. Write-Protection: The SPTE is present and the fault is caused by
|
||||
write-protect. That means we just need to change the W bit of the spte.
|
||||
|
||||
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
|
||||
SPTE_MMU_WRITEABLE bit on the spte:
|
||||
What we use to avoid all the race is the Host-writable bit and MMU-writable bit
|
||||
on the spte:
|
||||
|
||||
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
|
||||
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
|
||||
the gfn is writable on guest mmu and it is not write-protected by shadow
|
||||
page write-protection.
|
||||
- Host-writable means the gfn is writable in the host kernel page tables and in
|
||||
its KVM memslot.
|
||||
- MMU-writable means the gfn is writable in the guest's mmu and it is not
|
||||
write-protected by shadow page write-protection.
|
||||
|
||||
On fast page fault path, we will use cmpxchg to atomically set the spte W
|
||||
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or
|
||||
restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
|
||||
is safe because whenever changing these bits can be detected by cmpxchg.
|
||||
bit if spte.HOST_WRITEABLE = 1 and spte.WRITE_PROTECT = 1, to restore the saved
|
||||
R/X bits if for an access-traced spte, or both. This is safe because whenever
|
||||
changing these bits can be detected by cmpxchg.
|
||||
|
||||
But we need carefully check these cases:
|
||||
|
||||
|
|
@ -185,17 +184,17 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
|
|||
Lockless Access Tracking:
|
||||
|
||||
This is used for Intel CPUs that are using EPT but do not support the EPT A/D
|
||||
bits. In this case, when the KVM MMU notifier is called to track accesses to a
|
||||
page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in
|
||||
some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
|
||||
PTE (using the ignored bit 62). When the VM tries to access the page later on,
|
||||
a fault is generated and the fast page fault mechanism described above is used
|
||||
to atomically restore the PTE to a Present state. The W bit is not saved when
|
||||
the PTE is marked for access tracking and during restoration to the Present
|
||||
state, the W bit is set depending on whether or not it was a write access. If
|
||||
it wasn't, then the W bit will remain clear until a write access happens, at
|
||||
which time it will be set using the Dirty tracking mechanism described above.
|
||||
bits. In this case, PTEs are tagged as A/D disabled (using ignored bits), and
|
||||
when the KVM MMU notifier is called to track accesses to a page (via
|
||||
kvm_mmu_notifier_clear_flush_young), it marks the PTE not-present in hardware
|
||||
by clearing the RWX bits in the PTE and storing the original R & X bits in more
|
||||
unused/ignored bits. When the VM tries to access the page later on, a fault is
|
||||
generated and the fast page fault mechanism described above is used to
|
||||
atomically restore the PTE to a Present state. The W bit is not saved when the
|
||||
PTE is marked for access tracking and during restoration to the Present state,
|
||||
the W bit is set depending on whether or not it was a write access. If it
|
||||
wasn't, then the W bit will remain clear until a write access happens, at which
|
||||
time it will be set using the Dirty tracking mechanism described above.
|
||||
|
||||
3. Reference
|
||||
------------
|
||||
|
|
|
|||
|
|
@ -84,3 +84,36 @@ If the function code specifies 0x501, breakpoint functions may be performed.
|
|||
This function code is handled by userspace.
|
||||
|
||||
This diagnose function code has no subfunctions and uses no parameters.
|
||||
|
||||
|
||||
DIAGNOSE function code 'X'9C - Voluntary Time Slice Yield
|
||||
---------------------------------------------------------
|
||||
|
||||
General register 1 contains the target CPU address.
|
||||
|
||||
In a guest of a hypervisor like LPAR, KVM or z/VM using shared host CPUs,
|
||||
DIAGNOSE with function code 0x9c may improve system performance by
|
||||
yielding the host CPU on which the guest CPU is running to be assigned
|
||||
to another guest CPU, preferably the logical CPU containing the specified
|
||||
target CPU.
|
||||
|
||||
|
||||
DIAG 'X'9C forwarding
|
||||
+++++++++++++++++++++
|
||||
|
||||
The guest may send a DIAGNOSE 0x9c in order to yield to a certain
|
||||
other vcpu. An example is a Linux guest that tries to yield to the vcpu
|
||||
that is currently holding a spinlock, but not running.
|
||||
|
||||
However, on the host the real cpu backing the vcpu may itself not be
|
||||
running.
|
||||
Forwarding the DIAGNOSE 0x9c initially sent by the guest to yield to
|
||||
the backing cpu will hopefully cause that cpu, and thus subsequently
|
||||
the guest's vcpu, to be scheduled.
|
||||
|
||||
|
||||
diag9c_forwarding_hz
|
||||
KVM kernel parameter allowing to specify the maximum number of DIAGNOSE
|
||||
0x9c forwarding per second in the purpose of avoiding a DIAGNOSE 0x9c
|
||||
forwarding storm.
|
||||
A value of 0 turns the forwarding off.
|
||||
|
|
|
|||
28
MAINTAINERS
28
MAINTAINERS
|
|
@ -1783,6 +1783,8 @@ F: Documentation/ABI/testing/sysfs-bus-coresight-devices-*
|
|||
F: Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
|
||||
F: Documentation/devicetree/bindings/arm/coresight-cti.yaml
|
||||
F: Documentation/devicetree/bindings/arm/coresight.txt
|
||||
F: Documentation/devicetree/bindings/arm/ete.yaml
|
||||
F: Documentation/devicetree/bindings/arm/trbe.yaml
|
||||
F: Documentation/trace/coresight/*
|
||||
F: drivers/hwtracing/coresight/*
|
||||
F: include/dt-bindings/arm/coresight-cti-dt.h
|
||||
|
|
@ -8227,7 +8229,6 @@ F: drivers/crypto/hisilicon/zip/
|
|||
|
||||
HISILICON ROCE DRIVER
|
||||
M: Lijun Ou <oulijun@huawei.com>
|
||||
M: Wei Hu(Xavier) <huwei87@hisilicon.com>
|
||||
M: Weihang Li <liweihang@huawei.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
S: Maintained
|
||||
|
|
@ -9958,10 +9959,10 @@ F: virt/kvm/*
|
|||
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
|
||||
M: Marc Zyngier <maz@kernel.org>
|
||||
R: James Morse <james.morse@arm.com>
|
||||
R: Julien Thierry <julien.thierry.kdev@gmail.com>
|
||||
R: Alexandru Elisei <alexandru.elisei@arm.com>
|
||||
R: Suzuki K Poulose <suzuki.poulose@arm.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: kvmarm@lists.cs.columbia.edu
|
||||
L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
|
||||
F: arch/arm64/include/asm/kvm*
|
||||
|
|
@ -10198,6 +10199,21 @@ F: net/core/sock_map.c
|
|||
F: net/ipv4/tcp_bpf.c
|
||||
F: net/ipv4/udp_bpf.c
|
||||
|
||||
LANDLOCK SECURITY MODULE
|
||||
M: Mickaël Salaün <mic@digikod.net>
|
||||
L: linux-security-module@vger.kernel.org
|
||||
S: Supported
|
||||
W: https://landlock.io
|
||||
T: git https://github.com/landlock-lsm/linux.git
|
||||
F: Documentation/security/landlock.rst
|
||||
F: Documentation/userspace-api/landlock.rst
|
||||
F: include/uapi/linux/landlock.h
|
||||
F: samples/landlock/
|
||||
F: security/landlock/
|
||||
F: tools/testing/selftests/landlock/
|
||||
K: landlock
|
||||
K: LANDLOCK
|
||||
|
||||
LANTIQ / INTEL Ethernet drivers
|
||||
M: Hauke Mehrtens <hauke@hauke-m.de>
|
||||
L: netdev@vger.kernel.org
|
||||
|
|
@ -14296,8 +14312,10 @@ R: Mark Rutland <mark.rutland@arm.com>
|
|||
R: Alexander Shishkin <alexander.shishkin@linux.intel.com>
|
||||
R: Jiri Olsa <jolsa@redhat.com>
|
||||
R: Namhyung Kim <namhyung@kernel.org>
|
||||
L: linux-perf-users@vger.kernel.org
|
||||
L: linux-kernel@vger.kernel.org
|
||||
S: Supported
|
||||
W: https://perf.wiki.kernel.org/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
|
||||
F: arch/*/events/*
|
||||
F: arch/*/events/*/*
|
||||
|
|
@ -15833,8 +15851,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8xxxu-deve
|
|||
F: drivers/net/wireless/realtek/rtl8xxxu/
|
||||
|
||||
RTRS TRANSPORT DRIVERS
|
||||
M: Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
||||
M: Jack Wang <jinpu.wang@cloud.ionos.com>
|
||||
M: Md. Haris Iqbal <haris.iqbal@ionos.com>
|
||||
M: Jack Wang <jinpu.wang@ionos.com>
|
||||
L: linux-rdma@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/infiniband/ulp/rtrs/
|
||||
|
|
|
|||
6
Makefile
6
Makefile
|
|
@ -1543,9 +1543,9 @@ MRPROPER_FILES += include/config include/generated \
|
|||
debian snap tar-install \
|
||||
.config .config.old .version \
|
||||
Module.symvers \
|
||||
signing_key.pem signing_key.priv signing_key.x509 \
|
||||
x509.genkey extra_certificates signing_key.x509.keyid \
|
||||
signing_key.x509.signer vmlinux-gdb.py \
|
||||
certs/signing_key.pem certs/signing_key.x509 \
|
||||
certs/x509.genkey \
|
||||
vmlinux-gdb.py \
|
||||
*.spec
|
||||
|
||||
# Directories & files removed with 'make distclean'
|
||||
|
|
|
|||
|
|
@ -1068,6 +1068,13 @@ config COMPAT_32BIT_TIME
|
|||
config ARCH_NO_PREEMPT
|
||||
bool
|
||||
|
||||
config ARCH_EPHEMERAL_INODES
|
||||
def_bool n
|
||||
help
|
||||
An arch should select this symbol if it doesn't keep track of inode
|
||||
instances on its own, but instead relies on something else (e.g. the
|
||||
host kernel for an UML kernel).
|
||||
|
||||
config ARCH_SUPPORTS_RT
|
||||
bool
|
||||
|
||||
|
|
|
|||
|
|
@ -483,3 +483,6 @@
|
|||
551 common epoll_pwait2 sys_epoll_pwait2
|
||||
552 common mount_setattr sys_mount_setattr
|
||||
553 common quotactl_path sys_quotactl_path
|
||||
554 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||
555 common landlock_add_rule sys_landlock_add_rule
|
||||
556 common landlock_restrict_self sys_landlock_restrict_self
|
||||
|
|
|
|||
|
|
@ -4,4 +4,7 @@
|
|||
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
void kvm_init_hyp_services(void);
|
||||
bool kvm_arm_hyp_service_available(u32 func_id);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -457,3 +457,6 @@
|
|||
441 common epoll_pwait2 sys_epoll_pwait2
|
||||
442 common mount_setattr sys_mount_setattr
|
||||
443 common quotactl_path sys_quotactl_path
|
||||
444 common landlock_create_ruleset sys_landlock_create_ruleset
|
||||
445 common landlock_add_rule sys_landlock_add_rule
|
||||
446 common landlock_restrict_self sys_landlock_restrict_self
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/asm-bug.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/debug-monitors.h>
|
||||
|
|
@ -279,12 +280,24 @@ alternative_endif
|
|||
* provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
|
||||
*/
|
||||
.macro read_ctr, reg
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
|
||||
mrs \reg, ctr_el0 // read CTR
|
||||
nop
|
||||
alternative_else
|
||||
ldr_l \reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
|
||||
alternative_endif
|
||||
#else
|
||||
alternative_if_not ARM64_KVM_PROTECTED_MODE
|
||||
ASM_BUG()
|
||||
alternative_else_nop_endif
|
||||
alternative_cb kvm_compute_final_ctr_el0
|
||||
movz \reg, #0
|
||||
movk \reg, #0, lsl #16
|
||||
movk \reg, #0, lsl #32
|
||||
movk \reg, #0, lsl #48
|
||||
alternative_cb_end
|
||||
#endif
|
||||
.endm
|
||||
|
||||
|
||||
|
|
@ -685,11 +698,11 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
|
|||
.endm
|
||||
|
||||
/*
|
||||
* Set SCTLR_EL1 to the passed value, and invalidate the local icache
|
||||
* Set SCTLR_ELx to the @reg value, and invalidate the local icache
|
||||
* in the process. This is called when setting the MMU on.
|
||||
*/
|
||||
.macro set_sctlr_el1, reg
|
||||
msr sctlr_el1, \reg
|
||||
.macro set_sctlr, sreg, reg
|
||||
msr \sreg, \reg
|
||||
isb
|
||||
/*
|
||||
* Invalidate the local I-cache so that any instructions fetched
|
||||
|
|
@ -701,6 +714,14 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
|
|||
isb
|
||||
.endm
|
||||
|
||||
.macro set_sctlr_el1, reg
|
||||
set_sctlr sctlr_el1, \reg
|
||||
.endm
|
||||
|
||||
.macro set_sctlr_el2, reg
|
||||
set_sctlr sctlr_el2, \reg
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Check whether preempt/bh-disabled asm code should yield as soon as
|
||||
* it is able. This is the case if we are currently running in task
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
|
||||
|
||||
#define psb_csync() asm volatile("hint #17" : : : "memory")
|
||||
#define tsb_csync() asm volatile("hint #18" : : : "memory")
|
||||
#define csdb() asm volatile("hint #20" : : : "memory")
|
||||
|
||||
#ifdef CONFIG_ARM64_PSEUDO_NMI
|
||||
|
|
|
|||
|
|
@ -65,6 +65,19 @@
|
|||
// use EL1&0 translation.
|
||||
|
||||
.Lskip_spe_\@:
|
||||
/* Trace buffer */
|
||||
ubfx x0, x1, #ID_AA64DFR0_TRBE_SHIFT, #4
|
||||
cbz x0, .Lskip_trace_\@ // Skip if TraceBuffer is not present
|
||||
|
||||
mrs_s x0, SYS_TRBIDR_EL1
|
||||
and x0, x0, TRBIDR_PROG
|
||||
cbnz x0, .Lskip_trace_\@ // If TRBE is available at EL2
|
||||
|
||||
mov x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
|
||||
orr x2, x2, x0 // allow the EL1&0 translation
|
||||
// to own it.
|
||||
|
||||
.Lskip_trace_\@:
|
||||
msr mdcr_el2, x2 // Configure debug traps
|
||||
.endm
|
||||
|
||||
|
|
|
|||
|
|
@ -131,6 +131,15 @@ static inline void sve_user_enable(void)
|
|||
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_ZEN_EL0EN);
|
||||
}
|
||||
|
||||
#define sve_cond_update_zcr_vq(val, reg) \
|
||||
do { \
|
||||
u64 __zcr = read_sysreg_s((reg)); \
|
||||
u64 __new = __zcr & ~ZCR_ELx_LEN_MASK; \
|
||||
__new |= (val) & ZCR_ELx_LEN_MASK; \
|
||||
if (__zcr != __new) \
|
||||
write_sysreg_s(__new, (reg)); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Probing and setup functions.
|
||||
* Calls to these functions must be serialised with one another.
|
||||
|
|
@ -160,6 +169,8 @@ static inline int sve_get_current_vl(void)
|
|||
static inline void sve_user_disable(void) { BUILD_BUG(); }
|
||||
static inline void sve_user_enable(void) { BUILD_BUG(); }
|
||||
|
||||
#define sve_cond_update_zcr_vq(val, reg) do { } while (0)
|
||||
|
||||
static inline void sve_init_vq_map(void) { }
|
||||
static inline void sve_update_vq_map(void) { }
|
||||
static inline int sve_verify_vq_map(void) { return 0; }
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
* Author: Catalin Marinas <catalin.marinas@arm.com>
|
||||
*/
|
||||
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.macro fpsimd_save state, tmpnr
|
||||
stp q0, q1, [\state, #16 * 0]
|
||||
stp q2, q3, [\state, #16 * 2]
|
||||
|
|
@ -230,8 +232,7 @@
|
|||
str w\nxtmp, [\xpfpsr, #4]
|
||||
.endm
|
||||
|
||||
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
|
||||
sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
|
||||
.macro __sve_load nxbase, xpfpsr, nxtmp
|
||||
_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
|
||||
_sve_ldr_p 0, \nxbase
|
||||
_sve_wrffr 0
|
||||
|
|
@ -242,3 +243,8 @@
|
|||
ldr w\nxtmp, [\xpfpsr, #4]
|
||||
msr fpcr, x\nxtmp
|
||||
.endm
|
||||
|
||||
.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
|
||||
sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
|
||||
__sve_load \nxbase, \xpfpsr, \nxtmp
|
||||
.endm
|
||||
|
|
|
|||
|
|
@ -10,11 +10,15 @@
|
|||
#define __HYP_CONCAT(a, b) a ## b
|
||||
#define HYP_CONCAT(a, b) __HYP_CONCAT(a, b)
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
/*
|
||||
* KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_,
|
||||
* to separate it from the kernel proper.
|
||||
*/
|
||||
#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym
|
||||
#else
|
||||
#define kvm_nvhe_sym(sym) sym
|
||||
#endif
|
||||
|
||||
#ifdef LINKER_SCRIPT
|
||||
|
||||
|
|
@ -56,6 +60,9 @@
|
|||
*/
|
||||
#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym;
|
||||
|
||||
/* Defines a linker script alias for KVM nVHE hyp symbols */
|
||||
#define KVM_NVHE_ALIAS_HYP(first, sec) kvm_nvhe_sym(first) = kvm_nvhe_sym(sec);
|
||||
|
||||
#endif /* LINKER_SCRIPT */
|
||||
|
||||
#endif /* __ARM64_HYP_IMAGE_H__ */
|
||||
|
|
|
|||
|
|
@ -4,4 +4,7 @@
|
|||
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
void kvm_init_hyp_services(void);
|
||||
bool kvm_arm_hyp_service_available(u32 func_id);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -278,6 +278,8 @@
|
|||
#define CPTR_EL2_DEFAULT CPTR_EL2_RES1
|
||||
|
||||
/* Hyp Debug Configuration Register bits */
|
||||
#define MDCR_EL2_E2TB_MASK (UL(0x3))
|
||||
#define MDCR_EL2_E2TB_SHIFT (UL(24))
|
||||
#define MDCR_EL2_TTRF (1 << 19)
|
||||
#define MDCR_EL2_TPMS (1 << 14)
|
||||
#define MDCR_EL2_E2PB_MASK (UL(0x3))
|
||||
|
|
|
|||
|
|
@ -57,6 +57,12 @@
|
|||
#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13
|
||||
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings 16
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19
|
||||
#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp 20
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
|
@ -154,6 +160,9 @@ struct kvm_nvhe_init_params {
|
|||
unsigned long tpidr_el2;
|
||||
unsigned long stack_hyp_va;
|
||||
phys_addr_t pgd_pa;
|
||||
unsigned long hcr_el2;
|
||||
unsigned long vttbr;
|
||||
unsigned long vtcr;
|
||||
};
|
||||
|
||||
/* Translate a kernel address @ptr into its equivalent linear mapping */
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ struct kvm_s2_mmu {
|
|||
/* The last vcpu id that ran on each physical CPU */
|
||||
int __percpu *last_vcpu_ran;
|
||||
|
||||
struct kvm *kvm;
|
||||
struct kvm_arch *arch;
|
||||
};
|
||||
|
||||
struct kvm_arch_memory_slot {
|
||||
|
|
@ -315,6 +315,8 @@ struct kvm_vcpu_arch {
|
|||
struct kvm_guest_debug_arch regs;
|
||||
/* Statistical profiling extension */
|
||||
u64 pmscr_el1;
|
||||
/* Self-hosted trace */
|
||||
u64 trfcr_el1;
|
||||
} host_debug_state;
|
||||
|
||||
/* VGIC state */
|
||||
|
|
@ -372,8 +374,10 @@ struct kvm_vcpu_arch {
|
|||
};
|
||||
|
||||
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
|
||||
#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
|
||||
sve_ffr_offset((vcpu)->arch.sve_max_vl)))
|
||||
#define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \
|
||||
sve_ffr_offset((vcpu)->arch.sve_max_vl))
|
||||
|
||||
#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
|
||||
|
||||
#define vcpu_sve_state_size(vcpu) ({ \
|
||||
size_t __size_ret; \
|
||||
|
|
@ -382,7 +386,7 @@ struct kvm_vcpu_arch {
|
|||
if (WARN_ON(!sve_vl_valid((vcpu)->arch.sve_max_vl))) { \
|
||||
__size_ret = 0; \
|
||||
} else { \
|
||||
__vcpu_vq = sve_vq_from_vl((vcpu)->arch.sve_max_vl); \
|
||||
__vcpu_vq = vcpu_sve_max_vq(vcpu); \
|
||||
__size_ret = SVE_SIG_REGS_SIZE(__vcpu_vq); \
|
||||
} \
|
||||
\
|
||||
|
|
@ -400,7 +404,13 @@ struct kvm_vcpu_arch {
|
|||
#define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */
|
||||
#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */
|
||||
#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */
|
||||
#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_USE_HW | \
|
||||
KVM_GUESTDBG_SINGLESTEP)
|
||||
/*
|
||||
* When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can
|
||||
* take the following values:
|
||||
|
|
@ -582,15 +592,11 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
|
|||
struct kvm_vcpu_events *events);
|
||||
|
||||
#define KVM_ARCH_WANT_MMU_NOTIFIER
|
||||
int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end, unsigned flags);
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
|
||||
|
||||
void kvm_arm_halt_guest(struct kvm *kvm);
|
||||
void kvm_arm_resume_guest(struct kvm *kvm);
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
#define kvm_call_hyp_nvhe(f, ...) \
|
||||
({ \
|
||||
struct arm_smccc_res res; \
|
||||
|
|
@ -630,9 +636,13 @@ void kvm_arm_resume_guest(struct kvm *kvm);
|
|||
\
|
||||
ret; \
|
||||
})
|
||||
#else /* __KVM_NVHE_HYPERVISOR__ */
|
||||
#define kvm_call_hyp(f, ...) f(__VA_ARGS__)
|
||||
#define kvm_call_hyp_ret(f, ...) f(__VA_ARGS__)
|
||||
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
|
||||
#endif /* __KVM_NVHE_HYPERVISOR__ */
|
||||
|
||||
void force_vm_exit(const cpumask_t *mask);
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
|
||||
|
||||
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
|
||||
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
|
||||
|
|
@ -692,19 +702,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
|
|||
ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
|
||||
}
|
||||
|
||||
static inline bool kvm_arch_requires_vhe(void)
|
||||
{
|
||||
/*
|
||||
* The Arm architecture specifies that implementation of SVE
|
||||
* requires VHE also to be implemented. The KVM code for arm64
|
||||
* relies on this when SVE is present:
|
||||
*/
|
||||
if (system_supports_sve())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
|
||||
|
||||
static inline void kvm_arch_hardware_unsetup(void) {}
|
||||
|
|
@ -713,6 +710,7 @@ static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
|
|||
static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
|
||||
|
||||
void kvm_arm_init_debug(void);
|
||||
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
|
||||
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
|
||||
|
|
@ -734,6 +732,10 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
|
|||
return (!has_vhe() && attr->exclude_host);
|
||||
}
|
||||
|
||||
/* Flags for host debug state */
|
||||
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu);
|
||||
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu);
|
||||
|
||||
#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */
|
||||
static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
|
|
@ -771,5 +773,12 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
|
|||
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
|
||||
|
||||
int kvm_trng_call(struct kvm_vcpu *vcpu);
|
||||
#ifdef CONFIG_KVM
|
||||
extern phys_addr_t hyp_mem_base;
|
||||
extern phys_addr_t hyp_mem_size;
|
||||
void __init kvm_hyp_reserve(void);
|
||||
#else
|
||||
static inline void kvm_hyp_reserve(void) { }
|
||||
#endif
|
||||
|
||||
#endif /* __ARM64_KVM_HOST_H__ */
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
|
|||
|
||||
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
|
||||
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
|
||||
void __sve_save_state(void *sve_pffr, u32 *fpsr);
|
||||
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
|
||||
|
||||
#ifndef __KVM_NVHE_HYPERVISOR__
|
||||
void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
|
||||
|
|
@ -100,10 +102,20 @@ u64 __guest_enter(struct kvm_vcpu *vcpu);
|
|||
|
||||
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
void __noreturn hyp_panic(void);
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
|
||||
u64 elr, u64 par);
|
||||
#endif
|
||||
|
||||
#ifdef __KVM_NVHE_HYPERVISOR__
|
||||
void __pkvm_init_switch_pgd(phys_addr_t phys, unsigned long size,
|
||||
phys_addr_t pgd, void *sp, void *cont_fn);
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits);
|
||||
void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
|
||||
#endif
|
||||
|
||||
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
|
||||
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
|
||||
|
||||
#endif /* __ARM64_KVM_HYP_H__ */
|
||||
|
|
|
|||
|
|
@ -121,6 +121,8 @@ void kvm_update_va_mask(struct alt_instr *alt,
|
|||
void kvm_compute_layout(void);
|
||||
void kvm_apply_hyp_relocations(void);
|
||||
|
||||
#define __hyp_pa(x) (((phys_addr_t)(x)) + hyp_physvirt_offset)
|
||||
|
||||
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
|
||||
{
|
||||
asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
|
||||
|
|
@ -166,7 +168,15 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
|
|||
|
||||
phys_addr_t kvm_mmu_get_httbr(void);
|
||||
phys_addr_t kvm_get_idmap_vector(void);
|
||||
int kvm_mmu_init(void);
|
||||
int kvm_mmu_init(u32 *hyp_va_bits);
|
||||
|
||||
static inline void *__kvm_vector_slot2addr(void *base,
|
||||
enum arm64_hyp_spectre_vector slot)
|
||||
{
|
||||
int idx = slot - (slot != HYP_VECTOR_DIRECT);
|
||||
|
||||
return base + (idx * SZ_2K);
|
||||
}
|
||||
|
||||
struct kvm;
|
||||
|
||||
|
|
@ -262,9 +272,9 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
|
|||
* Must be called from hyp code running at EL2 with an updated VTTBR
|
||||
* and interrupts disabled.
|
||||
*/
|
||||
static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
|
||||
static __always_inline void __load_stage2(struct kvm_s2_mmu *mmu, unsigned long vtcr)
|
||||
{
|
||||
write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2);
|
||||
write_sysreg(vtcr, vtcr_el2);
|
||||
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
|
||||
|
||||
/*
|
||||
|
|
@ -275,5 +285,14 @@ static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
|
|||
asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
|
||||
}
|
||||
|
||||
static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
__load_stage2(mmu, kern_hyp_va(mmu->arch)->vtcr);
|
||||
}
|
||||
|
||||
static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
return container_of(mmu->arch, struct kvm, arch);
|
||||
}
|
||||
#endif /* __ASSEMBLY__ */
|
||||
#endif /* __ARM64_KVM_MMU_H__ */
|
||||
|
|
|
|||
|
|
@ -11,22 +11,79 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define KVM_PGTABLE_MAX_LEVELS 4U
|
||||
|
||||
static inline u64 kvm_get_parange(u64 mmfr0)
|
||||
{
|
||||
u64 parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_PARANGE_SHIFT);
|
||||
if (parange > ID_AA64MMFR0_PARANGE_MAX)
|
||||
parange = ID_AA64MMFR0_PARANGE_MAX;
|
||||
|
||||
return parange;
|
||||
}
|
||||
|
||||
typedef u64 kvm_pte_t;
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable_mm_ops - Memory management callbacks.
|
||||
* @zalloc_page: Allocate a single zeroed memory page. The @arg parameter
|
||||
* can be used by the walker to pass a memcache. The
|
||||
* initial refcount of the page is 1.
|
||||
* @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The
|
||||
* @size parameter is in bytes, and is rounded-up to the
|
||||
* next page boundary. The resulting allocation is
|
||||
* physically contiguous.
|
||||
* @free_pages_exact: Free an exact number of memory pages previously
|
||||
* allocated by zalloc_pages_exact.
|
||||
* @get_page: Increment the refcount on a page.
|
||||
* @put_page: Decrement the refcount on a page. When the refcount
|
||||
* reaches 0 the page is automatically freed.
|
||||
* @page_count: Return the refcount of a page.
|
||||
* @phys_to_virt: Convert a physical address into a virtual address mapped
|
||||
* in the current context.
|
||||
* @virt_to_phys: Convert a virtual address mapped in the current context
|
||||
* into a physical address.
|
||||
*/
|
||||
struct kvm_pgtable_mm_ops {
|
||||
void* (*zalloc_page)(void *arg);
|
||||
void* (*zalloc_pages_exact)(size_t size);
|
||||
void (*free_pages_exact)(void *addr, size_t size);
|
||||
void (*get_page)(void *addr);
|
||||
void (*put_page)(void *addr);
|
||||
int (*page_count)(void *addr);
|
||||
void* (*phys_to_virt)(phys_addr_t phys);
|
||||
phys_addr_t (*virt_to_phys)(void *addr);
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_stage2_flags - Stage-2 page-table flags.
|
||||
* @KVM_PGTABLE_S2_NOFWB: Don't enforce Normal-WB even if the CPUs have
|
||||
* ARM64_HAS_STAGE2_FWB.
|
||||
* @KVM_PGTABLE_S2_IDMAP: Only use identity mappings.
|
||||
*/
|
||||
enum kvm_pgtable_stage2_flags {
|
||||
KVM_PGTABLE_S2_NOFWB = BIT(0),
|
||||
KVM_PGTABLE_S2_IDMAP = BIT(1),
|
||||
};
|
||||
|
||||
/**
|
||||
* struct kvm_pgtable - KVM page-table.
|
||||
* @ia_bits: Maximum input address size, in bits.
|
||||
* @start_level: Level at which the page-table walk starts.
|
||||
* @pgd: Pointer to the first top-level entry of the page-table.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
|
||||
*/
|
||||
struct kvm_pgtable {
|
||||
u32 ia_bits;
|
||||
u32 start_level;
|
||||
kvm_pte_t *pgd;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
|
||||
/* Stage-2 only */
|
||||
struct kvm_s2_mmu *mmu;
|
||||
enum kvm_pgtable_stage2_flags flags;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
@ -49,6 +106,16 @@ enum kvm_pgtable_prot {
|
|||
#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R)
|
||||
#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
|
||||
|
||||
/**
|
||||
* struct kvm_mem_range - Range of Intermediate Physical Addresses
|
||||
* @start: Start of the range.
|
||||
* @end: End of the range.
|
||||
*/
|
||||
struct kvm_mem_range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
|
||||
* @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid
|
||||
|
|
@ -86,10 +153,12 @@ struct kvm_pgtable_walker {
|
|||
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
* @va_bits: Maximum virtual address bits.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits);
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
||||
struct kvm_pgtable_mm_ops *mm_ops);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table.
|
||||
|
|
@ -123,17 +192,41 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
|||
enum kvm_pgtable_prot prot);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
|
||||
* kvm_get_vtcr() - Helper to construct VTCR_EL2
|
||||
* @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register.
|
||||
* @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register.
|
||||
* @phys_shfit: Value to set in VTCR_EL2.T0SZ.
|
||||
*
|
||||
* The VTCR value is common across all the physical CPUs on the system.
|
||||
* We use system wide sanitised values to fill in different fields,
|
||||
* except for Hardware Management of Access Flags. HA Flag is set
|
||||
* unconditionally on all CPUs, as it is safe to run with or without
|
||||
* the feature and the bit is RES0 on CPUs that don't support it.
|
||||
*
|
||||
* Return: VTCR_EL2 value
|
||||
*/
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
|
||||
* @pgt: Uninitialised page-table structure to initialise.
|
||||
* @kvm: KVM structure representing the guest virtual machine.
|
||||
* @arch: Arch-specific KVM structure representing the guest virtual
|
||||
* machine.
|
||||
* @mm_ops: Memory management callbacks.
|
||||
* @flags: Stage-2 configuration flags.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm);
|
||||
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags);
|
||||
|
||||
#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
|
||||
kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
*
|
||||
* The page-table is assumed to be unreachable by any hardware walkers prior
|
||||
* to freeing and therefore no TLB invalidation is performed.
|
||||
|
|
@ -142,13 +235,13 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
|||
|
||||
/**
|
||||
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address at which to place the mapping.
|
||||
* @size: Size of the mapping.
|
||||
* @phys: Physical address of the memory to map.
|
||||
* @prot: Permissions and attributes for the mapping.
|
||||
* @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to
|
||||
* allocate page-table pages.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
*
|
||||
* The offset of @addr within a page is ignored, @size is rounded-up to
|
||||
* the next page boundary and @phys is rounded-down to the previous page
|
||||
|
|
@ -170,11 +263,31 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
|||
*/
|
||||
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
u64 phys, enum kvm_pgtable_prot prot,
|
||||
struct kvm_mmu_memory_cache *mc);
|
||||
void *mc);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
|
||||
* track ownership.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Base intermediate physical address to annotate.
|
||||
* @size: Size of the annotated range.
|
||||
* @mc: Cache of pre-allocated and zeroed memory from which to allocate
|
||||
* page-table pages.
|
||||
* @owner_id: Unique identifier for the owner of the page.
|
||||
*
|
||||
* By default, all page-tables are owned by identifier 0. This function can be
|
||||
* used to mark portions of the IPA space as owned by other entities. When a
|
||||
* stage 2 is used with identity-mappings, these annotations allow to use the
|
||||
* page-table data structure as a simple rmap.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, u8 owner_id);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address from which to remove the mapping.
|
||||
* @size: Size of the mapping.
|
||||
*
|
||||
|
|
@ -194,7 +307,7 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
|||
/**
|
||||
* kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range
|
||||
* without TLB invalidation.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address from which to write-protect,
|
||||
* @size: Size of the range.
|
||||
*
|
||||
|
|
@ -211,7 +324,7 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
|||
|
||||
/**
|
||||
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address to identify the page-table entry.
|
||||
*
|
||||
* The offset of @addr within a page is ignored.
|
||||
|
|
@ -225,7 +338,7 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
|
|||
|
||||
/**
|
||||
* kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address to identify the page-table entry.
|
||||
*
|
||||
* The offset of @addr within a page is ignored.
|
||||
|
|
@ -244,7 +357,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
|
|||
/**
|
||||
* kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
|
||||
* page-table entry.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address to identify the page-table entry.
|
||||
* @prot: Additional permissions to grant for the mapping.
|
||||
*
|
||||
|
|
@ -263,7 +376,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
|||
/**
|
||||
* kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
|
||||
* access flag set.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address to identify the page-table entry.
|
||||
*
|
||||
* The offset of @addr within a page is ignored.
|
||||
|
|
@ -276,7 +389,7 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
|
|||
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
|
||||
* of Coherency for guest stage-2 address
|
||||
* range.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Intermediate physical address from which to flush.
|
||||
* @size: Size of the range.
|
||||
*
|
||||
|
|
@ -311,4 +424,23 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
|
|||
int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
struct kvm_pgtable_walker *walker);
|
||||
|
||||
/**
|
||||
* kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
|
||||
* Addresses with compatible permission
|
||||
* attributes.
|
||||
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
|
||||
* @addr: Address that must be covered by the range.
|
||||
* @prot: Protection attributes that the range must be compatible with.
|
||||
* @range: Range structure used to limit the search space at call time and
|
||||
* that will hold the result.
|
||||
*
|
||||
* The offset of @addr within a page is ignored. An IPA is compatible with @prot
|
||||
* iff its corresponding stage-2 page-table entry has default ownership and, if
|
||||
* valid, is mapped with protection attributes identical to @prot.
|
||||
*
|
||||
* Return: 0 on success, negative error code on failure.
|
||||
*/
|
||||
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot,
|
||||
struct kvm_mem_range *range);
|
||||
#endif /* __ARM64_KVM_PGTABLE_H__ */
|
||||
|
|
|
|||
|
|
@ -71,10 +71,10 @@ extern bool arm64_use_ng_mappings;
|
|||
#define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN)
|
||||
#define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT)
|
||||
|
||||
#define PAGE_S2_MEMATTR(attr) \
|
||||
#define PAGE_S2_MEMATTR(attr, has_fwb) \
|
||||
({ \
|
||||
u64 __val; \
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) \
|
||||
if (has_fwb) \
|
||||
__val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr); \
|
||||
else \
|
||||
__val = PTE_S2_MEMATTR(MT_S2_ ## attr); \
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
|
|||
extern char __hyp_text_start[], __hyp_text_end[];
|
||||
extern char __hyp_rodata_start[], __hyp_rodata_end[];
|
||||
extern char __hyp_reloc_begin[], __hyp_reloc_end[];
|
||||
extern char __hyp_bss_start[], __hyp_bss_end[];
|
||||
extern char __idmap_text_start[], __idmap_text_end[];
|
||||
extern char __initdata_begin[], __initdata_end[];
|
||||
extern char __inittext_begin[], __inittext_end[];
|
||||
|
|
|
|||
|
|
@ -283,6 +283,8 @@
|
|||
#define SYS_PMSIRR_EL1_INTERVAL_MASK 0xffffffUL
|
||||
|
||||
/* Filtering controls */
|
||||
#define SYS_PMSNEVFR_EL1 sys_reg(3, 0, 9, 9, 1)
|
||||
|
||||
#define SYS_PMSFCR_EL1 sys_reg(3, 0, 9, 9, 4)
|
||||
#define SYS_PMSFCR_EL1_FE_SHIFT 0
|
||||
#define SYS_PMSFCR_EL1_FT_SHIFT 1
|
||||
|
|
@ -333,6 +335,55 @@
|
|||
|
||||
/*** End of Statistical Profiling Extension ***/
|
||||
|
||||
/*
|
||||
* TRBE Registers
|
||||
*/
|
||||
#define SYS_TRBLIMITR_EL1 sys_reg(3, 0, 9, 11, 0)
|
||||
#define SYS_TRBPTR_EL1 sys_reg(3, 0, 9, 11, 1)
|
||||
#define SYS_TRBBASER_EL1 sys_reg(3, 0, 9, 11, 2)
|
||||
#define SYS_TRBSR_EL1 sys_reg(3, 0, 9, 11, 3)
|
||||
#define SYS_TRBMAR_EL1 sys_reg(3, 0, 9, 11, 4)
|
||||
#define SYS_TRBTRG_EL1 sys_reg(3, 0, 9, 11, 6)
|
||||
#define SYS_TRBIDR_EL1 sys_reg(3, 0, 9, 11, 7)
|
||||
|
||||
#define TRBLIMITR_LIMIT_MASK GENMASK_ULL(51, 0)
|
||||
#define TRBLIMITR_LIMIT_SHIFT 12
|
||||
#define TRBLIMITR_NVM BIT(5)
|
||||
#define TRBLIMITR_TRIG_MODE_MASK GENMASK(1, 0)
|
||||
#define TRBLIMITR_TRIG_MODE_SHIFT 3
|
||||
#define TRBLIMITR_FILL_MODE_MASK GENMASK(1, 0)
|
||||
#define TRBLIMITR_FILL_MODE_SHIFT 1
|
||||
#define TRBLIMITR_ENABLE BIT(0)
|
||||
#define TRBPTR_PTR_MASK GENMASK_ULL(63, 0)
|
||||
#define TRBPTR_PTR_SHIFT 0
|
||||
#define TRBBASER_BASE_MASK GENMASK_ULL(51, 0)
|
||||
#define TRBBASER_BASE_SHIFT 12
|
||||
#define TRBSR_EC_MASK GENMASK(5, 0)
|
||||
#define TRBSR_EC_SHIFT 26
|
||||
#define TRBSR_IRQ BIT(22)
|
||||
#define TRBSR_TRG BIT(21)
|
||||
#define TRBSR_WRAP BIT(20)
|
||||
#define TRBSR_ABORT BIT(18)
|
||||
#define TRBSR_STOP BIT(17)
|
||||
#define TRBSR_MSS_MASK GENMASK(15, 0)
|
||||
#define TRBSR_MSS_SHIFT 0
|
||||
#define TRBSR_BSC_MASK GENMASK(5, 0)
|
||||
#define TRBSR_BSC_SHIFT 0
|
||||
#define TRBSR_FSC_MASK GENMASK(5, 0)
|
||||
#define TRBSR_FSC_SHIFT 0
|
||||
#define TRBMAR_SHARE_MASK GENMASK(1, 0)
|
||||
#define TRBMAR_SHARE_SHIFT 8
|
||||
#define TRBMAR_OUTER_MASK GENMASK(3, 0)
|
||||
#define TRBMAR_OUTER_SHIFT 4
|
||||
#define TRBMAR_INNER_MASK GENMASK(3, 0)
|
||||
#define TRBMAR_INNER_SHIFT 0
|
||||
#define TRBTRG_TRG_MASK GENMASK(31, 0)
|
||||
#define TRBTRG_TRG_SHIFT 0
|
||||
#define TRBIDR_FLAG BIT(5)
|
||||
#define TRBIDR_PROG BIT(4)
|
||||
#define TRBIDR_ALIGN_MASK GENMASK(3, 0)
|
||||
#define TRBIDR_ALIGN_SHIFT 0
|
||||
|
||||
#define SYS_PMINTENSET_EL1 sys_reg(3, 0, 9, 14, 1)
|
||||
#define SYS_PMINTENCLR_EL1 sys_reg(3, 0, 9, 14, 2)
|
||||
|
||||
|
|
@ -587,9 +638,6 @@
|
|||
#define SCTLR_ELx_A (BIT(1))
|
||||
#define SCTLR_ELx_M (BIT(0))
|
||||
|
||||
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
|
||||
SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
|
||||
|
||||
/* SCTLR_EL2 specific flags. */
|
||||
#define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \
|
||||
(BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \
|
||||
|
|
@ -601,6 +649,10 @@
|
|||
#define ENDIAN_SET_EL2 0
|
||||
#endif
|
||||
|
||||
#define INIT_SCTLR_EL2_MMU_ON \
|
||||
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
|
||||
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
|
||||
|
||||
#define INIT_SCTLR_EL2_MMU_OFF \
|
||||
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
|
||||
|
||||
|
|
@ -849,6 +901,7 @@
|
|||
#define ID_AA64MMFR2_CNP_SHIFT 0
|
||||
|
||||
/* id_aa64dfr0 */
|
||||
#define ID_AA64DFR0_TRBE_SHIFT 44
|
||||
#define ID_AA64DFR0_TRACE_FILT_SHIFT 40
|
||||
#define ID_AA64DFR0_DOUBLELOCK_SHIFT 36
|
||||
#define ID_AA64DFR0_PMSVER_SHIFT 32
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
|
||||
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
|
||||
|
||||
#define __NR_compat_syscalls 444
|
||||
#define __NR_compat_syscalls 447
|
||||
#endif
|
||||
|
||||
#define __ARCH_WANT_SYS_CLONE
|
||||
|
|
|
|||
|
|
@ -895,6 +895,12 @@ __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
|
|||
__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
|
||||
#define __NR_quotactl_path 443
|
||||
__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
|
||||
#define __NR_landlock_create_ruleset 444
|
||||
__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
|
||||
#define __NR_landlock_add_rule 445
|
||||
__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
|
||||
#define __NR_landlock_restrict_self 446
|
||||
__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
|
||||
|
||||
/*
|
||||
* Please add new compat syscalls above this comment and update
|
||||
|
|
|
|||
|
|
@ -123,6 +123,9 @@ int main(void)
|
|||
DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2));
|
||||
DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va));
|
||||
DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa));
|
||||
DEFINE(NVHE_INIT_HCR_EL2, offsetof(struct kvm_nvhe_init_params, hcr_el2));
|
||||
DEFINE(NVHE_INIT_VTTBR, offsetof(struct kvm_nvhe_init_params, vttbr));
|
||||
DEFINE(NVHE_INIT_VTCR, offsetof(struct kvm_nvhe_init_params, vtcr));
|
||||
#endif
|
||||
#ifdef CONFIG_CPU_PM
|
||||
DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp));
|
||||
|
|
|
|||
|
|
@ -30,10 +30,7 @@
|
|||
* flat identity mapping.
|
||||
*/
|
||||
SYM_CODE_START(__cpu_soft_restart)
|
||||
/* Clear sctlr_el1 flags. */
|
||||
mrs x12, sctlr_el1
|
||||
mov_q x13, SCTLR_ELx_FLAGS
|
||||
bic x12, x12, x13
|
||||
mov_q x12, INIT_SCTLR_EL1_MMU_OFF
|
||||
pre_disable_mmu_workaround
|
||||
/*
|
||||
* either disable EL1&0 translation regime or disable EL2&0 translation
|
||||
|
|
|
|||
|
|
@ -115,9 +115,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
|
|||
mrs_s x0, SYS_VBAR_EL12
|
||||
msr vbar_el1, x0
|
||||
|
||||
// Use EL2 translations for SPE and disable access from EL1
|
||||
// Use EL2 translations for SPE & TRBE and disable access from EL1
|
||||
mrs x0, mdcr_el2
|
||||
bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT)
|
||||
bic x0, x0, #(MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT)
|
||||
msr mdcr_el2, x0
|
||||
|
||||
// Transfer the MM state from EL1 to EL2
|
||||
|
|
|
|||
|
|
@ -65,13 +65,13 @@ __efistub__ctype = _ctype;
|
|||
KVM_NVHE_ALIAS(kvm_patch_vector_branch);
|
||||
KVM_NVHE_ALIAS(kvm_update_va_mask);
|
||||
KVM_NVHE_ALIAS(kvm_get_kimage_voffset);
|
||||
KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0);
|
||||
|
||||
/* Global kernel state accessed by nVHE hyp code. */
|
||||
KVM_NVHE_ALIAS(kvm_vgic_global_state);
|
||||
|
||||
/* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */
|
||||
KVM_NVHE_ALIAS(__hyp_panic_string);
|
||||
KVM_NVHE_ALIAS(panic);
|
||||
KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
|
||||
|
||||
/* Vectors installed by hyp-init on reset HVC. */
|
||||
KVM_NVHE_ALIAS(__hyp_stub_vectors);
|
||||
|
|
@ -104,6 +104,36 @@ KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
|
|||
/* PMU available static key */
|
||||
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
|
||||
|
||||
/* Position-independent library routines */
|
||||
KVM_NVHE_ALIAS_HYP(clear_page, __pi_clear_page);
|
||||
KVM_NVHE_ALIAS_HYP(copy_page, __pi_copy_page);
|
||||
KVM_NVHE_ALIAS_HYP(memcpy, __pi_memcpy);
|
||||
KVM_NVHE_ALIAS_HYP(memset, __pi_memset);
|
||||
|
||||
#ifdef CONFIG_KASAN
|
||||
KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
|
||||
KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
|
||||
#endif
|
||||
|
||||
/* Kernel memory sections */
|
||||
KVM_NVHE_ALIAS(__start_rodata);
|
||||
KVM_NVHE_ALIAS(__end_rodata);
|
||||
KVM_NVHE_ALIAS(__bss_start);
|
||||
KVM_NVHE_ALIAS(__bss_stop);
|
||||
|
||||
/* Hyp memory sections */
|
||||
KVM_NVHE_ALIAS(__hyp_idmap_text_start);
|
||||
KVM_NVHE_ALIAS(__hyp_idmap_text_end);
|
||||
KVM_NVHE_ALIAS(__hyp_text_start);
|
||||
KVM_NVHE_ALIAS(__hyp_text_end);
|
||||
KVM_NVHE_ALIAS(__hyp_bss_start);
|
||||
KVM_NVHE_ALIAS(__hyp_bss_end);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_start);
|
||||
KVM_NVHE_ALIAS(__hyp_rodata_end);
|
||||
|
||||
/* pKVM static key */
|
||||
KVM_NVHE_ALIAS(kvm_protected_mode_initialized);
|
||||
|
||||
#endif /* CONFIG_KVM */
|
||||
|
||||
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */
|
||||
|
|
|
|||
|
|
@ -5,24 +5,7 @@
|
|||
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
|
||||
*/
|
||||
|
||||
#define RO_EXCEPTION_TABLE_ALIGN 8
|
||||
#define RUNTIME_DISCARD_EXIT
|
||||
|
||||
#include <asm-generic/vmlinux.lds.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/hyp_image.h>
|
||||
#include <asm/kernel-pgtable.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#include "image.h"
|
||||
|
||||
OUTPUT_ARCH(aarch64)
|
||||
ENTRY(_text)
|
||||
|
||||
jiffies = jiffies_64;
|
||||
|
||||
|
||||
#ifdef CONFIG_KVM
|
||||
#define HYPERVISOR_EXTABLE \
|
||||
. = ALIGN(SZ_8); \
|
||||
|
|
@ -32,9 +15,11 @@ jiffies = jiffies_64;
|
|||
|
||||
#define HYPERVISOR_DATA_SECTIONS \
|
||||
HYP_SECTION_NAME(.rodata) : { \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_start = .; \
|
||||
*(HYP_SECTION_NAME(.data..ro_after_init)) \
|
||||
*(HYP_SECTION_NAME(.rodata)) \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_rodata_end = .; \
|
||||
}
|
||||
|
||||
|
|
@ -51,29 +36,52 @@ jiffies = jiffies_64;
|
|||
__hyp_reloc_end = .; \
|
||||
}
|
||||
|
||||
#define BSS_FIRST_SECTIONS \
|
||||
__hyp_bss_start = .; \
|
||||
*(HYP_SECTION_NAME(.bss)) \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_bss_end = .;
|
||||
|
||||
/*
|
||||
* We require that __hyp_bss_start and __bss_start are aligned, and enforce it
|
||||
* with an assertion. But the BSS_SECTION macro places an empty .sbss section
|
||||
* between them, which can in some cases cause the linker to misalign them. To
|
||||
* work around the issue, force a page alignment for __bss_start.
|
||||
*/
|
||||
#define SBSS_ALIGN PAGE_SIZE
|
||||
#else /* CONFIG_KVM */
|
||||
#define HYPERVISOR_EXTABLE
|
||||
#define HYPERVISOR_DATA_SECTIONS
|
||||
#define HYPERVISOR_PERCPU_SECTION
|
||||
#define HYPERVISOR_RELOC_SECTION
|
||||
#define SBSS_ALIGN 0
|
||||
#endif
|
||||
|
||||
#define RO_EXCEPTION_TABLE_ALIGN 8
|
||||
#define RUNTIME_DISCARD_EXIT
|
||||
|
||||
#include <asm-generic/vmlinux.lds.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/kernel-pgtable.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#include "image.h"
|
||||
|
||||
OUTPUT_ARCH(aarch64)
|
||||
ENTRY(_text)
|
||||
|
||||
jiffies = jiffies_64;
|
||||
|
||||
#define HYPERVISOR_TEXT \
|
||||
/* \
|
||||
* Align to 4 KB so that \
|
||||
* a) the HYP vector table is at its minimum \
|
||||
* alignment of 2048 bytes \
|
||||
* b) the HYP init code will not cross a page \
|
||||
* boundary if its size does not exceed \
|
||||
* 4 KB (see related ASSERT() below) \
|
||||
*/ \
|
||||
. = ALIGN(SZ_4K); \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_idmap_text_start = .; \
|
||||
*(.hyp.idmap.text) \
|
||||
__hyp_idmap_text_end = .; \
|
||||
__hyp_text_start = .; \
|
||||
*(.hyp.text) \
|
||||
HYPERVISOR_EXTABLE \
|
||||
. = ALIGN(PAGE_SIZE); \
|
||||
__hyp_text_end = .;
|
||||
|
||||
#define IDMAP_TEXT \
|
||||
|
|
@ -276,7 +284,7 @@ SECTIONS
|
|||
__pecoff_data_rawsize = ABSOLUTE(. - __initdata_begin);
|
||||
_edata = .;
|
||||
|
||||
BSS_SECTION(0, 0, 0)
|
||||
BSS_SECTION(SBSS_ALIGN, 0, 0)
|
||||
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
init_pg_dir = .;
|
||||
|
|
@ -309,11 +317,12 @@ SECTIONS
|
|||
#include "image-vars.h"
|
||||
|
||||
/*
|
||||
* The HYP init code and ID map text can't be longer than a page each,
|
||||
* and should not cross a page boundary.
|
||||
* The HYP init code and ID map text can't be longer than a page each. The
|
||||
* former is page-aligned, but the latter may not be with 16K or 64K pages, so
|
||||
* it should also not cross a page boundary.
|
||||
*/
|
||||
ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
|
||||
"HYP init code too big or misaligned")
|
||||
ASSERT(__hyp_idmap_text_end - __hyp_idmap_text_start <= PAGE_SIZE,
|
||||
"HYP init code too big")
|
||||
ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
|
||||
"ID map text too big or misaligned")
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
|
|
@ -324,6 +333,9 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
|
|||
ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
|
||||
"Entry trampoline text too big")
|
||||
#endif
|
||||
#ifdef CONFIG_KVM
|
||||
ASSERT(__hyp_bss_start == __bss_start, "HYP and Host BSS are misaligned")
|
||||
#endif
|
||||
/*
|
||||
* If padding is applied before .head.text, virt<->phys conversions will fail.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -206,8 +206,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
|||
case KVM_CAP_ARM_INJECT_EXT_DABT:
|
||||
case KVM_CAP_SET_GUEST_DEBUG:
|
||||
case KVM_CAP_VCPU_ATTRIBUTES:
|
||||
case KVM_CAP_PTP_KVM:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_SET_GUEST_DEBUG2:
|
||||
return KVM_GUESTDBG_VALID_MASK;
|
||||
case KVM_CAP_ARM_SET_DEVICE_ADDR:
|
||||
r = 1;
|
||||
break;
|
||||
|
|
@ -416,10 +419,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|||
|
||||
if (vcpu_has_ptrauth(vcpu))
|
||||
vcpu_ptrauth_disable(vcpu);
|
||||
kvm_arch_vcpu_load_debug_state_flags(vcpu);
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_arch_vcpu_put_debug_state_flags(vcpu);
|
||||
kvm_arch_vcpu_put_fp(vcpu);
|
||||
if (has_vhe())
|
||||
kvm_vcpu_put_sysregs_vhe(vcpu);
|
||||
|
|
@ -580,6 +585,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->arch.has_run_once = true;
|
||||
|
||||
kvm_arm_vcpu_init_debug(vcpu);
|
||||
|
||||
if (likely(irqchip_in_kernel(kvm))) {
|
||||
/*
|
||||
* Map the VGIC hardware resources before running a vcpu the
|
||||
|
|
@ -1268,7 +1275,7 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
|
|||
}
|
||||
|
||||
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
|
||||
struct kvm_memory_slot *memslot)
|
||||
const struct kvm_memory_slot *memslot)
|
||||
{
|
||||
kvm_flush_remote_tlbs(kvm);
|
||||
}
|
||||
|
|
@ -1350,16 +1357,9 @@ static unsigned long nvhe_percpu_order(void)
|
|||
/* A lookup table holding the hypervisor VA for each vector slot */
|
||||
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
|
||||
|
||||
static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot)
|
||||
{
|
||||
return slot - (slot != HYP_VECTOR_DIRECT);
|
||||
}
|
||||
|
||||
static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot)
|
||||
{
|
||||
int idx = __kvm_vector_slot2idx(slot);
|
||||
|
||||
hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K);
|
||||
hyp_spectre_vector_selector[slot] = __kvm_vector_slot2addr(base, slot);
|
||||
}
|
||||
|
||||
static int kvm_init_vector_slots(void)
|
||||
|
|
@ -1388,22 +1388,18 @@ static int kvm_init_vector_slots(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void cpu_init_hyp_mode(void)
|
||||
static void cpu_prepare_hyp_mode(int cpu)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params);
|
||||
struct arm_smccc_res res;
|
||||
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
|
||||
unsigned long tcr;
|
||||
|
||||
/* Switch from the HYP stub to our own HYP init vector */
|
||||
__hyp_set_vectors(kvm_get_idmap_vector());
|
||||
|
||||
/*
|
||||
* Calculate the raw per-cpu offset without a translation from the
|
||||
* kernel's mapping to the linear mapping, and store it in tpidr_el2
|
||||
* so that we can use adr_l to access per-cpu variables in EL2.
|
||||
* Also drop the KASAN tag which gets in the way...
|
||||
*/
|
||||
params->tpidr_el2 = (unsigned long)kasan_reset_tag(this_cpu_ptr_nvhe_sym(__per_cpu_start)) -
|
||||
params->tpidr_el2 = (unsigned long)kasan_reset_tag(per_cpu_ptr_nvhe_sym(__per_cpu_start, cpu)) -
|
||||
(unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start));
|
||||
|
||||
params->mair_el2 = read_sysreg(mair_el1);
|
||||
|
|
@ -1427,14 +1423,28 @@ static void cpu_init_hyp_mode(void)
|
|||
tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
|
||||
params->tcr_el2 = tcr;
|
||||
|
||||
params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE);
|
||||
params->stack_hyp_va = kern_hyp_va(per_cpu(kvm_arm_hyp_stack_page, cpu) + PAGE_SIZE);
|
||||
params->pgd_pa = kvm_mmu_get_httbr();
|
||||
if (is_protected_kvm_enabled())
|
||||
params->hcr_el2 = HCR_HOST_NVHE_PROTECTED_FLAGS;
|
||||
else
|
||||
params->hcr_el2 = HCR_HOST_NVHE_FLAGS;
|
||||
params->vttbr = params->vtcr = 0;
|
||||
|
||||
/*
|
||||
* Flush the init params from the data cache because the struct will
|
||||
* be read while the MMU is off.
|
||||
*/
|
||||
kvm_flush_dcache_to_poc(params, sizeof(*params));
|
||||
}
|
||||
|
||||
static void hyp_install_host_vector(void)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params;
|
||||
struct arm_smccc_res res;
|
||||
|
||||
/* Switch from the HYP stub to our own HYP init vector */
|
||||
__hyp_set_vectors(kvm_get_idmap_vector());
|
||||
|
||||
/*
|
||||
* Call initialization code, and switch to the full blown HYP code.
|
||||
|
|
@ -1443,8 +1453,14 @@ static void cpu_init_hyp_mode(void)
|
|||
* cpus_have_const_cap() wrapper.
|
||||
*/
|
||||
BUG_ON(!system_capabilities_finalized());
|
||||
params = this_cpu_ptr_nvhe_sym(kvm_init_params);
|
||||
arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res);
|
||||
WARN_ON(res.a0 != SMCCC_RET_SUCCESS);
|
||||
}
|
||||
|
||||
static void cpu_init_hyp_mode(void)
|
||||
{
|
||||
hyp_install_host_vector();
|
||||
|
||||
/*
|
||||
* Disabling SSBD on a non-VHE system requires us to enable SSBS
|
||||
|
|
@ -1487,7 +1503,10 @@ static void cpu_set_hyp_vector(void)
|
|||
struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data);
|
||||
void *vector = hyp_spectre_vector_selector[data->slot];
|
||||
|
||||
*this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
|
||||
if (!is_protected_kvm_enabled())
|
||||
*this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector;
|
||||
else
|
||||
kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot);
|
||||
}
|
||||
|
||||
static void cpu_hyp_reinit(void)
|
||||
|
|
@ -1495,13 +1514,14 @@ static void cpu_hyp_reinit(void)
|
|||
kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
|
||||
|
||||
cpu_hyp_reset();
|
||||
cpu_set_hyp_vector();
|
||||
|
||||
if (is_kernel_in_hyp_mode())
|
||||
kvm_timer_init_vhe();
|
||||
else
|
||||
cpu_init_hyp_mode();
|
||||
|
||||
cpu_set_hyp_vector();
|
||||
|
||||
kvm_arm_init_debug();
|
||||
|
||||
if (vgic_present)
|
||||
|
|
@ -1697,18 +1717,62 @@ static void teardown_hyp_mode(void)
|
|||
}
|
||||
}
|
||||
|
||||
static int do_pkvm_init(u32 hyp_va_bits)
|
||||
{
|
||||
void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
|
||||
int ret;
|
||||
|
||||
preempt_disable();
|
||||
hyp_install_host_vector();
|
||||
ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size,
|
||||
num_possible_cpus(), kern_hyp_va(per_cpu_base),
|
||||
hyp_va_bits);
|
||||
preempt_enable();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_hyp_init_protection(u32 hyp_va_bits)
|
||||
{
|
||||
void *addr = phys_to_virt(hyp_mem_base);
|
||||
int ret;
|
||||
|
||||
kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
|
||||
|
||||
ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = do_pkvm_init(hyp_va_bits);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
free_hyp_pgds();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inits Hyp-mode on all online CPUs
|
||||
*/
|
||||
static int init_hyp_mode(void)
|
||||
{
|
||||
u32 hyp_va_bits;
|
||||
int cpu;
|
||||
int err = 0;
|
||||
int err = -ENOMEM;
|
||||
|
||||
/*
|
||||
* The protected Hyp-mode cannot be initialized if the memory pool
|
||||
* allocation has failed.
|
||||
*/
|
||||
if (is_protected_kvm_enabled() && !hyp_mem_base)
|
||||
goto out_err;
|
||||
|
||||
/*
|
||||
* Allocate Hyp PGD and setup Hyp identity mapping
|
||||
*/
|
||||
err = kvm_mmu_init();
|
||||
err = kvm_mmu_init(&hyp_va_bits);
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
|
|
@ -1769,7 +1833,19 @@ static int init_hyp_mode(void)
|
|||
goto out_err;
|
||||
}
|
||||
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
|
||||
/*
|
||||
* .hyp.bss is guaranteed to be placed at the beginning of the .bss
|
||||
* section thanks to an assertion in the linker script. Map it RW and
|
||||
* the rest of .bss RO.
|
||||
*/
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_start),
|
||||
kvm_ksym_ref(__hyp_bss_end), PAGE_HYP);
|
||||
if (err) {
|
||||
kvm_err("Cannot map hyp bss section: %d\n", err);
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
err = create_hyp_mappings(kvm_ksym_ref(__hyp_bss_end),
|
||||
kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
|
||||
if (err) {
|
||||
kvm_err("Cannot map bss section\n");
|
||||
|
|
@ -1790,26 +1866,36 @@ static int init_hyp_mode(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Map Hyp percpu pages
|
||||
*/
|
||||
for_each_possible_cpu(cpu) {
|
||||
char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
|
||||
char *percpu_end = percpu_begin + nvhe_percpu_size();
|
||||
|
||||
/* Map Hyp percpu pages */
|
||||
err = create_hyp_mappings(percpu_begin, percpu_end, PAGE_HYP);
|
||||
|
||||
if (err) {
|
||||
kvm_err("Cannot map hyp percpu region\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
/* Prepare the CPU initialization parameters */
|
||||
cpu_prepare_hyp_mode(cpu);
|
||||
}
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
init_cpu_logical_map();
|
||||
|
||||
if (!init_psci_relay())
|
||||
if (!init_psci_relay()) {
|
||||
err = -ENODEV;
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
err = kvm_hyp_init_protection(hyp_va_bits);
|
||||
if (err) {
|
||||
kvm_err("Failed to init hyp memory protection\n");
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1820,6 +1906,72 @@ static int init_hyp_mode(void)
|
|||
return err;
|
||||
}
|
||||
|
||||
static void _kvm_host_prot_finalize(void *discard)
|
||||
{
|
||||
WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
|
||||
}
|
||||
|
||||
static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
|
||||
{
|
||||
return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
|
||||
}
|
||||
|
||||
#define pkvm_mark_hyp_section(__section) \
|
||||
pkvm_mark_hyp(__pa_symbol(__section##_start), \
|
||||
__pa_symbol(__section##_end))
|
||||
|
||||
static int finalize_hyp_mode(void)
|
||||
{
|
||||
int cpu, ret;
|
||||
|
||||
if (!is_protected_kvm_enabled())
|
||||
return 0;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_idmap_text);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_text);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_rodata);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp_section(__hyp_bss);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
|
||||
phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
|
||||
|
||||
ret = pkvm_mark_hyp(start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
|
||||
end = start + PAGE_SIZE;
|
||||
ret = pkvm_mark_hyp(start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Flip the static key upfront as that may no longer be possible
|
||||
* once the host stage 2 is installed.
|
||||
*/
|
||||
static_branch_enable(&kvm_protected_mode_initialized);
|
||||
on_each_cpu(_kvm_host_prot_finalize, NULL, 1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void check_kvm_target_cpu(void *ret)
|
||||
{
|
||||
*(int *)ret = kvm_target_cpu();
|
||||
|
|
@ -1894,11 +2046,6 @@ int kvm_arch_init(void *opaque)
|
|||
|
||||
in_hyp_mode = is_kernel_in_hyp_mode();
|
||||
|
||||
if (!in_hyp_mode && kvm_arch_requires_vhe()) {
|
||||
kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
|
||||
cpus_have_final_cap(ARM64_WORKAROUND_1508412))
|
||||
kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
|
||||
|
|
@ -1936,8 +2083,15 @@ int kvm_arch_init(void *opaque)
|
|||
if (err)
|
||||
goto out_hyp;
|
||||
|
||||
if (!in_hyp_mode) {
|
||||
err = finalize_hyp_mode();
|
||||
if (err) {
|
||||
kvm_err("Failed to finalize Hyp protection\n");
|
||||
goto out_hyp;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_protected_kvm_enabled()) {
|
||||
static_branch_enable(&kvm_protected_mode_initialized);
|
||||
kvm_info("Protected nVHE mode initialized successfully\n");
|
||||
} else if (in_hyp_mode) {
|
||||
kvm_info("VHE mode initialized successfully\n");
|
||||
|
|
|
|||
|
|
@ -68,6 +68,65 @@ void kvm_arm_init_debug(void)
|
|||
__this_cpu_write(mdcr_el2, kvm_call_hyp_ret(__kvm_get_mdcr_el2));
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arm_setup_mdcr_el2 - configure vcpu mdcr_el2 value
|
||||
*
|
||||
* @vcpu: the vcpu pointer
|
||||
*
|
||||
* This ensures we will trap access to:
|
||||
* - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
|
||||
* - Debug ROM Address (MDCR_EL2_TDRA)
|
||||
* - OS related registers (MDCR_EL2_TDOSA)
|
||||
* - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
|
||||
* - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
|
||||
* - Self-hosted Trace (MDCR_EL2_TTRF/MDCR_EL2_E2TB)
|
||||
*/
|
||||
static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* This also clears MDCR_EL2_E2PB_MASK and MDCR_EL2_E2TB_MASK
|
||||
* to disable guest access to the profiling and trace buffers
|
||||
*/
|
||||
vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
|
||||
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
|
||||
MDCR_EL2_TPMS |
|
||||
MDCR_EL2_TTRF |
|
||||
MDCR_EL2_TPMCR |
|
||||
MDCR_EL2_TDRA |
|
||||
MDCR_EL2_TDOSA);
|
||||
|
||||
/* Is the VM being debugged by userspace? */
|
||||
if (vcpu->guest_debug)
|
||||
/* Route all software debug exceptions to EL2 */
|
||||
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
|
||||
|
||||
/*
|
||||
* Trap debug register access when one of the following is true:
|
||||
* - Userspace is using the hardware to debug the guest
|
||||
* (KVM_GUESTDBG_USE_HW is set).
|
||||
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
|
||||
*/
|
||||
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
|
||||
!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
|
||||
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
|
||||
|
||||
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arm_vcpu_init_debug - setup vcpu debug traps
|
||||
*
|
||||
* @vcpu: the vcpu pointer
|
||||
*
|
||||
* Set vcpu initial mdcr_el2 value.
|
||||
*/
|
||||
void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
preempt_disable();
|
||||
kvm_arm_setup_mdcr_el2(vcpu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_arm_reset_debug_ptr - reset the debug ptr to point to the vcpu state
|
||||
*/
|
||||
|
|
@ -83,13 +142,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
|
|||
* @vcpu: the vcpu pointer
|
||||
*
|
||||
* This is called before each entry into the hypervisor to setup any
|
||||
* debug related registers. Currently this just ensures we will trap
|
||||
* access to:
|
||||
* - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
|
||||
* - Debug ROM Address (MDCR_EL2_TDRA)
|
||||
* - OS related registers (MDCR_EL2_TDOSA)
|
||||
* - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
|
||||
* - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
|
||||
* debug related registers.
|
||||
*
|
||||
* Additionally, KVM only traps guest accesses to the debug registers if
|
||||
* the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
|
||||
|
|
@ -101,28 +154,14 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
|
|||
|
||||
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY);
|
||||
unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
|
||||
|
||||
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
|
||||
|
||||
/*
|
||||
* This also clears MDCR_EL2_E2PB_MASK to disable guest access
|
||||
* to the profiling buffer.
|
||||
*/
|
||||
vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
|
||||
vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
|
||||
MDCR_EL2_TPMS |
|
||||
MDCR_EL2_TTRF |
|
||||
MDCR_EL2_TPMCR |
|
||||
MDCR_EL2_TDRA |
|
||||
MDCR_EL2_TDOSA);
|
||||
kvm_arm_setup_mdcr_el2(vcpu);
|
||||
|
||||
/* Is Guest debugging in effect? */
|
||||
if (vcpu->guest_debug) {
|
||||
/* Route all software debug exceptions to EL2 */
|
||||
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
|
||||
|
||||
/* Save guest debug state */
|
||||
save_guest_debug_regs(vcpu);
|
||||
|
||||
|
|
@ -176,7 +215,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
|||
|
||||
vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
trap_debug = true;
|
||||
|
||||
trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
|
||||
&vcpu->arch.debug_ptr->dbg_bcr[0],
|
||||
|
|
@ -191,10 +229,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
|||
BUG_ON(!vcpu->guest_debug &&
|
||||
vcpu->arch.debug_ptr != &vcpu->arch.vcpu_debug_state);
|
||||
|
||||
/* Trap debug register access */
|
||||
if (trap_debug)
|
||||
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
|
||||
|
||||
/* If KDE or MDE are set, perform a full save/restore cycle. */
|
||||
if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
|
||||
|
|
@ -203,7 +237,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
|
|||
if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2)
|
||||
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
|
||||
|
||||
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
|
||||
trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1));
|
||||
}
|
||||
|
||||
|
|
@ -231,3 +264,32 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 dfr0;
|
||||
|
||||
/* For VHE, there is nothing to do */
|
||||
if (has_vhe())
|
||||
return;
|
||||
|
||||
dfr0 = read_sysreg(id_aa64dfr0_el1);
|
||||
/*
|
||||
* If SPE is present on this CPU and is available at current EL,
|
||||
* we may need to check if the host state needs to be saved.
|
||||
*/
|
||||
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMSVER_SHIFT) &&
|
||||
!(read_sysreg_s(SYS_PMBIDR_EL1) & BIT(SYS_PMBIDR_EL1_P_SHIFT)))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_SPE;
|
||||
|
||||
/* Check if we have TRBE implemented and available at the host */
|
||||
if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRBE_SHIFT) &&
|
||||
!(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_PROG))
|
||||
vcpu->arch.flags |= KVM_ARM64_DEBUG_STATE_SAVE_TRBE;
|
||||
}
|
||||
|
||||
void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.flags &= ~(KVM_ARM64_DEBUG_STATE_SAVE_SPE |
|
||||
KVM_ARM64_DEBUG_STATE_SAVE_TRBE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <asm/fpsimd.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
|
|
@ -42,6 +43,17 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
|
|||
if (ret)
|
||||
goto error;
|
||||
|
||||
if (vcpu->arch.sve_state) {
|
||||
void *sve_end;
|
||||
|
||||
sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu);
|
||||
|
||||
ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end,
|
||||
PAGE_HYP);
|
||||
if (ret)
|
||||
goto error;
|
||||
}
|
||||
|
||||
vcpu->arch.host_thread_info = kern_hyp_va(ti);
|
||||
vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
|
||||
error:
|
||||
|
|
@ -109,11 +121,17 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
|
|||
local_irq_save(flags);
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
if (guest_has_sve) {
|
||||
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
|
||||
|
||||
if (guest_has_sve)
|
||||
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
|
||||
} else if (host_has_sve) {
|
||||
/* Restore the VL that was saved when bound to the CPU */
|
||||
if (!has_vhe())
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
|
||||
SYS_ZCR_EL1);
|
||||
}
|
||||
|
||||
fpsimd_save_and_flush_cpu_state();
|
||||
} else if (has_vhe() && host_has_sve) {
|
||||
/*
|
||||
* The FPSIMD/SVE state in the CPU has not been touched, and we
|
||||
* have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
|
||||
|
|
|
|||
|
|
@ -299,7 +299,7 @@ static int get_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
|
|||
|
||||
memset(vqs, 0, sizeof(vqs));
|
||||
|
||||
max_vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
|
||||
max_vq = vcpu_sve_max_vq(vcpu);
|
||||
for (vq = SVE_VQ_MIN; vq <= max_vq; ++vq)
|
||||
if (sve_vq_available(vq))
|
||||
vqs[vq_word(vq)] |= vq_mask(vq);
|
||||
|
|
@ -427,7 +427,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region,
|
|||
if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
|
||||
return -ENOENT;
|
||||
|
||||
vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
|
||||
vq = vcpu_sve_max_vq(vcpu);
|
||||
|
||||
reqoffset = SVE_SIG_ZREG_OFFSET(vq, reg_num) -
|
||||
SVE_SIG_REGS_OFFSET;
|
||||
|
|
@ -437,7 +437,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region,
|
|||
if (!vcpu_has_sve(vcpu) || (reg->id & SVE_REG_SLICE_MASK) > 0)
|
||||
return -ENOENT;
|
||||
|
||||
vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);
|
||||
vq = vcpu_sve_max_vq(vcpu);
|
||||
|
||||
reqoffset = SVE_SIG_PREG_OFFSET(vq, reg_num) -
|
||||
SVE_SIG_REGS_OFFSET;
|
||||
|
|
@ -888,11 +888,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
|
||||
KVM_GUESTDBG_USE_SW_BP | \
|
||||
KVM_GUESTDBG_USE_HW | \
|
||||
KVM_GUESTDBG_SINGLESTEP)
|
||||
|
||||
/**
|
||||
* kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
|
||||
* @kvm: pointer to the KVM struct
|
||||
|
|
|
|||
|
|
@ -291,3 +291,48 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
|
|||
if (exception_index == ARM_EXCEPTION_EL1_SERROR)
|
||||
kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
|
||||
}
|
||||
|
||||
void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
|
||||
u64 par, uintptr_t vcpu,
|
||||
u64 far, u64 hpfar) {
|
||||
u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
|
||||
u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
|
||||
u64 mode = spsr & PSR_MODE_MASK;
|
||||
|
||||
/*
|
||||
* The nVHE hyp symbols are not included by kallsyms to avoid issues
|
||||
* with aliasing. That means that the symbols cannot be printed with the
|
||||
* "%pS" format specifier, so fall back to the vmlinux address if
|
||||
* there's no better option.
|
||||
*/
|
||||
if (mode != PSR_MODE_EL2t && mode != PSR_MODE_EL2h) {
|
||||
kvm_err("Invalid host exception to nVHE hyp!\n");
|
||||
} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
|
||||
(esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
|
||||
struct bug_entry *bug = find_bug(elr_in_kimg);
|
||||
const char *file = NULL;
|
||||
unsigned int line = 0;
|
||||
|
||||
/* All hyp bugs, including warnings, are treated as fatal. */
|
||||
if (bug)
|
||||
bug_get_file_line(bug, &file, &line);
|
||||
|
||||
if (file)
|
||||
kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
|
||||
else
|
||||
kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
|
||||
} else {
|
||||
kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hyp has panicked and we're going to handle that by panicking the
|
||||
* kernel. The kernel offset will be revealed in the panic so we're
|
||||
* also safe to reveal the hyp offset as a debugging aid for translating
|
||||
* hyp VAs to vmlinux addresses.
|
||||
*/
|
||||
kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
|
||||
|
||||
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
|
||||
spsr, elr, esr, far, hpfar, par, vcpu);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \
|
|||
-DDISABLE_BRANCH_PROFILING \
|
||||
$(DISABLE_STACKLEAK_PLUGIN)
|
||||
|
||||
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o
|
||||
obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o
|
||||
|
|
|
|||
|
|
@ -19,3 +19,13 @@ SYM_FUNC_START(__fpsimd_restore_state)
|
|||
fpsimd_restore x0, 1
|
||||
ret
|
||||
SYM_FUNC_END(__fpsimd_restore_state)
|
||||
|
||||
SYM_FUNC_START(__sve_restore_state)
|
||||
__sve_load 0, x1, 2
|
||||
ret
|
||||
SYM_FUNC_END(__sve_restore_state)
|
||||
|
||||
SYM_FUNC_START(__sve_save_state)
|
||||
sve_save 0, x1, 2
|
||||
ret
|
||||
SYM_FUNC_END(__sve_save_state)
|
||||
|
|
|
|||
|
|
@ -30,8 +30,6 @@
|
|||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
extern const char __hyp_panic_string[];
|
||||
|
||||
extern struct exception_table_entry __start___kvm_ex_table;
|
||||
extern struct exception_table_entry __stop___kvm_ex_table;
|
||||
|
||||
|
|
@ -160,18 +158,10 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
|
||||
{
|
||||
u8 ec;
|
||||
u64 esr;
|
||||
u64 hpfar, far;
|
||||
|
||||
esr = vcpu->arch.fault.esr_el2;
|
||||
ec = ESR_ELx_EC(esr);
|
||||
|
||||
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
|
||||
return true;
|
||||
|
||||
far = read_sysreg_el2(SYS_FAR);
|
||||
|
||||
/*
|
||||
|
|
@ -194,33 +184,59 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
|||
hpfar = read_sysreg(hpfar_el2);
|
||||
}
|
||||
|
||||
vcpu->arch.fault.far_el2 = far;
|
||||
vcpu->arch.fault.hpfar_el2 = hpfar;
|
||||
fault->far_el2 = far;
|
||||
fault->hpfar_el2 = hpfar;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u8 ec;
|
||||
u64 esr;
|
||||
|
||||
esr = vcpu->arch.fault.esr_el2;
|
||||
ec = ESR_ELx_EC(esr);
|
||||
|
||||
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
|
||||
return true;
|
||||
|
||||
return __get_fault_info(esr, &vcpu->arch.fault);
|
||||
}
|
||||
|
||||
static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct thread_struct *thread;
|
||||
|
||||
thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct,
|
||||
uw.fpsimd_state);
|
||||
|
||||
__sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr);
|
||||
}
|
||||
|
||||
static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
|
||||
__sve_restore_state(vcpu_sve_pffr(vcpu),
|
||||
&vcpu->arch.ctxt.fp_regs.fpsr);
|
||||
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
|
||||
}
|
||||
|
||||
/* Check for an FPSIMD/SVE trap and handle as appropriate */
|
||||
static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
bool vhe, sve_guest, sve_host;
|
||||
bool sve_guest, sve_host;
|
||||
u8 esr_ec;
|
||||
u64 reg;
|
||||
|
||||
if (!system_supports_fpsimd())
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Currently system_supports_sve() currently implies has_vhe(),
|
||||
* so the check is redundant. However, has_vhe() can be determined
|
||||
* statically and helps the compiler remove dead code.
|
||||
*/
|
||||
if (has_vhe() && system_supports_sve()) {
|
||||
if (system_supports_sve()) {
|
||||
sve_guest = vcpu_has_sve(vcpu);
|
||||
sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
|
||||
vhe = true;
|
||||
} else {
|
||||
sve_guest = false;
|
||||
sve_host = false;
|
||||
vhe = has_vhe();
|
||||
}
|
||||
|
||||
esr_ec = kvm_vcpu_trap_get_class(vcpu);
|
||||
|
|
@ -229,53 +245,38 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
|
|||
return false;
|
||||
|
||||
/* Don't handle SVE traps for non-SVE vcpus here: */
|
||||
if (!sve_guest)
|
||||
if (esr_ec != ESR_ELx_EC_FP_ASIMD)
|
||||
return false;
|
||||
if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD)
|
||||
return false;
|
||||
|
||||
/* Valid trap. Switch the context: */
|
||||
|
||||
if (vhe) {
|
||||
u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
|
||||
|
||||
if (has_vhe()) {
|
||||
reg = CPACR_EL1_FPEN;
|
||||
if (sve_guest)
|
||||
reg |= CPACR_EL1_ZEN;
|
||||
|
||||
write_sysreg(reg, cpacr_el1);
|
||||
sysreg_clear_set(cpacr_el1, 0, reg);
|
||||
} else {
|
||||
write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
|
||||
cptr_el2);
|
||||
}
|
||||
reg = CPTR_EL2_TFP;
|
||||
if (sve_guest)
|
||||
reg |= CPTR_EL2_TZ;
|
||||
|
||||
sysreg_clear_set(cptr_el2, reg, 0);
|
||||
}
|
||||
isb();
|
||||
|
||||
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
|
||||
/*
|
||||
* In the SVE case, VHE is assumed: it is enforced by
|
||||
* Kconfig and kvm_arch_init().
|
||||
*/
|
||||
if (sve_host) {
|
||||
struct thread_struct *thread = container_of(
|
||||
vcpu->arch.host_fpsimd_state,
|
||||
struct thread_struct, uw.fpsimd_state);
|
||||
|
||||
sve_save_state(sve_pffr(thread),
|
||||
&vcpu->arch.host_fpsimd_state->fpsr);
|
||||
} else {
|
||||
if (sve_host)
|
||||
__hyp_sve_save_host(vcpu);
|
||||
else
|
||||
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
|
||||
}
|
||||
|
||||
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
|
||||
}
|
||||
|
||||
if (sve_guest) {
|
||||
sve_load_state(vcpu_sve_pffr(vcpu),
|
||||
&vcpu->arch.ctxt.fp_regs.fpsr,
|
||||
sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
|
||||
write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
|
||||
} else {
|
||||
if (sve_guest)
|
||||
__hyp_sve_restore_guest(vcpu);
|
||||
else
|
||||
__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
|
||||
}
|
||||
|
||||
/* Skip restoring fpexc32 for AArch64 guests */
|
||||
if (!(read_sysreg(hcr_el2) & HCR_RW))
|
||||
|
|
|
|||
14
arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
Normal file
14
arch/arm64/kvm/hyp/include/nvhe/early_alloc.h
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __KVM_HYP_EARLY_ALLOC_H
|
||||
#define __KVM_HYP_EARLY_ALLOC_H
|
||||
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
void hyp_early_alloc_init(void *virt, unsigned long size);
|
||||
unsigned long hyp_early_alloc_nr_used_pages(void);
|
||||
void *hyp_early_alloc_page(void *arg);
|
||||
void *hyp_early_alloc_contig(unsigned int nr_pages);
|
||||
|
||||
extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
|
||||
|
||||
#endif /* __KVM_HYP_EARLY_ALLOC_H */
|
||||
68
arch/arm64/kvm/hyp/include/nvhe/gfp.h
Normal file
68
arch/arm64/kvm/hyp/include/nvhe/gfp.h
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __KVM_HYP_GFP_H
|
||||
#define __KVM_HYP_GFP_H
|
||||
|
||||
#include <linux/list.h>
|
||||
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
#define HYP_NO_ORDER UINT_MAX
|
||||
|
||||
struct hyp_pool {
|
||||
/*
|
||||
* Spinlock protecting concurrent changes to the memory pool as well as
|
||||
* the struct hyp_page of the pool's pages until we have a proper atomic
|
||||
* API at EL2.
|
||||
*/
|
||||
hyp_spinlock_t lock;
|
||||
struct list_head free_area[MAX_ORDER];
|
||||
phys_addr_t range_start;
|
||||
phys_addr_t range_end;
|
||||
unsigned int max_order;
|
||||
};
|
||||
|
||||
static inline void hyp_page_ref_inc(struct hyp_page *p)
|
||||
{
|
||||
struct hyp_pool *pool = hyp_page_to_pool(p);
|
||||
|
||||
hyp_spin_lock(&pool->lock);
|
||||
p->refcount++;
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
|
||||
{
|
||||
struct hyp_pool *pool = hyp_page_to_pool(p);
|
||||
int ret;
|
||||
|
||||
hyp_spin_lock(&pool->lock);
|
||||
p->refcount--;
|
||||
ret = (p->refcount == 0);
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void hyp_set_page_refcounted(struct hyp_page *p)
|
||||
{
|
||||
struct hyp_pool *pool = hyp_page_to_pool(p);
|
||||
|
||||
hyp_spin_lock(&pool->lock);
|
||||
if (p->refcount) {
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
BUG();
|
||||
}
|
||||
p->refcount = 1;
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
/* Allocation */
|
||||
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
|
||||
void hyp_get_page(void *addr);
|
||||
void hyp_put_page(void *addr);
|
||||
|
||||
/* Used pages cannot be freed */
|
||||
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
|
||||
unsigned int reserved_pages);
|
||||
#endif /* __KVM_HYP_GFP_H */
|
||||
36
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
Normal file
36
arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#ifndef __KVM_NVHE_MEM_PROTECT__
|
||||
#define __KVM_NVHE_MEM_PROTECT__
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/virt.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
struct host_kvm {
|
||||
struct kvm_arch arch;
|
||||
struct kvm_pgtable pgt;
|
||||
struct kvm_pgtable_mm_ops mm_ops;
|
||||
hyp_spinlock_t lock;
|
||||
};
|
||||
extern struct host_kvm host_kvm;
|
||||
|
||||
int __pkvm_prot_finalize(void);
|
||||
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
|
||||
|
||||
int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
|
||||
|
||||
static __always_inline void __load_host_stage2(void)
|
||||
{
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
|
||||
else
|
||||
write_sysreg(0, vttbr_el2);
|
||||
}
|
||||
#endif /* __KVM_NVHE_MEM_PROTECT__ */
|
||||
51
arch/arm64/kvm/hyp/include/nvhe/memory.h
Normal file
51
arch/arm64/kvm/hyp/include/nvhe/memory.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __KVM_HYP_MEMORY_H
|
||||
#define __KVM_HYP_MEMORY_H
|
||||
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
struct hyp_pool;
|
||||
struct hyp_page {
|
||||
unsigned int refcount;
|
||||
unsigned int order;
|
||||
struct hyp_pool *pool;
|
||||
struct list_head node;
|
||||
};
|
||||
|
||||
extern u64 __hyp_vmemmap;
|
||||
#define hyp_vmemmap ((struct hyp_page *)__hyp_vmemmap)
|
||||
|
||||
#define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset))
|
||||
|
||||
static inline void *hyp_phys_to_virt(phys_addr_t phys)
|
||||
{
|
||||
return __hyp_va(phys);
|
||||
}
|
||||
|
||||
static inline phys_addr_t hyp_virt_to_phys(void *addr)
|
||||
{
|
||||
return __hyp_pa(addr);
|
||||
}
|
||||
|
||||
#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
|
||||
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
|
||||
#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
|
||||
#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
|
||||
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))
|
||||
|
||||
#define hyp_page_to_pfn(page) ((struct hyp_page *)(page) - hyp_vmemmap)
|
||||
#define hyp_page_to_phys(page) hyp_pfn_to_phys((hyp_page_to_pfn(page)))
|
||||
#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page))
|
||||
#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool)
|
||||
|
||||
static inline int hyp_page_count(void *addr)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(addr);
|
||||
|
||||
return p->refcount;
|
||||
}
|
||||
|
||||
#endif /* __KVM_HYP_MEMORY_H */
|
||||
96
arch/arm64/kvm/hyp/include/nvhe/mm.h
Normal file
96
arch/arm64/kvm/hyp/include/nvhe/mm.h
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
#ifndef __KVM_HYP_MM_H
|
||||
#define __KVM_HYP_MM_H
|
||||
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/spectre.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
#define HYP_MEMBLOCK_REGIONS 128
|
||||
extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
|
||||
extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
|
||||
extern struct kvm_pgtable pkvm_pgtable;
|
||||
extern hyp_spinlock_t pkvm_pgd_lock;
|
||||
extern struct hyp_pool hpool;
|
||||
extern u64 __io_map_base;
|
||||
|
||||
int hyp_create_idmap(u32 hyp_va_bits);
|
||||
int hyp_map_vectors(void);
|
||||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
|
||||
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
|
||||
int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot);
|
||||
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot);
|
||||
|
||||
static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
|
||||
unsigned long *start, unsigned long *end)
|
||||
{
|
||||
unsigned long nr_pages = size >> PAGE_SHIFT;
|
||||
struct hyp_page *p = hyp_phys_to_page(phys);
|
||||
|
||||
*start = (unsigned long)p;
|
||||
*end = *start + nr_pages * sizeof(struct hyp_page);
|
||||
*start = ALIGN_DOWN(*start, PAGE_SIZE);
|
||||
*end = ALIGN(*end, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
|
||||
{
|
||||
unsigned long total = 0, i;
|
||||
|
||||
/* Provision the worst case scenario */
|
||||
for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) {
|
||||
nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE);
|
||||
total += nr_pages;
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
static inline unsigned long __hyp_pgtable_total_pages(void)
|
||||
{
|
||||
unsigned long res = 0, i;
|
||||
|
||||
/* Cover all of memory with page-granularity */
|
||||
for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
|
||||
struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i];
|
||||
res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long hyp_s1_pgtable_pages(void)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
res = __hyp_pgtable_total_pages();
|
||||
|
||||
/* Allow 1 GiB for private mappings */
|
||||
res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long host_s2_mem_pgtable_pages(void)
|
||||
{
|
||||
/*
|
||||
* Include an extra 16 pages to safely upper-bound the worst case of
|
||||
* concatenated pgds.
|
||||
*/
|
||||
return __hyp_pgtable_total_pages() + 16;
|
||||
}
|
||||
|
||||
static inline unsigned long host_s2_dev_pgtable_pages(void)
|
||||
{
|
||||
/* Allow 1 GiB for MMIO mappings */
|
||||
return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
#endif /* __KVM_HYP_MM_H */
|
||||
92
arch/arm64/kvm/hyp/include/nvhe/spinlock.h
Normal file
92
arch/arm64/kvm/hyp/include/nvhe/spinlock.h
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* A stand-alone ticket spinlock implementation for use by the non-VHE
|
||||
* KVM hypervisor code running at EL2.
|
||||
*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Will Deacon <will@kernel.org>
|
||||
*
|
||||
* Heavily based on the implementation removed by c11090474d70 which was:
|
||||
* Copyright (C) 2012 ARM Ltd.
|
||||
*/
|
||||
|
||||
#ifndef __ARM64_KVM_NVHE_SPINLOCK_H__
|
||||
#define __ARM64_KVM_NVHE_SPINLOCK_H__
|
||||
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/lse.h>
|
||||
|
||||
typedef union hyp_spinlock {
|
||||
u32 __val;
|
||||
struct {
|
||||
#ifdef __AARCH64EB__
|
||||
u16 next, owner;
|
||||
#else
|
||||
u16 owner, next;
|
||||
#endif
|
||||
};
|
||||
} hyp_spinlock_t;
|
||||
|
||||
#define hyp_spin_lock_init(l) \
|
||||
do { \
|
||||
*(l) = (hyp_spinlock_t){ .__val = 0 }; \
|
||||
} while (0)
|
||||
|
||||
static inline void hyp_spin_lock(hyp_spinlock_t *lock)
|
||||
{
|
||||
u32 tmp;
|
||||
hyp_spinlock_t lockval, newval;
|
||||
|
||||
asm volatile(
|
||||
/* Atomically increment the next ticket. */
|
||||
ARM64_LSE_ATOMIC_INSN(
|
||||
/* LL/SC */
|
||||
" prfm pstl1strm, %3\n"
|
||||
"1: ldaxr %w0, %3\n"
|
||||
" add %w1, %w0, #(1 << 16)\n"
|
||||
" stxr %w2, %w1, %3\n"
|
||||
" cbnz %w2, 1b\n",
|
||||
/* LSE atomics */
|
||||
" mov %w2, #(1 << 16)\n"
|
||||
" ldadda %w2, %w0, %3\n"
|
||||
__nops(3))
|
||||
|
||||
/* Did we get the lock? */
|
||||
" eor %w1, %w0, %w0, ror #16\n"
|
||||
" cbz %w1, 3f\n"
|
||||
/*
|
||||
* No: spin on the owner. Send a local event to avoid missing an
|
||||
* unlock before the exclusive load.
|
||||
*/
|
||||
" sevl\n"
|
||||
"2: wfe\n"
|
||||
" ldaxrh %w2, %4\n"
|
||||
" eor %w1, %w2, %w0, lsr #16\n"
|
||||
" cbnz %w1, 2b\n"
|
||||
/* We got the lock. Critical section starts here. */
|
||||
"3:"
|
||||
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
|
||||
: "Q" (lock->owner)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
static inline void hyp_spin_unlock(hyp_spinlock_t *lock)
|
||||
{
|
||||
u64 tmp;
|
||||
|
||||
asm volatile(
|
||||
ARM64_LSE_ATOMIC_INSN(
|
||||
/* LL/SC */
|
||||
" ldrh %w1, %0\n"
|
||||
" add %w1, %w1, #1\n"
|
||||
" stlrh %w1, %0",
|
||||
/* LSE atomics */
|
||||
" mov %w1, #1\n"
|
||||
" staddlh %w1, %0\n"
|
||||
__nops(1))
|
||||
: "=Q" (lock->owner), "=&r" (tmp)
|
||||
:
|
||||
: "memory");
|
||||
}
|
||||
|
||||
#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */
|
||||
|
|
@ -9,10 +9,15 @@ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -D__DISABLE_EXPORTS
|
|||
hostprogs := gen-hyprel
|
||||
HOST_EXTRACFLAGS += -I$(objtree)/include
|
||||
|
||||
lib-objs := clear_page.o copy_page.o memcpy.o memset.o
|
||||
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
|
||||
|
||||
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
|
||||
hyp-main.o hyp-smp.o psci-relay.o
|
||||
hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
|
||||
cache.o setup.o mm.o mem_protect.o
|
||||
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o
|
||||
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
|
||||
obj-y += $(lib-objs)
|
||||
|
||||
##
|
||||
## Build rules for compiling nVHE hyp code
|
||||
|
|
|
|||
13
arch/arm64/kvm/hyp/nvhe/cache.S
Normal file
13
arch/arm64/kvm/hyp/nvhe/cache.S
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Code copied from arch/arm64/mm/cache.S.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/alternative.h>
|
||||
|
||||
SYM_FUNC_START_PI(__flush_dcache_area)
|
||||
dcache_by_line_op civac, sy, x0, x1, x2, x3
|
||||
ret
|
||||
SYM_FUNC_END_PI(__flush_dcache_area)
|
||||
|
|
@ -21,17 +21,11 @@ static void __debug_save_spe(u64 *pmscr_el1)
|
|||
/* Clear pmscr in case of early return */
|
||||
*pmscr_el1 = 0;
|
||||
|
||||
/* SPE present on this CPU? */
|
||||
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
|
||||
ID_AA64DFR0_PMSVER_SHIFT))
|
||||
return;
|
||||
|
||||
/* Yes; is it owned by EL3? */
|
||||
reg = read_sysreg_s(SYS_PMBIDR_EL1);
|
||||
if (reg & BIT(SYS_PMBIDR_EL1_P_SHIFT))
|
||||
return;
|
||||
|
||||
/* No; is the host actually using the thing? */
|
||||
/*
|
||||
* At this point, we know that this CPU implements
|
||||
* SPE and is available to the host.
|
||||
* Check if the host is actually using it ?
|
||||
*/
|
||||
reg = read_sysreg_s(SYS_PMBLIMITR_EL1);
|
||||
if (!(reg & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)))
|
||||
return;
|
||||
|
|
@ -58,10 +52,43 @@ static void __debug_restore_spe(u64 pmscr_el1)
|
|||
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
|
||||
}
|
||||
|
||||
static void __debug_save_trace(u64 *trfcr_el1)
|
||||
{
|
||||
*trfcr_el1 = 0;
|
||||
|
||||
/* Check if the TRBE is enabled */
|
||||
if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_ENABLE))
|
||||
return;
|
||||
/*
|
||||
* Prohibit trace generation while we are in guest.
|
||||
* Since access to TRFCR_EL1 is trapped, the guest can't
|
||||
* modify the filtering set by the host.
|
||||
*/
|
||||
*trfcr_el1 = read_sysreg_s(SYS_TRFCR_EL1);
|
||||
write_sysreg_s(0, SYS_TRFCR_EL1);
|
||||
isb();
|
||||
/* Drain the trace buffer to memory */
|
||||
tsb_csync();
|
||||
dsb(nsh);
|
||||
}
|
||||
|
||||
static void __debug_restore_trace(u64 trfcr_el1)
|
||||
{
|
||||
if (!trfcr_el1)
|
||||
return;
|
||||
|
||||
/* Restore trace filter controls */
|
||||
write_sysreg_s(trfcr_el1, SYS_TRFCR_EL1);
|
||||
}
|
||||
|
||||
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/* Disable and flush SPE data generation */
|
||||
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
|
||||
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
|
||||
/* Disable and flush Self-Hosted Trace generation */
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
|
||||
__debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
|
||||
}
|
||||
|
||||
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
||||
|
|
@ -71,7 +98,10 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
|
|||
|
||||
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_SPE)
|
||||
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
|
||||
if (vcpu->arch.flags & KVM_ARM64_DEBUG_STATE_SAVE_TRBE)
|
||||
__debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
|
||||
}
|
||||
|
||||
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
|
||||
|
|
|
|||
54
arch/arm64/kvm/hyp/nvhe/early_alloc.c
Normal file
54
arch/arm64/kvm/hyp/nvhe/early_alloc.c
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/memory.h>
|
||||
|
||||
struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops;
|
||||
s64 __ro_after_init hyp_physvirt_offset;
|
||||
|
||||
static unsigned long base;
|
||||
static unsigned long end;
|
||||
static unsigned long cur;
|
||||
|
||||
unsigned long hyp_early_alloc_nr_used_pages(void)
|
||||
{
|
||||
return (cur - base) >> PAGE_SHIFT;
|
||||
}
|
||||
|
||||
void *hyp_early_alloc_contig(unsigned int nr_pages)
|
||||
{
|
||||
unsigned long size = (nr_pages << PAGE_SHIFT);
|
||||
void *ret = (void *)cur;
|
||||
|
||||
if (!nr_pages)
|
||||
return NULL;
|
||||
|
||||
if (end - cur < size)
|
||||
return NULL;
|
||||
|
||||
cur += size;
|
||||
memset(ret, 0, size);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *hyp_early_alloc_page(void *arg)
|
||||
{
|
||||
return hyp_early_alloc_contig(1);
|
||||
}
|
||||
|
||||
void hyp_early_alloc_init(void *virt, unsigned long size)
|
||||
{
|
||||
base = cur = (unsigned long)virt;
|
||||
end = base + size;
|
||||
|
||||
hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page;
|
||||
hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt;
|
||||
hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys;
|
||||
}
|
||||
|
|
@ -50,6 +50,18 @@
|
|||
#ifndef R_AARCH64_ABS64
|
||||
#define R_AARCH64_ABS64 257
|
||||
#endif
|
||||
#ifndef R_AARCH64_PREL64
|
||||
#define R_AARCH64_PREL64 260
|
||||
#endif
|
||||
#ifndef R_AARCH64_PREL32
|
||||
#define R_AARCH64_PREL32 261
|
||||
#endif
|
||||
#ifndef R_AARCH64_PREL16
|
||||
#define R_AARCH64_PREL16 262
|
||||
#endif
|
||||
#ifndef R_AARCH64_PLT32
|
||||
#define R_AARCH64_PLT32 314
|
||||
#endif
|
||||
#ifndef R_AARCH64_LD_PREL_LO19
|
||||
#define R_AARCH64_LD_PREL_LO19 273
|
||||
#endif
|
||||
|
|
@ -371,6 +383,12 @@ static void emit_rela_section(Elf64_Shdr *sh_rela)
|
|||
case R_AARCH64_ABS64:
|
||||
emit_rela_abs64(rela, sh_orig_name);
|
||||
break;
|
||||
/* Allow position-relative data relocations. */
|
||||
case R_AARCH64_PREL64:
|
||||
case R_AARCH64_PREL32:
|
||||
case R_AARCH64_PREL16:
|
||||
case R_AARCH64_PLT32:
|
||||
break;
|
||||
/* Allow relocations to generate PC-relative addressing. */
|
||||
case R_AARCH64_LD_PREL_LO19:
|
||||
case R_AARCH64_ADR_PREL_LO21:
|
||||
|
|
|
|||
|
|
@ -79,22 +79,18 @@ SYM_FUNC_START(__hyp_do_panic)
|
|||
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
|
||||
PSR_MODE_EL1h)
|
||||
msr spsr_el2, lr
|
||||
ldr lr, =panic
|
||||
ldr lr, =nvhe_hyp_panic_handler
|
||||
hyp_kimg_va lr, x6
|
||||
msr elr_el2, lr
|
||||
|
||||
mov x29, x0
|
||||
|
||||
/* Load the format string into x0 and arguments into x1-7 */
|
||||
ldr x0, =__hyp_panic_string
|
||||
hyp_kimg_va x0, x6
|
||||
|
||||
/* Load the format arguments into x1-7. */
|
||||
mov x6, x3
|
||||
get_vcpu_ptr x7, x3
|
||||
mrs x3, esr_el2
|
||||
mrs x4, far_el2
|
||||
mrs x5, hpfar_el2
|
||||
/* Load the panic arguments into x0-7 */
|
||||
mrs x0, esr_el2
|
||||
get_vcpu_ptr x4, x5
|
||||
mrs x5, far_el2
|
||||
mrs x6, hpfar_el2
|
||||
mov x7, xzr // Unused argument
|
||||
|
||||
/* Enter the host, conditionally restoring the host context. */
|
||||
cbz x29, __host_enter_without_restoring
|
||||
|
|
|
|||
|
|
@ -83,11 +83,6 @@ SYM_CODE_END(__kvm_hyp_init)
|
|||
* x0: struct kvm_nvhe_init_params PA
|
||||
*/
|
||||
SYM_CODE_START_LOCAL(___kvm_hyp_init)
|
||||
alternative_if ARM64_KVM_PROTECTED_MODE
|
||||
mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS
|
||||
msr hcr_el2, x1
|
||||
alternative_else_nop_endif
|
||||
|
||||
ldr x1, [x0, #NVHE_INIT_TPIDR_EL2]
|
||||
msr tpidr_el2, x1
|
||||
|
||||
|
|
@ -97,6 +92,15 @@ alternative_else_nop_endif
|
|||
ldr x1, [x0, #NVHE_INIT_MAIR_EL2]
|
||||
msr mair_el2, x1
|
||||
|
||||
ldr x1, [x0, #NVHE_INIT_HCR_EL2]
|
||||
msr hcr_el2, x1
|
||||
|
||||
ldr x1, [x0, #NVHE_INIT_VTTBR]
|
||||
msr vttbr_el2, x1
|
||||
|
||||
ldr x1, [x0, #NVHE_INIT_VTCR]
|
||||
msr vtcr_el2, x1
|
||||
|
||||
ldr x1, [x0, #NVHE_INIT_PGD_PA]
|
||||
phys_to_ttbr x2, x1
|
||||
alternative_if ARM64_HAS_CNP
|
||||
|
|
@ -115,15 +119,10 @@ alternative_else_nop_endif
|
|||
|
||||
/* Invalidate the stale TLBs from Bootloader */
|
||||
tlbi alle2
|
||||
tlbi vmalls12e1
|
||||
dsb sy
|
||||
|
||||
/*
|
||||
* Preserve all the RES1 bits while setting the default flags,
|
||||
* as well as the EE bit on BE. Drop the A flag since the compiler
|
||||
* is allowed to generate unaligned accesses.
|
||||
*/
|
||||
mov_q x0, (SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
|
||||
CPU_BE( orr x0, x0, #SCTLR_ELx_EE)
|
||||
mov_q x0, INIT_SCTLR_EL2_MMU_ON
|
||||
alternative_if ARM64_HAS_ADDRESS_AUTH
|
||||
mov_q x1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \
|
||||
SCTLR_ELx_ENDA | SCTLR_ELx_ENDB)
|
||||
|
|
@ -221,9 +220,7 @@ SYM_CODE_START(__kvm_handle_stub_hvc)
|
|||
mov x0, xzr
|
||||
reset:
|
||||
/* Reset kvm back to the hyp stub. */
|
||||
mrs x5, sctlr_el2
|
||||
mov_q x6, SCTLR_ELx_FLAGS
|
||||
bic x5, x5, x6 // Clear SCTL_M and etc
|
||||
mov_q x5, INIT_SCTLR_EL2_MMU_OFF
|
||||
pre_disable_mmu_workaround
|
||||
msr sctlr_el2, x5
|
||||
isb
|
||||
|
|
@ -244,4 +241,31 @@ alternative_else_nop_endif
|
|||
|
||||
SYM_CODE_END(__kvm_handle_stub_hvc)
|
||||
|
||||
SYM_FUNC_START(__pkvm_init_switch_pgd)
|
||||
/* Turn the MMU off */
|
||||
pre_disable_mmu_workaround
|
||||
mrs x2, sctlr_el2
|
||||
bic x3, x2, #SCTLR_ELx_M
|
||||
msr sctlr_el2, x3
|
||||
isb
|
||||
|
||||
tlbi alle2
|
||||
|
||||
/* Install the new pgtables */
|
||||
ldr x3, [x0, #NVHE_INIT_PGD_PA]
|
||||
phys_to_ttbr x4, x3
|
||||
alternative_if ARM64_HAS_CNP
|
||||
orr x4, x4, #TTBR_CNP_BIT
|
||||
alternative_else_nop_endif
|
||||
msr ttbr0_el2, x4
|
||||
|
||||
/* Set the new stack pointer */
|
||||
ldr x0, [x0, #NVHE_INIT_STACK_HYP_VA]
|
||||
mov sp, x0
|
||||
|
||||
/* And turn the MMU back on! */
|
||||
set_sctlr_el2 x2
|
||||
ret x1
|
||||
SYM_FUNC_END(__pkvm_init_switch_pgd)
|
||||
|
||||
.popsection
|
||||
|
|
|
|||
|
|
@ -6,12 +6,15 @@
|
|||
|
||||
#include <hyp/switch.h>
|
||||
|
||||
#include <asm/pgtable-types.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
|
|
@ -106,6 +109,61 @@ static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
|
|||
__vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
|
||||
}
|
||||
|
||||
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, size, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, nr_cpus, host_ctxt, 3);
|
||||
DECLARE_REG(unsigned long *, per_cpu_base, host_ctxt, 4);
|
||||
DECLARE_REG(u32, hyp_va_bits, host_ctxt, 5);
|
||||
|
||||
/*
|
||||
* __pkvm_init() will return only if an error occurred, otherwise it
|
||||
* will tail-call in __pkvm_init_finalise() which will have to deal
|
||||
* with the host context directly.
|
||||
*/
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_init(phys, size, nr_cpus, per_cpu_base,
|
||||
hyp_va_bits);
|
||||
}
|
||||
|
||||
static void handle___pkvm_cpu_set_vector(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(enum arm64_hyp_spectre_vector, slot, host_ctxt, 1);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = pkvm_cpu_set_vector(slot);
|
||||
}
|
||||
|
||||
static void handle___pkvm_create_mappings(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(unsigned long, start, host_ctxt, 1);
|
||||
DECLARE_REG(unsigned long, size, host_ctxt, 2);
|
||||
DECLARE_REG(unsigned long, phys, host_ctxt, 3);
|
||||
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_create_mappings(start, size, phys, prot);
|
||||
}
|
||||
|
||||
static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, phys, host_ctxt, 1);
|
||||
DECLARE_REG(size_t, size, host_ctxt, 2);
|
||||
DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_create_private_mapping(phys, size, prot);
|
||||
}
|
||||
|
||||
static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
|
||||
}
|
||||
|
||||
static void handle___pkvm_mark_hyp(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(phys_addr_t, start, host_ctxt, 1);
|
||||
DECLARE_REG(phys_addr_t, end, host_ctxt, 2);
|
||||
|
||||
cpu_reg(host_ctxt, 1) = __pkvm_mark_hyp(start, end);
|
||||
}
|
||||
typedef void (*hcall_t)(struct kvm_cpu_context *);
|
||||
|
||||
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
|
||||
|
|
@ -125,6 +183,12 @@ static const hcall_t host_hcall[] = {
|
|||
HANDLE_FUNC(__kvm_get_mdcr_el2),
|
||||
HANDLE_FUNC(__vgic_v3_save_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_restore_aprs),
|
||||
HANDLE_FUNC(__pkvm_init),
|
||||
HANDLE_FUNC(__pkvm_cpu_set_vector),
|
||||
HANDLE_FUNC(__pkvm_create_mappings),
|
||||
HANDLE_FUNC(__pkvm_create_private_mapping),
|
||||
HANDLE_FUNC(__pkvm_prot_finalize),
|
||||
HANDLE_FUNC(__pkvm_mark_hyp),
|
||||
};
|
||||
|
||||
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
|
||||
|
|
@ -177,7 +241,16 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
|
|||
case ESR_ELx_EC_SMC64:
|
||||
handle_host_smc(host_ctxt);
|
||||
break;
|
||||
case ESR_ELx_EC_SVE:
|
||||
sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
|
||||
isb();
|
||||
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
|
||||
break;
|
||||
case ESR_ELx_EC_IABT_LOW:
|
||||
case ESR_ELx_EC_DABT_LOW:
|
||||
handle_host_mem_abort(host_ctxt);
|
||||
break;
|
||||
default:
|
||||
hyp_panic();
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,8 +18,7 @@ u64 __ro_after_init hyp_cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID
|
|||
|
||||
u64 cpu_logical_map(unsigned int cpu)
|
||||
{
|
||||
if (cpu >= ARRAY_SIZE(hyp_cpu_logical_map))
|
||||
hyp_panic();
|
||||
BUG_ON(cpu >= ARRAY_SIZE(hyp_cpu_logical_map));
|
||||
|
||||
return hyp_cpu_logical_map[cpu];
|
||||
}
|
||||
|
|
@ -30,8 +29,7 @@ unsigned long __hyp_per_cpu_offset(unsigned int cpu)
|
|||
unsigned long this_cpu_base;
|
||||
unsigned long elf_base;
|
||||
|
||||
if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base))
|
||||
hyp_panic();
|
||||
BUG_ON(cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base));
|
||||
|
||||
cpu_base_array = (unsigned long *)&kvm_arm_hyp_percpu_base;
|
||||
this_cpu_base = kern_hyp_va(cpu_base_array[cpu]);
|
||||
|
|
|
|||
|
|
@ -25,4 +25,5 @@ SECTIONS {
|
|||
BEGIN_HYP_SECTION(.data..percpu)
|
||||
PERCPU_INPUT(L1_CACHE_BYTES)
|
||||
END_HYP_SECTION
|
||||
HYP_SECTION(.bss)
|
||||
}
|
||||
|
|
|
|||
279
arch/arm64/kvm/hyp/nvhe/mem_protect.c
Normal file
279
arch/arm64/kvm/hyp/nvhe/mem_protect.c
Normal file
|
|
@ -0,0 +1,279 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_emulate.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
#include <hyp/switch.h>
|
||||
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
|
||||
#define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
|
||||
|
||||
extern unsigned long hyp_nr_cpus;
|
||||
struct host_kvm host_kvm;
|
||||
|
||||
struct hyp_pool host_s2_mem;
|
||||
struct hyp_pool host_s2_dev;
|
||||
|
||||
/*
|
||||
* Copies of the host's CPU features registers holding sanitized values.
|
||||
*/
|
||||
u64 id_aa64mmfr0_el1_sys_val;
|
||||
u64 id_aa64mmfr1_el1_sys_val;
|
||||
|
||||
static const u8 pkvm_hyp_id = 1;
|
||||
|
||||
static void *host_s2_zalloc_pages_exact(size_t size)
|
||||
{
|
||||
return hyp_alloc_pages(&host_s2_mem, get_order(size));
|
||||
}
|
||||
|
||||
static void *host_s2_zalloc_page(void *pool)
|
||||
{
|
||||
return hyp_alloc_pages(pool, 0);
|
||||
}
|
||||
|
||||
static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
|
||||
{
|
||||
unsigned long nr_pages, pfn;
|
||||
int ret;
|
||||
|
||||
pfn = hyp_virt_to_pfn(mem_pgt_pool);
|
||||
nr_pages = host_s2_mem_pgtable_pages();
|
||||
ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
pfn = hyp_virt_to_pfn(dev_pgt_pool);
|
||||
nr_pages = host_s2_dev_pgtable_pages();
|
||||
ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_pages_exact = host_s2_zalloc_pages_exact,
|
||||
.zalloc_page = host_s2_zalloc_page,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.page_count = hyp_page_count,
|
||||
.get_page = hyp_get_page,
|
||||
.put_page = hyp_put_page,
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void prepare_host_vtcr(void)
|
||||
{
|
||||
u32 parange, phys_shift;
|
||||
|
||||
/* The host stage 2 is id-mapped, so use parange for T0SZ */
|
||||
parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
|
||||
phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
|
||||
|
||||
host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
|
||||
id_aa64mmfr1_el1_sys_val, phys_shift);
|
||||
}
|
||||
|
||||
int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
int ret;
|
||||
|
||||
prepare_host_vtcr();
|
||||
hyp_spin_lock_init(&host_kvm.lock);
|
||||
|
||||
ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = kvm_pgtable_stage2_init_flags(&host_kvm.pgt, &host_kvm.arch,
|
||||
&host_kvm.mm_ops, KVM_HOST_S2_FLAGS);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
|
||||
mmu->arch = &host_kvm.arch;
|
||||
mmu->pgt = &host_kvm.pgt;
|
||||
mmu->vmid.vmid_gen = 0;
|
||||
mmu->vmid.vmid = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __pkvm_prot_finalize(void)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
|
||||
struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
|
||||
|
||||
params->vttbr = kvm_get_vttbr(mmu);
|
||||
params->vtcr = host_kvm.arch.vtcr;
|
||||
params->hcr_el2 |= HCR_VM;
|
||||
kvm_flush_dcache_to_poc(params, sizeof(*params));
|
||||
|
||||
write_sysreg(params->hcr_el2, hcr_el2);
|
||||
__load_stage2(&host_kvm.arch.mmu, host_kvm.arch.vtcr);
|
||||
|
||||
/*
|
||||
* Make sure to have an ISB before the TLB maintenance below but only
|
||||
* when __load_stage2() doesn't include one already.
|
||||
*/
|
||||
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
|
||||
|
||||
/* Invalidate stale HCR bits that may be cached in TLBs */
|
||||
__tlbi(vmalls12e1);
|
||||
dsb(nsh);
|
||||
isb();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int host_stage2_unmap_dev_all(void)
|
||||
{
|
||||
struct kvm_pgtable *pgt = &host_kvm.pgt;
|
||||
struct memblock_region *reg;
|
||||
u64 addr = 0;
|
||||
int i, ret;
|
||||
|
||||
/* Unmap all non-memory regions to recycle the pages */
|
||||
for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) {
|
||||
reg = &hyp_memory[i];
|
||||
ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr);
|
||||
}
|
||||
|
||||
static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
|
||||
{
|
||||
int cur, left = 0, right = hyp_memblock_nr;
|
||||
struct memblock_region *reg;
|
||||
phys_addr_t end;
|
||||
|
||||
range->start = 0;
|
||||
range->end = ULONG_MAX;
|
||||
|
||||
/* The list of memblock regions is sorted, binary search it */
|
||||
while (left < right) {
|
||||
cur = (left + right) >> 1;
|
||||
reg = &hyp_memory[cur];
|
||||
end = reg->base + reg->size;
|
||||
if (addr < reg->base) {
|
||||
right = cur;
|
||||
range->end = reg->base;
|
||||
} else if (addr >= end) {
|
||||
left = cur + 1;
|
||||
range->start = end;
|
||||
} else {
|
||||
range->start = reg->base;
|
||||
range->end = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool range_is_memory(u64 start, u64 end)
|
||||
{
|
||||
struct kvm_mem_range r1, r2;
|
||||
|
||||
if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2))
|
||||
return false;
|
||||
if (r1.start != r2.start)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline int __host_stage2_idmap(u64 start, u64 end,
|
||||
enum kvm_pgtable_prot prot,
|
||||
struct hyp_pool *pool)
|
||||
{
|
||||
return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
|
||||
prot, pool);
|
||||
}
|
||||
|
||||
static int host_stage2_idmap(u64 addr)
|
||||
{
|
||||
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
|
||||
struct kvm_mem_range range;
|
||||
bool is_memory = find_mem_range(addr, &range);
|
||||
struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
|
||||
int ret;
|
||||
|
||||
if (is_memory)
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
ret = kvm_pgtable_stage2_find_range(&host_kvm.pgt, addr, prot, &range);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __host_stage2_idmap(range.start, range.end, prot, pool);
|
||||
if (is_memory || ret != -ENOMEM)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* host_s2_mem has been provided with enough pages to cover all of
|
||||
* memory with page granularity, so we should never hit the ENOMEM case.
|
||||
* However, it is difficult to know how much of the MMIO range we will
|
||||
* need to cover upfront, so we may need to 'recycle' the pages if we
|
||||
* run out.
|
||||
*/
|
||||
ret = host_stage2_unmap_dev_all();
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = __host_stage2_idmap(range.start, range.end, prot, pool);
|
||||
|
||||
unlock:
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* host_stage2_unmap_dev_all() currently relies on MMIO mappings being
|
||||
* non-persistent, so don't allow changing page ownership in MMIO range.
|
||||
*/
|
||||
if (!range_is_memory(start, end))
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock(&host_kvm.lock);
|
||||
ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
|
||||
&host_s2_mem, pkvm_hyp_id);
|
||||
hyp_spin_unlock(&host_kvm.lock);
|
||||
|
||||
return ret != -EAGAIN ? ret : 0;
|
||||
}
|
||||
|
||||
void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
struct kvm_vcpu_fault_info fault;
|
||||
u64 esr, addr;
|
||||
int ret = 0;
|
||||
|
||||
esr = read_sysreg_el2(SYS_ESR);
|
||||
BUG_ON(!__get_fault_info(esr, &fault));
|
||||
|
||||
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
|
||||
ret = host_stage2_idmap(addr);
|
||||
BUG_ON(ret && ret != -EAGAIN);
|
||||
}
|
||||
173
arch/arm64/kvm/hyp/nvhe/mm.c
Normal file
173
arch/arm64/kvm/hyp/nvhe/mm.c
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
#include <asm/spectre.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/spinlock.h>
|
||||
|
||||
struct kvm_pgtable pkvm_pgtable;
|
||||
hyp_spinlock_t pkvm_pgd_lock;
|
||||
u64 __io_map_base;
|
||||
|
||||
struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS];
|
||||
unsigned int hyp_memblock_nr;
|
||||
|
||||
int __pkvm_create_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int err;
|
||||
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
err = kvm_pgtable_hyp_map(&pkvm_pgtable, start, size, phys, prot);
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
|
||||
enum kvm_pgtable_prot prot)
|
||||
{
|
||||
unsigned long addr;
|
||||
int err;
|
||||
|
||||
hyp_spin_lock(&pkvm_pgd_lock);
|
||||
|
||||
size = PAGE_ALIGN(size + offset_in_page(phys));
|
||||
addr = __io_map_base;
|
||||
__io_map_base += size;
|
||||
|
||||
/* Are we overflowing on the vmemmap ? */
|
||||
if (__io_map_base > __hyp_vmemmap) {
|
||||
__io_map_base -= size;
|
||||
addr = (unsigned long)ERR_PTR(-ENOMEM);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot);
|
||||
if (err) {
|
||||
addr = (unsigned long)ERR_PTR(err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
addr = addr + offset_in_page(phys);
|
||||
out:
|
||||
hyp_spin_unlock(&pkvm_pgd_lock);
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
unsigned long start = (unsigned long)from;
|
||||
unsigned long end = (unsigned long)to;
|
||||
unsigned long virt_addr;
|
||||
phys_addr_t phys;
|
||||
|
||||
start = start & PAGE_MASK;
|
||||
end = PAGE_ALIGN(end);
|
||||
|
||||
for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) {
|
||||
int err;
|
||||
|
||||
phys = hyp_virt_to_phys((void *)virt_addr);
|
||||
err = __pkvm_create_mappings(virt_addr, PAGE_SIZE, phys, prot);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
|
||||
{
|
||||
unsigned long start, end;
|
||||
|
||||
hyp_vmemmap_range(phys, size, &start, &end);
|
||||
|
||||
return __pkvm_create_mappings(start, end - start, back, PAGE_HYP);
|
||||
}
|
||||
|
||||
static void *__hyp_bp_vect_base;
|
||||
int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot)
|
||||
{
|
||||
void *vector;
|
||||
|
||||
switch (slot) {
|
||||
case HYP_VECTOR_DIRECT: {
|
||||
vector = __kvm_hyp_vector;
|
||||
break;
|
||||
}
|
||||
case HYP_VECTOR_SPECTRE_DIRECT: {
|
||||
vector = __bp_harden_hyp_vecs;
|
||||
break;
|
||||
}
|
||||
case HYP_VECTOR_INDIRECT:
|
||||
case HYP_VECTOR_SPECTRE_INDIRECT: {
|
||||
vector = (void *)__hyp_bp_vect_base;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vector = __kvm_vector_slot2addr(vector, slot);
|
||||
*this_cpu_ptr(&kvm_hyp_vector) = (unsigned long)vector;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hyp_map_vectors(void)
|
||||
{
|
||||
phys_addr_t phys;
|
||||
void *bp_base;
|
||||
|
||||
if (!cpus_have_const_cap(ARM64_SPECTRE_V3A))
|
||||
return 0;
|
||||
|
||||
phys = __hyp_pa(__bp_harden_hyp_vecs);
|
||||
bp_base = (void *)__pkvm_create_private_mapping(phys,
|
||||
__BP_HARDEN_HYP_VECS_SZ,
|
||||
PAGE_HYP_EXEC);
|
||||
if (IS_ERR_OR_NULL(bp_base))
|
||||
return PTR_ERR(bp_base);
|
||||
|
||||
__hyp_bp_vect_base = bp_base;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hyp_create_idmap(u32 hyp_va_bits)
|
||||
{
|
||||
unsigned long start, end;
|
||||
|
||||
start = hyp_virt_to_phys((void *)__hyp_idmap_text_start);
|
||||
start = ALIGN_DOWN(start, PAGE_SIZE);
|
||||
|
||||
end = hyp_virt_to_phys((void *)__hyp_idmap_text_end);
|
||||
end = ALIGN(end, PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* One half of the VA space is reserved to linearly map portions of
|
||||
* memory -- see va_layout.c for more details. The other half of the VA
|
||||
* space contains the trampoline page, and needs some care. Split that
|
||||
* second half in two and find the quarter of VA space not conflicting
|
||||
* with the idmap to place the IOs and the vmemmap. IOs use the lower
|
||||
* half of the quarter and the vmemmap the upper half.
|
||||
*/
|
||||
__io_map_base = start & BIT(hyp_va_bits - 2);
|
||||
__io_map_base ^= BIT(hyp_va_bits - 2);
|
||||
__hyp_vmemmap = __io_map_base | BIT(hyp_va_bits - 3);
|
||||
|
||||
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
|
||||
}
|
||||
195
arch/arm64/kvm/hyp/nvhe/page_alloc.c
Normal file
195
arch/arm64/kvm/hyp/nvhe/page_alloc.c
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <nvhe/gfp.h>
|
||||
|
||||
u64 __hyp_vmemmap;
|
||||
|
||||
/*
|
||||
* Index the hyp_vmemmap to find a potential buddy page, but make no assumption
|
||||
* about its current state.
|
||||
*
|
||||
* Example buddy-tree for a 4-pages physically contiguous pool:
|
||||
*
|
||||
* o : Page 3
|
||||
* /
|
||||
* o-o : Page 2
|
||||
* /
|
||||
* / o : Page 1
|
||||
* / /
|
||||
* o---o-o : Page 0
|
||||
* Order 2 1 0
|
||||
*
|
||||
* Example of requests on this pool:
|
||||
* __find_buddy_nocheck(pool, page 0, order 0) => page 1
|
||||
* __find_buddy_nocheck(pool, page 0, order 1) => page 2
|
||||
* __find_buddy_nocheck(pool, page 1, order 0) => page 0
|
||||
* __find_buddy_nocheck(pool, page 2, order 0) => page 3
|
||||
*/
|
||||
static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
|
||||
struct hyp_page *p,
|
||||
unsigned int order)
|
||||
{
|
||||
phys_addr_t addr = hyp_page_to_phys(p);
|
||||
|
||||
addr ^= (PAGE_SIZE << order);
|
||||
|
||||
/*
|
||||
* Don't return a page outside the pool range -- it belongs to
|
||||
* something else and may not be mapped in hyp_vmemmap.
|
||||
*/
|
||||
if (addr < pool->range_start || addr >= pool->range_end)
|
||||
return NULL;
|
||||
|
||||
return hyp_phys_to_page(addr);
|
||||
}
|
||||
|
||||
/* Find a buddy page currently available for allocation */
|
||||
static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
|
||||
struct hyp_page *p,
|
||||
unsigned int order)
|
||||
{
|
||||
struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
|
||||
|
||||
if (!buddy || buddy->order != order || list_empty(&buddy->node))
|
||||
return NULL;
|
||||
|
||||
return buddy;
|
||||
|
||||
}
|
||||
|
||||
static void __hyp_attach_page(struct hyp_pool *pool,
|
||||
struct hyp_page *p)
|
||||
{
|
||||
unsigned int order = p->order;
|
||||
struct hyp_page *buddy;
|
||||
|
||||
memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
|
||||
|
||||
/*
|
||||
* Only the first struct hyp_page of a high-order page (otherwise known
|
||||
* as the 'head') should have p->order set. The non-head pages should
|
||||
* have p->order = HYP_NO_ORDER. Here @p may no longer be the head
|
||||
* after coallescing, so make sure to mark it HYP_NO_ORDER proactively.
|
||||
*/
|
||||
p->order = HYP_NO_ORDER;
|
||||
for (; (order + 1) < pool->max_order; order++) {
|
||||
buddy = __find_buddy_avail(pool, p, order);
|
||||
if (!buddy)
|
||||
break;
|
||||
|
||||
/* Take the buddy out of its list, and coallesce with @p */
|
||||
list_del_init(&buddy->node);
|
||||
buddy->order = HYP_NO_ORDER;
|
||||
p = min(p, buddy);
|
||||
}
|
||||
|
||||
/* Mark the new head, and insert it */
|
||||
p->order = order;
|
||||
list_add_tail(&p->node, &pool->free_area[order]);
|
||||
}
|
||||
|
||||
static void hyp_attach_page(struct hyp_page *p)
|
||||
{
|
||||
struct hyp_pool *pool = hyp_page_to_pool(p);
|
||||
|
||||
hyp_spin_lock(&pool->lock);
|
||||
__hyp_attach_page(pool, p);
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
}
|
||||
|
||||
static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
|
||||
struct hyp_page *p,
|
||||
unsigned int order)
|
||||
{
|
||||
struct hyp_page *buddy;
|
||||
|
||||
list_del_init(&p->node);
|
||||
while (p->order > order) {
|
||||
/*
|
||||
* The buddy of order n - 1 currently has HYP_NO_ORDER as it
|
||||
* is covered by a higher-level page (whose head is @p). Use
|
||||
* __find_buddy_nocheck() to find it and inject it in the
|
||||
* free_list[n - 1], effectively splitting @p in half.
|
||||
*/
|
||||
p->order--;
|
||||
buddy = __find_buddy_nocheck(pool, p, p->order);
|
||||
buddy->order = p->order;
|
||||
list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
void hyp_put_page(void *addr)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(addr);
|
||||
|
||||
if (hyp_page_ref_dec_and_test(p))
|
||||
hyp_attach_page(p);
|
||||
}
|
||||
|
||||
void hyp_get_page(void *addr)
|
||||
{
|
||||
struct hyp_page *p = hyp_virt_to_page(addr);
|
||||
|
||||
hyp_page_ref_inc(p);
|
||||
}
|
||||
|
||||
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
|
||||
{
|
||||
unsigned int i = order;
|
||||
struct hyp_page *p;
|
||||
|
||||
hyp_spin_lock(&pool->lock);
|
||||
|
||||
/* Look for a high-enough-order page */
|
||||
while (i < pool->max_order && list_empty(&pool->free_area[i]))
|
||||
i++;
|
||||
if (i >= pool->max_order) {
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Extract it from the tree at the right order */
|
||||
p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
|
||||
p = __hyp_extract_page(pool, p, order);
|
||||
|
||||
hyp_spin_unlock(&pool->lock);
|
||||
hyp_set_page_refcounted(p);
|
||||
|
||||
return hyp_page_to_virt(p);
|
||||
}
|
||||
|
||||
int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
|
||||
unsigned int reserved_pages)
|
||||
{
|
||||
phys_addr_t phys = hyp_pfn_to_phys(pfn);
|
||||
struct hyp_page *p;
|
||||
int i;
|
||||
|
||||
hyp_spin_lock_init(&pool->lock);
|
||||
pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT));
|
||||
for (i = 0; i < pool->max_order; i++)
|
||||
INIT_LIST_HEAD(&pool->free_area[i]);
|
||||
pool->range_start = phys;
|
||||
pool->range_end = phys + (nr_pages << PAGE_SHIFT);
|
||||
|
||||
/* Init the vmemmap portion */
|
||||
p = hyp_phys_to_page(phys);
|
||||
memset(p, 0, sizeof(*p) * nr_pages);
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
p[i].pool = pool;
|
||||
INIT_LIST_HEAD(&p[i].node);
|
||||
}
|
||||
|
||||
/* Attach the unused pages to the buddy tree */
|
||||
for (i = reserved_pages; i < nr_pages; i++)
|
||||
__hyp_attach_page(pool, &p[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -11,6 +11,7 @@
|
|||
#include <linux/kvm_host.h>
|
||||
#include <uapi/linux/psci.h>
|
||||
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
void kvm_hyp_cpu_entry(unsigned long r0);
|
||||
|
|
@ -20,9 +21,6 @@ void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt);
|
|||
|
||||
/* Config options set by the host. */
|
||||
struct kvm_host_psci_config __ro_after_init kvm_host_psci_config;
|
||||
s64 __ro_after_init hyp_physvirt_offset;
|
||||
|
||||
#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset)
|
||||
|
||||
#define INVALID_CPU_ID UINT_MAX
|
||||
|
||||
|
|
|
|||
214
arch/arm64/kvm/hyp/nvhe/setup.c
Normal file
214
arch/arm64/kvm/hyp/nvhe/setup.c
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2020 Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
#include <nvhe/early_alloc.h>
|
||||
#include <nvhe/gfp.h>
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mem_protect.h>
|
||||
#include <nvhe/mm.h>
|
||||
#include <nvhe/trap_handler.h>
|
||||
|
||||
struct hyp_pool hpool;
|
||||
struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
|
||||
unsigned long hyp_nr_cpus;
|
||||
|
||||
#define hyp_percpu_size ((unsigned long)__per_cpu_end - \
|
||||
(unsigned long)__per_cpu_start)
|
||||
|
||||
static void *vmemmap_base;
|
||||
static void *hyp_pgt_base;
|
||||
static void *host_s2_mem_pgt_base;
|
||||
static void *host_s2_dev_pgt_base;
|
||||
|
||||
static int divide_memory_pool(void *virt, unsigned long size)
|
||||
{
|
||||
unsigned long vstart, vend, nr_pages;
|
||||
|
||||
hyp_early_alloc_init(virt, size);
|
||||
|
||||
hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend);
|
||||
nr_pages = (vend - vstart) >> PAGE_SHIFT;
|
||||
vmemmap_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!vmemmap_base)
|
||||
return -ENOMEM;
|
||||
|
||||
nr_pages = hyp_s1_pgtable_pages();
|
||||
hyp_pgt_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!hyp_pgt_base)
|
||||
return -ENOMEM;
|
||||
|
||||
nr_pages = host_s2_mem_pgtable_pages();
|
||||
host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!host_s2_mem_pgt_base)
|
||||
return -ENOMEM;
|
||||
|
||||
nr_pages = host_s2_dev_pgtable_pages();
|
||||
host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
|
||||
if (!host_s2_dev_pgt_base)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
|
||||
unsigned long *per_cpu_base,
|
||||
u32 hyp_va_bits)
|
||||
{
|
||||
void *start, *end, *virt = hyp_phys_to_virt(phys);
|
||||
unsigned long pgt_size = hyp_s1_pgtable_pages() << PAGE_SHIFT;
|
||||
int ret, i;
|
||||
|
||||
/* Recreate the hyp page-table using the early page allocator */
|
||||
hyp_early_alloc_init(hyp_pgt_base, pgt_size);
|
||||
ret = kvm_pgtable_hyp_init(&pkvm_pgtable, hyp_va_bits,
|
||||
&hyp_early_alloc_mm_ops);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = hyp_create_idmap(hyp_va_bits);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = hyp_map_vectors();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_text_start, __hyp_text_end, PAGE_HYP_EXEC);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__start_rodata, __end_rodata, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_bss_start, __hyp_bss_end, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, PAGE_HYP_RO);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = pkvm_create_mappings(virt, virt + size, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
start = (void *)kern_hyp_va(per_cpu_base[i]);
|
||||
end = start + PAGE_ALIGN(hyp_percpu_size);
|
||||
ret = pkvm_create_mappings(start, end, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
end = (void *)per_cpu_ptr(&kvm_init_params, i)->stack_hyp_va;
|
||||
start = end - PAGE_SIZE;
|
||||
ret = pkvm_create_mappings(start, end, PAGE_HYP);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_nvhe_init_params(void)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params;
|
||||
unsigned long i;
|
||||
|
||||
for (i = 0; i < hyp_nr_cpus; i++) {
|
||||
params = per_cpu_ptr(&kvm_init_params, i);
|
||||
params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
|
||||
__flush_dcache_area(params, sizeof(*params));
|
||||
}
|
||||
}
|
||||
|
||||
static void *hyp_zalloc_hyp_page(void *arg)
|
||||
{
|
||||
return hyp_alloc_pages(&hpool, 0);
|
||||
}
|
||||
|
||||
void __noreturn __pkvm_init_finalise(void)
|
||||
{
|
||||
struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
|
||||
struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
|
||||
unsigned long nr_pages, reserved_pages, pfn;
|
||||
int ret;
|
||||
|
||||
/* Now that the vmemmap is backed, install the full-fledged allocator */
|
||||
pfn = hyp_virt_to_pfn(hyp_pgt_base);
|
||||
nr_pages = hyp_s1_pgtable_pages();
|
||||
reserved_pages = hyp_early_alloc_nr_used_pages();
|
||||
ret = hyp_pool_init(&hpool, pfn, nr_pages, reserved_pages);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) {
|
||||
.zalloc_page = hyp_zalloc_hyp_page,
|
||||
.phys_to_virt = hyp_phys_to_virt,
|
||||
.virt_to_phys = hyp_virt_to_phys,
|
||||
.get_page = hyp_get_page,
|
||||
.put_page = hyp_put_page,
|
||||
};
|
||||
pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
|
||||
|
||||
out:
|
||||
/*
|
||||
* We tail-called to here from handle___pkvm_init() and will not return,
|
||||
* so make sure to propagate the return value to the host.
|
||||
*/
|
||||
cpu_reg(host_ctxt, 1) = ret;
|
||||
|
||||
__host_enter(host_ctxt);
|
||||
}
|
||||
|
||||
int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus,
|
||||
unsigned long *per_cpu_base, u32 hyp_va_bits)
|
||||
{
|
||||
struct kvm_nvhe_init_params *params;
|
||||
void *virt = hyp_phys_to_virt(phys);
|
||||
void (*fn)(phys_addr_t params_pa, void *finalize_fn_va);
|
||||
int ret;
|
||||
|
||||
if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size))
|
||||
return -EINVAL;
|
||||
|
||||
hyp_spin_lock_init(&pkvm_pgd_lock);
|
||||
hyp_nr_cpus = nr_cpus;
|
||||
|
||||
ret = divide_memory_pool(virt, size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = recreate_hyp_mappings(phys, size, per_cpu_base, hyp_va_bits);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
update_nvhe_init_params();
|
||||
|
||||
/* Jump in the idmap page to switch to the new page-tables */
|
||||
params = this_cpu_ptr(&kvm_init_params);
|
||||
fn = (typeof(fn))__hyp_pa(__pkvm_init_switch_pgd);
|
||||
fn(__hyp_pa(params), __pkvm_init_finalise);
|
||||
|
||||
unreachable();
|
||||
}
|
||||
22
arch/arm64/kvm/hyp/nvhe/stub.c
Normal file
22
arch/arm64/kvm/hyp/nvhe/stub.c
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Stubs for out-of-line function calls caused by re-using kernel
|
||||
* infrastructure at EL2.
|
||||
*
|
||||
* Copyright (C) 2020 - Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/list.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LIST
|
||||
bool __list_add_valid(struct list_head *new, struct list_head *prev,
|
||||
struct list_head *next)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool __list_del_entry_valid(struct list_head *entry)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -28,6 +28,8 @@
|
|||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#include <nvhe/mem_protect.h>
|
||||
|
||||
/* Non-VHE specific context */
|
||||
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
|
||||
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
|
||||
|
|
@ -41,9 +43,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
|||
__activate_traps_common(vcpu);
|
||||
|
||||
val = CPTR_EL2_DEFAULT;
|
||||
val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
|
||||
val |= CPTR_EL2_TTA | CPTR_EL2_TAM;
|
||||
if (!update_fp_enabled(vcpu)) {
|
||||
val |= CPTR_EL2_TFP;
|
||||
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
|
||||
__activate_traps_fpsimd32(vcpu);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +70,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
|
|||
static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
extern char __kvm_hyp_host_vector[];
|
||||
u64 mdcr_el2;
|
||||
u64 mdcr_el2, cptr;
|
||||
|
||||
___deactivate_traps(vcpu);
|
||||
|
||||
|
|
@ -95,19 +97,17 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu)
|
|||
|
||||
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
|
||||
mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
|
||||
mdcr_el2 |= MDCR_EL2_E2TB_MASK << MDCR_EL2_E2TB_SHIFT;
|
||||
|
||||
write_sysreg(mdcr_el2, mdcr_el2);
|
||||
if (is_protected_kvm_enabled())
|
||||
write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2);
|
||||
else
|
||||
write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
|
||||
write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
|
||||
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
|
||||
}
|
||||
write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2);
|
||||
|
||||
static void __load_host_stage2(void)
|
||||
{
|
||||
write_sysreg(0, vttbr_el2);
|
||||
cptr = CPTR_EL2_DEFAULT;
|
||||
if (vcpu_has_sve(vcpu) && (vcpu->arch.flags & KVM_ARM64_FP_ENABLED))
|
||||
cptr |= CPTR_EL2_TZ;
|
||||
|
||||
write_sysreg(cptr, cptr_el2);
|
||||
write_sysreg(__kvm_hyp_host_vector, vbar_el2);
|
||||
}
|
||||
|
||||
/* Save VGICv3 state on non-VHE systems */
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@
|
|||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#include <nvhe/mem_protect.h>
|
||||
|
||||
struct tlb_inv_context {
|
||||
u64 tcr;
|
||||
};
|
||||
|
|
@ -43,7 +45,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
|
|||
|
||||
static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
|
||||
{
|
||||
write_sysreg(0, vttbr_el2);
|
||||
__load_host_stage2();
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
|
||||
/* Ensure write of the host VMID */
|
||||
|
|
|
|||
|
|
@ -9,8 +9,7 @@
|
|||
|
||||
#include <linux/bitfield.h>
|
||||
#include <asm/kvm_pgtable.h>
|
||||
|
||||
#define KVM_PGTABLE_MAX_LEVELS 4U
|
||||
#include <asm/stage2_pgtable.h>
|
||||
|
||||
#define KVM_PTE_VALID BIT(0)
|
||||
|
||||
|
|
@ -49,6 +48,11 @@
|
|||
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \
|
||||
KVM_PTE_LEAF_ATTR_HI_S2_XN)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_S2_IGNORED GENMASK(58, 55)
|
||||
|
||||
#define KVM_INVALID_PTE_OWNER_MASK GENMASK(63, 56)
|
||||
#define KVM_MAX_OWNER_ID 1
|
||||
|
||||
struct kvm_pgtable_walk_data {
|
||||
struct kvm_pgtable *pgt;
|
||||
struct kvm_pgtable_walker *walker;
|
||||
|
|
@ -68,21 +72,36 @@ static u64 kvm_granule_size(u32 level)
|
|||
return BIT(kvm_granule_shift(level));
|
||||
}
|
||||
|
||||
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
|
||||
{
|
||||
u64 granule = kvm_granule_size(level);
|
||||
#define KVM_PHYS_INVALID (-1ULL)
|
||||
|
||||
static bool kvm_phys_is_valid(u64 phys)
|
||||
{
|
||||
return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_PARANGE_MAX));
|
||||
}
|
||||
|
||||
static bool kvm_level_supports_block_mapping(u32 level)
|
||||
{
|
||||
/*
|
||||
* Reject invalid block mappings and don't bother with 4TB mappings for
|
||||
* 52-bit PAs.
|
||||
*/
|
||||
if (level == 0 || (PAGE_SIZE != SZ_4K && level == 1))
|
||||
return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
|
||||
}
|
||||
|
||||
static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
|
||||
{
|
||||
u64 granule = kvm_granule_size(level);
|
||||
|
||||
if (!kvm_level_supports_block_mapping(level))
|
||||
return false;
|
||||
|
||||
if (granule > (end - addr))
|
||||
return false;
|
||||
|
||||
return IS_ALIGNED(addr, granule) && IS_ALIGNED(phys, granule);
|
||||
if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
|
||||
return false;
|
||||
|
||||
return IS_ALIGNED(addr, granule);
|
||||
}
|
||||
|
||||
static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
|
||||
|
|
@ -152,20 +171,20 @@ static kvm_pte_t kvm_phys_to_pte(u64 pa)
|
|||
return pte;
|
||||
}
|
||||
|
||||
static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte)
|
||||
static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
return __va(kvm_pte_to_phys(pte));
|
||||
return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
|
||||
}
|
||||
|
||||
static void kvm_set_invalid_pte(kvm_pte_t *ptep)
|
||||
static void kvm_clear_pte(kvm_pte_t *ptep)
|
||||
{
|
||||
kvm_pte_t pte = *ptep;
|
||||
WRITE_ONCE(*ptep, pte & ~KVM_PTE_VALID);
|
||||
WRITE_ONCE(*ptep, 0);
|
||||
}
|
||||
|
||||
static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp)
|
||||
static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(__pa(childp));
|
||||
kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
|
||||
|
||||
pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
|
||||
pte |= KVM_PTE_VALID;
|
||||
|
|
@ -187,6 +206,11 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
|
|||
return pte;
|
||||
}
|
||||
|
||||
static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
|
||||
{
|
||||
return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
|
||||
}
|
||||
|
||||
static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
|
||||
u32 level, kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag)
|
||||
|
|
@ -228,7 +252,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
|
|||
goto out;
|
||||
}
|
||||
|
||||
childp = kvm_pte_follow(pte);
|
||||
childp = kvm_pte_follow(pte, data->pgt->mm_ops);
|
||||
ret = __kvm_pgtable_walk(data, childp, level + 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
|
@ -303,12 +327,12 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
}
|
||||
|
||||
struct hyp_map_data {
|
||||
u64 phys;
|
||||
kvm_pte_t attr;
|
||||
u64 phys;
|
||||
kvm_pte_t attr;
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
};
|
||||
|
||||
static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
||||
struct hyp_map_data *data)
|
||||
static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
|
||||
{
|
||||
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
|
||||
u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
|
||||
|
|
@ -333,7 +357,8 @@ static int hyp_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
|||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap);
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF;
|
||||
data->attr = attr;
|
||||
*ptep = attr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -359,6 +384,8 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
enum kvm_pgtable_walk_flags flag, void * const arg)
|
||||
{
|
||||
kvm_pte_t *childp;
|
||||
struct hyp_map_data *data = arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
|
||||
|
||||
if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
|
||||
return 0;
|
||||
|
|
@ -366,11 +393,11 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
|
||||
return -EINVAL;
|
||||
|
||||
childp = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
|
||||
childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
|
||||
if (!childp)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm_set_table_pte(ptep, childp);
|
||||
kvm_set_table_pte(ptep, childp, mm_ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -380,6 +407,7 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
|||
int ret;
|
||||
struct hyp_map_data map_data = {
|
||||
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
|
||||
.mm_ops = pgt->mm_ops,
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = hyp_map_walker,
|
||||
|
|
@ -387,7 +415,7 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
|||
.arg = &map_data,
|
||||
};
|
||||
|
||||
ret = hyp_map_set_prot_attr(prot, &map_data);
|
||||
ret = hyp_set_prot_attr(prot, &map_data.attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
@ -397,16 +425,18 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
|||
return ret;
|
||||
}
|
||||
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
|
||||
int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
|
||||
struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
|
||||
|
||||
pgt->pgd = (kvm_pte_t *)get_zeroed_page(GFP_KERNEL);
|
||||
pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
|
||||
if (!pgt->pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
pgt->ia_bits = va_bits;
|
||||
pgt->start_level = KVM_PGTABLE_MAX_LEVELS - levels;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -414,7 +444,9 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits)
|
|||
static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag, void * const arg)
|
||||
{
|
||||
free_page((unsigned long)kvm_pte_follow(*ptep));
|
||||
struct kvm_pgtable_mm_ops *mm_ops = arg;
|
||||
|
||||
mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -423,29 +455,75 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
|
|||
struct kvm_pgtable_walker walker = {
|
||||
.cb = hyp_free_walker,
|
||||
.flags = KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pgt->mm_ops,
|
||||
};
|
||||
|
||||
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
|
||||
free_page((unsigned long)pgt->pgd);
|
||||
pgt->mm_ops->put_page(pgt->pgd);
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
struct stage2_map_data {
|
||||
u64 phys;
|
||||
kvm_pte_t attr;
|
||||
u8 owner_id;
|
||||
|
||||
kvm_pte_t *anchor;
|
||||
kvm_pte_t *childp;
|
||||
|
||||
struct kvm_s2_mmu *mmu;
|
||||
struct kvm_mmu_memory_cache *memcache;
|
||||
void *memcache;
|
||||
|
||||
struct kvm_pgtable_mm_ops *mm_ops;
|
||||
};
|
||||
|
||||
static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
||||
struct stage2_map_data *data)
|
||||
u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift)
|
||||
{
|
||||
u64 vtcr = VTCR_EL2_FLAGS;
|
||||
u8 lvls;
|
||||
|
||||
vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT;
|
||||
vtcr |= VTCR_EL2_T0SZ(phys_shift);
|
||||
/*
|
||||
* Use a minimum 2 level page table to prevent splitting
|
||||
* host PMD huge pages at stage2.
|
||||
*/
|
||||
lvls = stage2_pgtable_levels(phys_shift);
|
||||
if (lvls < 2)
|
||||
lvls = 2;
|
||||
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
|
||||
|
||||
/*
|
||||
* Enable the Hardware Access Flag management, unconditionally
|
||||
* on all CPUs. The features is RES0 on CPUs without the support
|
||||
* and must be ignored by the CPUs.
|
||||
*/
|
||||
vtcr |= VTCR_EL2_HA;
|
||||
|
||||
/* Set the vmid bits */
|
||||
vtcr |= (get_vmid_bits(mmfr1) == 16) ?
|
||||
VTCR_EL2_VS_16BIT :
|
||||
VTCR_EL2_VS_8BIT;
|
||||
|
||||
return vtcr;
|
||||
}
|
||||
|
||||
static bool stage2_has_fwb(struct kvm_pgtable *pgt)
|
||||
{
|
||||
if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
return false;
|
||||
|
||||
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
|
||||
}
|
||||
|
||||
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
|
||||
|
||||
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
|
||||
kvm_pte_t *ptep)
|
||||
{
|
||||
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
|
||||
kvm_pte_t attr = device ? PAGE_S2_MEMATTR(DEVICE_nGnRE) :
|
||||
PAGE_S2_MEMATTR(NORMAL);
|
||||
kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
|
||||
KVM_S2_MEMATTR(pgt, NORMAL);
|
||||
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
|
||||
|
||||
if (!(prot & KVM_PGTABLE_PROT_X))
|
||||
|
|
@ -461,44 +539,78 @@ static int stage2_map_set_prot_attr(enum kvm_pgtable_prot prot,
|
|||
|
||||
attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh);
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
|
||||
data->attr = attr;
|
||||
*ptep = attr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new)
|
||||
{
|
||||
if (!kvm_pte_valid(old) || !kvm_pte_valid(new))
|
||||
return true;
|
||||
|
||||
return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS));
|
||||
}
|
||||
|
||||
static bool stage2_pte_is_counted(kvm_pte_t pte)
|
||||
{
|
||||
/*
|
||||
* The refcount tracks valid entries as well as invalid entries if they
|
||||
* encode ownership of a page to another entity than the page-table
|
||||
* owner, whose id is 0.
|
||||
*/
|
||||
return !!pte;
|
||||
}
|
||||
|
||||
static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
|
||||
u32 level, struct kvm_pgtable_mm_ops *mm_ops)
|
||||
{
|
||||
/*
|
||||
* Clear the existing PTE, and perform break-before-make with
|
||||
* TLB maintenance if it was valid.
|
||||
*/
|
||||
if (kvm_pte_valid(*ptep)) {
|
||||
kvm_clear_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
|
||||
}
|
||||
|
||||
mm_ops->put_page(ptep);
|
||||
}
|
||||
|
||||
static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
kvm_pte_t new, old = *ptep;
|
||||
u64 granule = kvm_granule_size(level), phys = data->phys;
|
||||
struct page *page = virt_to_page(ptep);
|
||||
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
|
||||
|
||||
if (!kvm_block_mapping_supported(addr, end, phys, level))
|
||||
return -E2BIG;
|
||||
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
if (kvm_pte_valid(old)) {
|
||||
if (kvm_phys_is_valid(phys))
|
||||
new = kvm_init_valid_leaf_pte(phys, data->attr, level);
|
||||
else
|
||||
new = kvm_init_invalid_leaf_owner(data->owner_id);
|
||||
|
||||
if (stage2_pte_is_counted(old)) {
|
||||
/*
|
||||
* Skip updating the PTE if we are trying to recreate the exact
|
||||
* same mapping or only change the access permissions. Instead,
|
||||
* the vCPU will exit one more time from guest if still needed
|
||||
* and then go through the path of relaxing permissions.
|
||||
*/
|
||||
if (!((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)))
|
||||
if (!stage2_pte_needs_update(old, new))
|
||||
return -EAGAIN;
|
||||
|
||||
/*
|
||||
* There's an existing different valid leaf entry, so perform
|
||||
* break-before-make.
|
||||
*/
|
||||
kvm_set_invalid_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
|
||||
put_page(page);
|
||||
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
|
||||
}
|
||||
|
||||
smp_store_release(ptep, new);
|
||||
get_page(page);
|
||||
data->phys += granule;
|
||||
if (stage2_pte_is_counted(new))
|
||||
mm_ops->get_page(ptep);
|
||||
if (kvm_phys_is_valid(phys))
|
||||
data->phys += granule;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -512,7 +624,8 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
|
|||
if (!kvm_block_mapping_supported(addr, end, data->phys, level))
|
||||
return 0;
|
||||
|
||||
kvm_set_invalid_pte(ptep);
|
||||
data->childp = kvm_pte_follow(*ptep, data->mm_ops);
|
||||
kvm_clear_pte(ptep);
|
||||
|
||||
/*
|
||||
* Invalidate the whole stage-2, as we may have numerous leaf
|
||||
|
|
@ -527,13 +640,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
|
|||
static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
int ret;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
|
||||
kvm_pte_t *childp, pte = *ptep;
|
||||
struct page *page = virt_to_page(ptep);
|
||||
int ret;
|
||||
|
||||
if (data->anchor) {
|
||||
if (kvm_pte_valid(pte))
|
||||
put_page(page);
|
||||
if (stage2_pte_is_counted(pte))
|
||||
mm_ops->put_page(ptep);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -548,7 +661,7 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
if (!data->memcache)
|
||||
return -ENOMEM;
|
||||
|
||||
childp = kvm_mmu_memory_cache_alloc(data->memcache);
|
||||
childp = mm_ops->zalloc_page(data->memcache);
|
||||
if (!childp)
|
||||
return -ENOMEM;
|
||||
|
||||
|
|
@ -557,14 +670,11 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
* a table. Accesses beyond 'end' that fall within the new table
|
||||
* will be mapped lazily.
|
||||
*/
|
||||
if (kvm_pte_valid(pte)) {
|
||||
kvm_set_invalid_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, level);
|
||||
put_page(page);
|
||||
}
|
||||
if (stage2_pte_is_counted(pte))
|
||||
stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
|
||||
|
||||
kvm_set_table_pte(ptep, childp);
|
||||
get_page(page);
|
||||
kvm_set_table_pte(ptep, childp, mm_ops);
|
||||
mm_ops->get_page(ptep);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -573,19 +683,25 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
|
|||
kvm_pte_t *ptep,
|
||||
struct stage2_map_data *data)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
|
||||
kvm_pte_t *childp;
|
||||
int ret = 0;
|
||||
|
||||
if (!data->anchor)
|
||||
return 0;
|
||||
|
||||
free_page((unsigned long)kvm_pte_follow(*ptep));
|
||||
put_page(virt_to_page(ptep));
|
||||
|
||||
if (data->anchor == ptep) {
|
||||
childp = data->childp;
|
||||
data->anchor = NULL;
|
||||
data->childp = NULL;
|
||||
ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
|
||||
} else {
|
||||
childp = kvm_pte_follow(*ptep, mm_ops);
|
||||
}
|
||||
|
||||
mm_ops->put_page(childp);
|
||||
mm_ops->put_page(ptep);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
|
@ -627,13 +743,14 @@ static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
|
||||
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
u64 phys, enum kvm_pgtable_prot prot,
|
||||
struct kvm_mmu_memory_cache *mc)
|
||||
void *mc)
|
||||
{
|
||||
int ret;
|
||||
struct stage2_map_data map_data = {
|
||||
.phys = ALIGN_DOWN(phys, PAGE_SIZE),
|
||||
.mmu = pgt->mmu,
|
||||
.memcache = mc,
|
||||
.mm_ops = pgt->mm_ops,
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
|
|
@ -643,7 +760,10 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
.arg = &map_data,
|
||||
};
|
||||
|
||||
ret = stage2_map_set_prot_attr(prot, &map_data);
|
||||
if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys)))
|
||||
return -EINVAL;
|
||||
|
||||
ret = stage2_set_prot_attr(pgt, prot, &map_data.attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
@ -652,38 +772,63 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void stage2_flush_dcache(void *addr, u64 size)
|
||||
int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
void *mc, u8 owner_id)
|
||||
{
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
return;
|
||||
int ret;
|
||||
struct stage2_map_data map_data = {
|
||||
.phys = KVM_PHYS_INVALID,
|
||||
.mmu = pgt->mmu,
|
||||
.memcache = mc,
|
||||
.mm_ops = pgt->mm_ops,
|
||||
.owner_id = owner_id,
|
||||
};
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_map_walker,
|
||||
.flags = KVM_PGTABLE_WALK_TABLE_PRE |
|
||||
KVM_PGTABLE_WALK_LEAF |
|
||||
KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = &map_data,
|
||||
};
|
||||
|
||||
__flush_dcache_area(addr, size);
|
||||
if (owner_id > KVM_MAX_OWNER_ID)
|
||||
return -EINVAL;
|
||||
|
||||
ret = kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool stage2_pte_cacheable(kvm_pte_t pte)
|
||||
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
|
||||
{
|
||||
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
|
||||
return memattr == PAGE_S2_MEMATTR(NORMAL);
|
||||
return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
|
||||
}
|
||||
|
||||
static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
struct kvm_s2_mmu *mmu = arg;
|
||||
struct kvm_pgtable *pgt = arg;
|
||||
struct kvm_s2_mmu *mmu = pgt->mmu;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
|
||||
kvm_pte_t pte = *ptep, *childp = NULL;
|
||||
bool need_flush = false;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
if (!kvm_pte_valid(pte)) {
|
||||
if (stage2_pte_is_counted(pte)) {
|
||||
kvm_clear_pte(ptep);
|
||||
mm_ops->put_page(ptep);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (kvm_pte_table(pte, level)) {
|
||||
childp = kvm_pte_follow(pte);
|
||||
childp = kvm_pte_follow(pte, mm_ops);
|
||||
|
||||
if (page_count(virt_to_page(childp)) != 1)
|
||||
if (mm_ops->page_count(childp) != 1)
|
||||
return 0;
|
||||
} else if (stage2_pte_cacheable(pte)) {
|
||||
need_flush = true;
|
||||
} else if (stage2_pte_cacheable(pgt, pte)) {
|
||||
need_flush = !stage2_has_fwb(pgt);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -691,17 +836,15 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
* block entry and rely on the remaining portions being faulted
|
||||
* back lazily.
|
||||
*/
|
||||
kvm_set_invalid_pte(ptep);
|
||||
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
|
||||
put_page(virt_to_page(ptep));
|
||||
stage2_put_pte(ptep, mmu, addr, level, mm_ops);
|
||||
|
||||
if (need_flush) {
|
||||
stage2_flush_dcache(kvm_pte_follow(pte),
|
||||
__flush_dcache_area(kvm_pte_follow(pte, mm_ops),
|
||||
kvm_granule_size(level));
|
||||
}
|
||||
|
||||
if (childp)
|
||||
free_page((unsigned long)childp);
|
||||
mm_ops->put_page(childp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -710,7 +853,7 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
|||
{
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_unmap_walker,
|
||||
.arg = pgt->mmu,
|
||||
.arg = pgt,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
|
||||
};
|
||||
|
||||
|
|
@ -842,12 +985,14 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
struct kvm_pgtable *pgt = arg;
|
||||
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
|
||||
kvm_pte_t pte = *ptep;
|
||||
|
||||
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pte))
|
||||
if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
|
||||
return 0;
|
||||
|
||||
stage2_flush_dcache(kvm_pte_follow(pte), kvm_granule_size(level));
|
||||
__flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -856,30 +1001,35 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
|
|||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_flush_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
.arg = pgt,
|
||||
};
|
||||
|
||||
if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
|
||||
if (stage2_has_fwb(pgt))
|
||||
return 0;
|
||||
|
||||
return kvm_pgtable_walk(pgt, addr, size, &walker);
|
||||
}
|
||||
|
||||
int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm)
|
||||
int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
|
||||
struct kvm_pgtable_mm_ops *mm_ops,
|
||||
enum kvm_pgtable_stage2_flags flags)
|
||||
{
|
||||
size_t pgd_sz;
|
||||
u64 vtcr = kvm->arch.vtcr;
|
||||
u64 vtcr = arch->vtcr;
|
||||
u32 ia_bits = VTCR_EL2_IPA(vtcr);
|
||||
u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
|
||||
u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
|
||||
|
||||
pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
||||
pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
|
||||
if (!pgt->pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
pgt->ia_bits = ia_bits;
|
||||
pgt->start_level = start_level;
|
||||
pgt->mmu = &kvm->arch.mmu;
|
||||
pgt->mm_ops = mm_ops;
|
||||
pgt->mmu = &arch->mmu;
|
||||
pgt->flags = flags;
|
||||
|
||||
/* Ensure zeroed PGD pages are visible to the hardware walker */
|
||||
dsb(ishst);
|
||||
|
|
@ -890,15 +1040,16 @@ static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
|
|||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = arg;
|
||||
kvm_pte_t pte = *ptep;
|
||||
|
||||
if (!kvm_pte_valid(pte))
|
||||
if (!stage2_pte_is_counted(pte))
|
||||
return 0;
|
||||
|
||||
put_page(virt_to_page(ptep));
|
||||
mm_ops->put_page(ptep);
|
||||
|
||||
if (kvm_pte_table(pte, level))
|
||||
free_page((unsigned long)kvm_pte_follow(pte));
|
||||
mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -910,10 +1061,85 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
|||
.cb = stage2_free_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF |
|
||||
KVM_PGTABLE_WALK_TABLE_POST,
|
||||
.arg = pgt->mm_ops,
|
||||
};
|
||||
|
||||
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
|
||||
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
|
||||
free_pages_exact(pgt->pgd, pgd_sz);
|
||||
pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
#define KVM_PTE_LEAF_S2_COMPAT_MASK (KVM_PTE_LEAF_ATTR_S2_PERMS | \
|
||||
KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR | \
|
||||
KVM_PTE_LEAF_ATTR_S2_IGNORED)
|
||||
|
||||
static int stage2_check_permission_walker(u64 addr, u64 end, u32 level,
|
||||
kvm_pte_t *ptep,
|
||||
enum kvm_pgtable_walk_flags flag,
|
||||
void * const arg)
|
||||
{
|
||||
kvm_pte_t old_attr, pte = *ptep, *new_attr = arg;
|
||||
|
||||
/*
|
||||
* Compatible mappings are either invalid and owned by the page-table
|
||||
* owner (whose id is 0), or valid with matching permission attributes.
|
||||
*/
|
||||
if (kvm_pte_valid(pte)) {
|
||||
old_attr = pte & KVM_PTE_LEAF_S2_COMPAT_MASK;
|
||||
if (old_attr != *new_attr)
|
||||
return -EEXIST;
|
||||
} else if (pte) {
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot,
|
||||
struct kvm_mem_range *range)
|
||||
{
|
||||
kvm_pte_t attr;
|
||||
struct kvm_pgtable_walker check_perm_walker = {
|
||||
.cb = stage2_check_permission_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF,
|
||||
.arg = &attr,
|
||||
};
|
||||
u64 granule, start, end;
|
||||
u32 level;
|
||||
int ret;
|
||||
|
||||
ret = stage2_set_prot_attr(pgt, prot, &attr);
|
||||
if (ret)
|
||||
return ret;
|
||||
attr &= KVM_PTE_LEAF_S2_COMPAT_MASK;
|
||||
|
||||
for (level = pgt->start_level; level < KVM_PGTABLE_MAX_LEVELS; level++) {
|
||||
granule = kvm_granule_size(level);
|
||||
start = ALIGN_DOWN(addr, granule);
|
||||
end = start + granule;
|
||||
|
||||
if (!kvm_level_supports_block_mapping(level))
|
||||
continue;
|
||||
|
||||
if (start < range->start || range->end < end)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Check the presence of existing mappings with incompatible
|
||||
* permissions within the current block range, and try one level
|
||||
* deeper if one is found.
|
||||
*/
|
||||
ret = kvm_pgtable_walk(pgt, start, granule, &check_perm_walker);
|
||||
if (ret != -EEXIST)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
range->start = start;
|
||||
range->end = end;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
113
arch/arm64/kvm/hyp/reserved_mem.c
Normal file
113
arch/arm64/kvm/hyp/reserved_mem.c
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2020 - Google LLC
|
||||
* Author: Quentin Perret <qperret@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
#include <nvhe/memory.h>
|
||||
#include <nvhe/mm.h>
|
||||
|
||||
static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
|
||||
static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
|
||||
|
||||
phys_addr_t hyp_mem_base;
|
||||
phys_addr_t hyp_mem_size;
|
||||
|
||||
static int cmp_hyp_memblock(const void *p1, const void *p2)
|
||||
{
|
||||
const struct memblock_region *r1 = p1;
|
||||
const struct memblock_region *r2 = p2;
|
||||
|
||||
return r1->base < r2->base ? -1 : (r1->base > r2->base);
|
||||
}
|
||||
|
||||
static void __init sort_memblock_regions(void)
|
||||
{
|
||||
sort(hyp_memory,
|
||||
*hyp_memblock_nr_ptr,
|
||||
sizeof(struct memblock_region),
|
||||
cmp_hyp_memblock,
|
||||
NULL);
|
||||
}
|
||||
|
||||
static int __init register_memblock_regions(void)
|
||||
{
|
||||
struct memblock_region *reg;
|
||||
|
||||
for_each_mem_region(reg) {
|
||||
if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS)
|
||||
return -ENOMEM;
|
||||
|
||||
hyp_memory[*hyp_memblock_nr_ptr] = *reg;
|
||||
(*hyp_memblock_nr_ptr)++;
|
||||
}
|
||||
sort_memblock_regions();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init kvm_hyp_reserve(void)
|
||||
{
|
||||
u64 nr_pages, prev, hyp_mem_pages = 0;
|
||||
int ret;
|
||||
|
||||
if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
|
||||
return;
|
||||
|
||||
if (kvm_get_mode() != KVM_MODE_PROTECTED)
|
||||
return;
|
||||
|
||||
ret = register_memblock_regions();
|
||||
if (ret) {
|
||||
*hyp_memblock_nr_ptr = 0;
|
||||
kvm_err("Failed to register hyp memblocks: %d\n", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
hyp_mem_pages += hyp_s1_pgtable_pages();
|
||||
hyp_mem_pages += host_s2_mem_pgtable_pages();
|
||||
hyp_mem_pages += host_s2_dev_pgtable_pages();
|
||||
|
||||
/*
|
||||
* The hyp_vmemmap needs to be backed by pages, but these pages
|
||||
* themselves need to be present in the vmemmap, so compute the number
|
||||
* of pages needed by looking for a fixed point.
|
||||
*/
|
||||
nr_pages = 0;
|
||||
do {
|
||||
prev = nr_pages;
|
||||
nr_pages = hyp_mem_pages + prev;
|
||||
nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE);
|
||||
nr_pages += __hyp_pgtable_max_pages(nr_pages);
|
||||
} while (nr_pages != prev);
|
||||
hyp_mem_pages += nr_pages;
|
||||
|
||||
/*
|
||||
* Try to allocate a PMD-aligned region to reduce TLB pressure once
|
||||
* this is unmapped from the host stage-2, and fallback to PAGE_SIZE.
|
||||
*/
|
||||
hyp_mem_size = hyp_mem_pages << PAGE_SHIFT;
|
||||
hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
|
||||
ALIGN(hyp_mem_size, PMD_SIZE),
|
||||
PMD_SIZE);
|
||||
if (!hyp_mem_base)
|
||||
hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(),
|
||||
hyp_mem_size, PAGE_SIZE);
|
||||
else
|
||||
hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE);
|
||||
|
||||
if (!hyp_mem_base) {
|
||||
kvm_err("Failed to reserve hyp memory\n");
|
||||
return;
|
||||
}
|
||||
memblock_reserve(hyp_mem_base, hyp_mem_size);
|
||||
|
||||
kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
|
||||
hyp_mem_base);
|
||||
}
|
||||
|
|
@ -27,8 +27,6 @@
|
|||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
|
||||
|
||||
/* VHE specific context */
|
||||
DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
|
||||
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
|
||||
|
|
@ -207,7 +205,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
|
|||
__deactivate_traps(vcpu);
|
||||
sysreg_restore_host_state_vhe(host_ctxt);
|
||||
|
||||
panic(__hyp_panic_string,
|
||||
panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n",
|
||||
spsr, elr,
|
||||
read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
|
||||
read_sysreg(hpfar_el2), par, vcpu);
|
||||
|
|
|
|||
|
|
@ -9,16 +9,65 @@
|
|||
#include <kvm/arm_hypercalls.h>
|
||||
#include <kvm/arm_psci.h>
|
||||
|
||||
static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
|
||||
{
|
||||
struct system_time_snapshot systime_snapshot;
|
||||
u64 cycles = ~0UL;
|
||||
u32 feature;
|
||||
|
||||
/*
|
||||
* system time and counter value must captured at the same
|
||||
* time to keep consistency and precision.
|
||||
*/
|
||||
ktime_get_snapshot(&systime_snapshot);
|
||||
|
||||
/*
|
||||
* This is only valid if the current clocksource is the
|
||||
* architected counter, as this is the only one the guest
|
||||
* can see.
|
||||
*/
|
||||
if (systime_snapshot.cs_id != CSID_ARM_ARCH_COUNTER)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The guest selects one of the two reference counters
|
||||
* (virtual or physical) with the first argument of the SMCCC
|
||||
* call. In case the identifier is not supported, error out.
|
||||
*/
|
||||
feature = smccc_get_arg1(vcpu);
|
||||
switch (feature) {
|
||||
case KVM_PTP_VIRT_COUNTER:
|
||||
cycles = systime_snapshot.cycles - vcpu_read_sys_reg(vcpu, CNTVOFF_EL2);
|
||||
break;
|
||||
case KVM_PTP_PHYS_COUNTER:
|
||||
cycles = systime_snapshot.cycles;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* This relies on the top bit of val[0] never being set for
|
||||
* valid values of system time, because that is *really* far
|
||||
* in the future (about 292 years from 1970, and at that stage
|
||||
* nobody will give a damn about it).
|
||||
*/
|
||||
val[0] = upper_32_bits(systime_snapshot.real);
|
||||
val[1] = lower_32_bits(systime_snapshot.real);
|
||||
val[2] = upper_32_bits(cycles);
|
||||
val[3] = lower_32_bits(cycles);
|
||||
}
|
||||
|
||||
int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 func_id = smccc_get_function(vcpu);
|
||||
long val = SMCCC_RET_NOT_SUPPORTED;
|
||||
u64 val[4] = {SMCCC_RET_NOT_SUPPORTED};
|
||||
u32 feature;
|
||||
gpa_t gpa;
|
||||
|
||||
switch (func_id) {
|
||||
case ARM_SMCCC_VERSION_FUNC_ID:
|
||||
val = ARM_SMCCC_VERSION_1_1;
|
||||
val[0] = ARM_SMCCC_VERSION_1_1;
|
||||
break;
|
||||
case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
|
||||
feature = smccc_get_arg1(vcpu);
|
||||
|
|
@ -28,10 +77,10 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
|
|||
case SPECTRE_VULNERABLE:
|
||||
break;
|
||||
case SPECTRE_MITIGATED:
|
||||
val = SMCCC_RET_SUCCESS;
|
||||
val[0] = SMCCC_RET_SUCCESS;
|
||||
break;
|
||||
case SPECTRE_UNAFFECTED:
|
||||
val = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
|
||||
val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
|
@ -54,22 +103,35 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
|
|||
break;
|
||||
fallthrough;
|
||||
case SPECTRE_UNAFFECTED:
|
||||
val = SMCCC_RET_NOT_REQUIRED;
|
||||
val[0] = SMCCC_RET_NOT_REQUIRED;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ARM_SMCCC_HV_PV_TIME_FEATURES:
|
||||
val = SMCCC_RET_SUCCESS;
|
||||
val[0] = SMCCC_RET_SUCCESS;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ARM_SMCCC_HV_PV_TIME_FEATURES:
|
||||
val = kvm_hypercall_pv_features(vcpu);
|
||||
val[0] = kvm_hypercall_pv_features(vcpu);
|
||||
break;
|
||||
case ARM_SMCCC_HV_PV_TIME_ST:
|
||||
gpa = kvm_init_stolen_time(vcpu);
|
||||
if (gpa != GPA_INVALID)
|
||||
val = gpa;
|
||||
val[0] = gpa;
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID:
|
||||
val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0;
|
||||
val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1;
|
||||
val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2;
|
||||
val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3;
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID:
|
||||
val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES);
|
||||
val[0] |= BIT(ARM_SMCCC_KVM_FUNC_PTP);
|
||||
break;
|
||||
case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID:
|
||||
kvm_ptp_get_time(vcpu, val);
|
||||
break;
|
||||
case ARM_SMCCC_TRNG_VERSION:
|
||||
case ARM_SMCCC_TRNG_FEATURES:
|
||||
|
|
@ -81,6 +143,6 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
|
|||
return kvm_psci_call(vcpu);
|
||||
}
|
||||
|
||||
smccc_set_retval(vcpu, val, 0, 0, 0);
|
||||
smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,44 @@ static bool kvm_is_device_pfn(unsigned long pfn)
|
|||
return !pfn_valid(pfn);
|
||||
}
|
||||
|
||||
static void *stage2_memcache_zalloc_page(void *arg)
|
||||
{
|
||||
struct kvm_mmu_memory_cache *mc = arg;
|
||||
|
||||
/* Allocated with __GFP_ZERO, so no need to zero */
|
||||
return kvm_mmu_memory_cache_alloc(mc);
|
||||
}
|
||||
|
||||
static void *kvm_host_zalloc_pages_exact(size_t size)
|
||||
{
|
||||
return alloc_pages_exact(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
|
||||
}
|
||||
|
||||
static void kvm_host_get_page(void *addr)
|
||||
{
|
||||
get_page(virt_to_page(addr));
|
||||
}
|
||||
|
||||
static void kvm_host_put_page(void *addr)
|
||||
{
|
||||
put_page(virt_to_page(addr));
|
||||
}
|
||||
|
||||
static int kvm_host_page_count(void *addr)
|
||||
{
|
||||
return page_count(virt_to_page(addr));
|
||||
}
|
||||
|
||||
static phys_addr_t kvm_host_pa(void *addr)
|
||||
{
|
||||
return __pa(addr);
|
||||
}
|
||||
|
||||
static void *kvm_host_va(phys_addr_t phys)
|
||||
{
|
||||
return __va(phys);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unmapping vs dcache management:
|
||||
*
|
||||
|
|
@ -127,7 +165,7 @@ static bool kvm_is_device_pfn(unsigned long pfn)
|
|||
static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
|
||||
bool may_block)
|
||||
{
|
||||
struct kvm *kvm = mmu->kvm;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
phys_addr_t end = start + size;
|
||||
|
||||
assert_spin_locked(&kvm->mmu_lock);
|
||||
|
|
@ -183,15 +221,39 @@ void free_hyp_pgds(void)
|
|||
if (hyp_pgtable) {
|
||||
kvm_pgtable_hyp_destroy(hyp_pgtable);
|
||||
kfree(hyp_pgtable);
|
||||
hyp_pgtable = NULL;
|
||||
}
|
||||
mutex_unlock(&kvm_hyp_pgd_mutex);
|
||||
}
|
||||
|
||||
static bool kvm_host_owns_hyp_mappings(void)
|
||||
{
|
||||
if (static_branch_likely(&kvm_protected_mode_initialized))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* This can happen at boot time when __create_hyp_mappings() is called
|
||||
* after the hyp protection has been enabled, but the static key has
|
||||
* not been flipped yet.
|
||||
*/
|
||||
if (!hyp_pgtable && is_protected_kvm_enabled())
|
||||
return false;
|
||||
|
||||
WARN_ON(!hyp_pgtable);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int __create_hyp_mappings(unsigned long start, unsigned long size,
|
||||
unsigned long phys, enum kvm_pgtable_prot prot)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!kvm_host_owns_hyp_mappings()) {
|
||||
return kvm_call_hyp_nvhe(__pkvm_create_mappings,
|
||||
start, size, phys, prot);
|
||||
}
|
||||
|
||||
mutex_lock(&kvm_hyp_pgd_mutex);
|
||||
err = kvm_pgtable_hyp_map(hyp_pgtable, start, size, phys, prot);
|
||||
mutex_unlock(&kvm_hyp_pgd_mutex);
|
||||
|
|
@ -253,6 +315,16 @@ static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
|
|||
unsigned long base;
|
||||
int ret = 0;
|
||||
|
||||
if (!kvm_host_owns_hyp_mappings()) {
|
||||
base = kvm_call_hyp_nvhe(__pkvm_create_private_mapping,
|
||||
phys_addr, size, prot);
|
||||
if (IS_ERR_OR_NULL((void *)base))
|
||||
return PTR_ERR((void *)base);
|
||||
*haddr = base;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
mutex_lock(&kvm_hyp_pgd_mutex);
|
||||
|
||||
/*
|
||||
|
|
@ -351,6 +423,17 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
|
||||
.zalloc_page = stage2_memcache_zalloc_page,
|
||||
.zalloc_pages_exact = kvm_host_zalloc_pages_exact,
|
||||
.free_pages_exact = free_pages_exact,
|
||||
.get_page = kvm_host_get_page,
|
||||
.put_page = kvm_host_put_page,
|
||||
.page_count = kvm_host_page_count,
|
||||
.phys_to_virt = kvm_host_va,
|
||||
.virt_to_phys = kvm_host_pa,
|
||||
};
|
||||
|
||||
/**
|
||||
* kvm_init_stage2_mmu - Initialise a S2 MMU strucrure
|
||||
* @kvm: The pointer to the KVM structure
|
||||
|
|
@ -374,7 +457,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
|||
if (!pgt)
|
||||
return -ENOMEM;
|
||||
|
||||
err = kvm_pgtable_stage2_init(pgt, kvm);
|
||||
err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops);
|
||||
if (err)
|
||||
goto out_free_pgtable;
|
||||
|
||||
|
|
@ -387,7 +470,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
|
|||
for_each_possible_cpu(cpu)
|
||||
*per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1;
|
||||
|
||||
mmu->kvm = kvm;
|
||||
mmu->arch = &kvm->arch;
|
||||
mmu->pgt = pgt;
|
||||
mmu->pgd_phys = __pa(pgt->pgd);
|
||||
mmu->vmid.vmid_gen = 0;
|
||||
|
|
@ -421,10 +504,11 @@ static void stage2_unmap_memslot(struct kvm *kvm,
|
|||
* +--------------------------------------------+
|
||||
*/
|
||||
do {
|
||||
struct vm_area_struct *vma = find_vma(current->mm, hva);
|
||||
struct vm_area_struct *vma;
|
||||
hva_t vm_start, vm_end;
|
||||
|
||||
if (!vma || vma->vm_start >= reg_end)
|
||||
vma = find_vma_intersection(current->mm, hva, reg_end);
|
||||
if (!vma)
|
||||
break;
|
||||
|
||||
/*
|
||||
|
|
@ -469,7 +553,7 @@ void stage2_unmap_vm(struct kvm *kvm)
|
|||
|
||||
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
|
||||
{
|
||||
struct kvm *kvm = mmu->kvm;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
struct kvm_pgtable *pgt = NULL;
|
||||
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
|
|
@ -538,7 +622,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
|
|||
*/
|
||||
static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
|
||||
{
|
||||
struct kvm *kvm = mmu->kvm;
|
||||
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
|
||||
stage2_apply_range_resched(kvm, addr, end, kvm_pgtable_stage2_wrprotect);
|
||||
}
|
||||
|
||||
|
|
@ -555,7 +639,7 @@ static void stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_
|
|||
* Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired,
|
||||
* serializing operations for VM memory regions.
|
||||
*/
|
||||
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
|
||||
{
|
||||
struct kvm_memslots *slots = kvm_memslots(kvm);
|
||||
struct kvm_memory_slot *memslot = id_to_memslot(slots, slot);
|
||||
|
|
@ -839,13 +923,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
|
||||
* the page we just got a reference to gets unmapped before we have a
|
||||
* chance to grab the mmu_lock, which ensure that if the page gets
|
||||
* unmapped afterwards, the call to kvm_unmap_hva will take it away
|
||||
* unmapped afterwards, the call to kvm_unmap_gfn will take it away
|
||||
* from us again properly. This smp_rmb() interacts with the smp_wmb()
|
||||
* in kvm_mmu_notifier_invalidate_<page|range_end>.
|
||||
*
|
||||
* Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
|
||||
* used to avoid unnecessary overhead introduced to locate the memory
|
||||
* slot because it's always fixed even @gfn is adjusted for huge pages.
|
||||
*/
|
||||
smp_rmb();
|
||||
|
||||
pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
|
||||
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
|
||||
write_fault, &writable, NULL);
|
||||
if (pfn == KVM_PFN_ERR_HWPOISON) {
|
||||
kvm_send_hwpoison_signal(hva, vma_shift);
|
||||
return 0;
|
||||
|
|
@ -911,7 +1000,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
|||
/* Mark the page dirty only if the fault is handled successfully */
|
||||
if (writable && !ret) {
|
||||
kvm_set_pfn_dirty(pfn);
|
||||
mark_page_dirty(kvm, gfn);
|
||||
mark_page_dirty_in_slot(kvm, memslot, gfn);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
|
|
@ -1064,126 +1153,70 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int handle_hva_to_gpa(struct kvm *kvm,
|
||||
unsigned long start,
|
||||
unsigned long end,
|
||||
int (*handler)(struct kvm *kvm,
|
||||
gpa_t gpa, u64 size,
|
||||
void *data),
|
||||
void *data)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int ret = 0;
|
||||
|
||||
slots = kvm_memslots(kvm);
|
||||
|
||||
/* we only care about the pages that the guest sees */
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
unsigned long hva_start, hva_end;
|
||||
gfn_t gpa;
|
||||
|
||||
hva_start = max(start, memslot->userspace_addr);
|
||||
hva_end = min(end, memslot->userspace_addr +
|
||||
(memslot->npages << PAGE_SHIFT));
|
||||
if (hva_start >= hva_end)
|
||||
continue;
|
||||
|
||||
gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT;
|
||||
ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
{
|
||||
unsigned flags = *(unsigned *)data;
|
||||
bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
|
||||
|
||||
__unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_unmap_hva_range(struct kvm *kvm,
|
||||
unsigned long start, unsigned long end, unsigned flags)
|
||||
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return 0;
|
||||
|
||||
trace_kvm_unmap_hva_range(start, end);
|
||||
handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
|
||||
__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
|
||||
(range->end - range->start) << PAGE_SHIFT,
|
||||
range->may_block);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
kvm_pfn_t *pfn = (kvm_pfn_t *)data;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* The MMU notifiers will have unmapped a huge PMD before calling
|
||||
* ->change_pte() (which in turn calls kvm_set_spte_hva()) and
|
||||
* therefore we never need to clear out a huge PMD through this
|
||||
* calling path and a memcache is not required.
|
||||
*/
|
||||
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, gpa, PAGE_SIZE,
|
||||
__pfn_to_phys(*pfn), KVM_PGTABLE_PROT_R, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
|
||||
{
|
||||
unsigned long end = hva + PAGE_SIZE;
|
||||
kvm_pfn_t pfn = pte_pfn(pte);
|
||||
kvm_pfn_t pfn = pte_pfn(range->pte);
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return 0;
|
||||
|
||||
trace_kvm_set_spte_hva(hva);
|
||||
WARN_ON(range->end - range->start != 1);
|
||||
|
||||
/*
|
||||
* We've moved a page around, probably through CoW, so let's treat it
|
||||
* just like a translation fault and clean the cache to the PoC.
|
||||
*/
|
||||
clean_dcache_guest_page(pfn, PAGE_SIZE);
|
||||
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pfn);
|
||||
|
||||
/*
|
||||
* The MMU notifiers will have unmapped a huge PMD before calling
|
||||
* ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
|
||||
* therefore we never need to clear out a huge PMD through this
|
||||
* calling path and a memcache is not required.
|
||||
*/
|
||||
kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
|
||||
PAGE_SIZE, __pfn_to_phys(pfn),
|
||||
KVM_PGTABLE_PROT_R, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
pte_t pte;
|
||||
u64 size = (range->end - range->start) << PAGE_SHIFT;
|
||||
kvm_pte_t kpte;
|
||||
pte_t pte;
|
||||
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return 0;
|
||||
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
|
||||
kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt, gpa);
|
||||
|
||||
kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
|
||||
range->start << PAGE_SHIFT);
|
||||
pte = __pte(kpte);
|
||||
return pte_valid(pte) && pte_young(pte);
|
||||
}
|
||||
|
||||
static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
|
||||
{
|
||||
WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
|
||||
return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt, gpa);
|
||||
}
|
||||
|
||||
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
|
||||
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
|
||||
{
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return 0;
|
||||
trace_kvm_age_hva(start, end);
|
||||
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
|
||||
}
|
||||
|
||||
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
|
||||
{
|
||||
if (!kvm->arch.mmu.pgt)
|
||||
return 0;
|
||||
trace_kvm_test_age_hva(hva);
|
||||
return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE,
|
||||
kvm_test_age_hva_handler, NULL);
|
||||
return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
|
||||
range->start << PAGE_SHIFT);
|
||||
}
|
||||
|
||||
phys_addr_t kvm_mmu_get_httbr(void)
|
||||
|
|
@ -1208,10 +1241,22 @@ static int kvm_map_idmap_text(void)
|
|||
return err;
|
||||
}
|
||||
|
||||
int kvm_mmu_init(void)
|
||||
static void *kvm_hyp_zalloc_page(void *arg)
|
||||
{
|
||||
return (void *)get_zeroed_page(GFP_KERNEL);
|
||||
}
|
||||
|
||||
static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
|
||||
.zalloc_page = kvm_hyp_zalloc_page,
|
||||
.get_page = kvm_host_get_page,
|
||||
.put_page = kvm_host_put_page,
|
||||
.phys_to_virt = kvm_host_va,
|
||||
.virt_to_phys = kvm_host_pa,
|
||||
};
|
||||
|
||||
int kvm_mmu_init(u32 *hyp_va_bits)
|
||||
{
|
||||
int err;
|
||||
u32 hyp_va_bits;
|
||||
|
||||
hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
|
||||
hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
|
||||
|
|
@ -1225,8 +1270,8 @@ int kvm_mmu_init(void)
|
|||
*/
|
||||
BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
|
||||
|
||||
hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
|
||||
kvm_debug("Using %u-bit virtual addresses at EL2\n", hyp_va_bits);
|
||||
*hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
|
||||
kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
|
||||
kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
|
||||
kvm_debug("HYP VA range: %lx:%lx\n",
|
||||
kern_hyp_va(PAGE_OFFSET),
|
||||
|
|
@ -1251,7 +1296,7 @@ int kvm_mmu_init(void)
|
|||
goto out;
|
||||
}
|
||||
|
||||
err = kvm_pgtable_hyp_init(hyp_pgtable, hyp_va_bits);
|
||||
err = kvm_pgtable_hyp_init(hyp_pgtable, *hyp_va_bits, &kvm_hyp_mm_ops);
|
||||
if (err)
|
||||
goto out_free_pgtable;
|
||||
|
||||
|
|
@ -1329,10 +1374,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
|
|||
* +--------------------------------------------+
|
||||
*/
|
||||
do {
|
||||
struct vm_area_struct *vma = find_vma(current->mm, hva);
|
||||
struct vm_area_struct *vma;
|
||||
hva_t vm_start, vm_end;
|
||||
|
||||
if (!vma || vma->vm_start >= reg_end)
|
||||
vma = find_vma_intersection(current->mm, hva, reg_end);
|
||||
if (!vma)
|
||||
break;
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -50,12 +50,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
|
|||
|
||||
int kvm_perf_init(void)
|
||||
{
|
||||
/*
|
||||
* Check if HW_PERF_EVENTS are supported by checking the number of
|
||||
* hardware performance counters. This could ensure the presence of
|
||||
* a physical PMU and CONFIG_PERF_EVENT is selected.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
|
||||
if (kvm_pmu_probe_pmuver() != 0xf && !is_protected_kvm_enabled())
|
||||
static_branch_enable(&kvm_arm_pmu_available);
|
||||
|
||||
return perf_register_guest_info_callbacks(&kvm_guest_cbs);
|
||||
|
|
|
|||
|
|
@ -739,7 +739,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
|
|||
kvm_pmu_create_perf_event(vcpu, select_idx);
|
||||
}
|
||||
|
||||
static int kvm_pmu_probe_pmuver(void)
|
||||
int kvm_pmu_probe_pmuver(void)
|
||||
{
|
||||
struct perf_event_attr attr = { };
|
||||
struct perf_event *event;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr)
|
|||
{
|
||||
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
|
||||
|
||||
if (!ctx || !kvm_pmu_switch_needed(attr))
|
||||
if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr))
|
||||
return;
|
||||
|
||||
if (!attr->exclude_host)
|
||||
|
|
@ -49,7 +49,7 @@ void kvm_clr_pmu_events(u32 clr)
|
|||
{
|
||||
struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data);
|
||||
|
||||
if (!ctx)
|
||||
if (!kvm_arm_support_pmu_v3() || !ctx)
|
||||
return;
|
||||
|
||||
ctx->pmu_events.events_host &= ~clr;
|
||||
|
|
@ -172,7 +172,7 @@ void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu)
|
|||
struct kvm_host_data *host;
|
||||
u32 events_guest, events_host;
|
||||
|
||||
if (!has_vhe())
|
||||
if (!kvm_arm_support_pmu_v3() || !has_vhe())
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
|
|
@ -193,7 +193,7 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
|
|||
struct kvm_host_data *host;
|
||||
u32 events_guest, events_host;
|
||||
|
||||
if (!has_vhe())
|
||||
if (!kvm_arm_support_pmu_v3() || !has_vhe())
|
||||
return;
|
||||
|
||||
host = this_cpu_ptr_hyp_sym(kvm_host_data);
|
||||
|
|
|
|||
|
|
@ -74,10 +74,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
|
|||
if (!system_supports_sve())
|
||||
return -EINVAL;
|
||||
|
||||
/* Verify that KVM startup enforced this when SVE was detected: */
|
||||
if (WARN_ON(!has_vhe()))
|
||||
return -EINVAL;
|
||||
|
||||
vcpu->arch.sve_max_vl = kvm_sve_max_vl;
|
||||
|
||||
/*
|
||||
|
|
@ -242,6 +238,11 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
|
|||
|
||||
/* Reset core registers */
|
||||
memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu)));
|
||||
memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs));
|
||||
vcpu->arch.ctxt.spsr_abt = 0;
|
||||
vcpu->arch.ctxt.spsr_und = 0;
|
||||
vcpu->arch.ctxt.spsr_irq = 0;
|
||||
vcpu->arch.ctxt.spsr_fiq = 0;
|
||||
vcpu_gp_regs(vcpu)->pstate = pstate;
|
||||
|
||||
/* Reset system registers */
|
||||
|
|
@ -333,19 +334,10 @@ int kvm_set_ipa_limit(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Configure the VTCR_EL2 for this VM. The VTCR value is common
|
||||
* across all the physical CPUs on the system. We use system wide
|
||||
* sanitised values to fill in different fields, except for Hardware
|
||||
* Management of Access Flags. HA Flag is set unconditionally on
|
||||
* all CPUs, as it is safe to run with or without the feature and
|
||||
* the bit is RES0 on CPUs that don't support it.
|
||||
*/
|
||||
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
||||
{
|
||||
u64 vtcr = VTCR_EL2_FLAGS, mmfr0;
|
||||
u32 parange, phys_shift;
|
||||
u8 lvls;
|
||||
u64 mmfr0, mmfr1;
|
||||
u32 phys_shift;
|
||||
|
||||
if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
|
||||
return -EINVAL;
|
||||
|
|
@ -365,33 +357,8 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
|
|||
}
|
||||
|
||||
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
|
||||
parange = cpuid_feature_extract_unsigned_field(mmfr0,
|
||||
ID_AA64MMFR0_PARANGE_SHIFT);
|
||||
if (parange > ID_AA64MMFR0_PARANGE_MAX)
|
||||
parange = ID_AA64MMFR0_PARANGE_MAX;
|
||||
vtcr |= parange << VTCR_EL2_PS_SHIFT;
|
||||
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
|
||||
kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
|
||||
|
||||
vtcr |= VTCR_EL2_T0SZ(phys_shift);
|
||||
/*
|
||||
* Use a minimum 2 level page table to prevent splitting
|
||||
* host PMD huge pages at stage2.
|
||||
*/
|
||||
lvls = stage2_pgtable_levels(phys_shift);
|
||||
if (lvls < 2)
|
||||
lvls = 2;
|
||||
vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
|
||||
|
||||
/*
|
||||
* Enable the Hardware Access Flag management, unconditionally
|
||||
* on all CPUs. The features is RES0 on CPUs without the support
|
||||
* and must be ignored by the CPUs.
|
||||
*/
|
||||
vtcr |= VTCR_EL2_HA;
|
||||
|
||||
/* Set the vmid bits */
|
||||
vtcr |= (kvm_get_vmid_bits() == 16) ?
|
||||
VTCR_EL2_VS_16BIT :
|
||||
VTCR_EL2_VS_8BIT;
|
||||
kvm->arch.vtcr = vtcr;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1063,6 +1063,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
|
|||
val = cpuid_feature_cap_perfmon_field(val,
|
||||
ID_AA64DFR0_PMUVER_SHIFT,
|
||||
kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_PMUVER_8_4 : 0);
|
||||
/* Hide SPE from guests */
|
||||
val &= ~FEATURE(ID_AA64DFR0_PMSVER);
|
||||
break;
|
||||
case SYS_ID_DFR0_EL1:
|
||||
/* Limit guests to PMUv3 for ARMv8.4 */
|
||||
|
|
@ -1472,6 +1474,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
|||
{ SYS_DESC(SYS_GCR_EL1), undef_access },
|
||||
|
||||
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
|
||||
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
|
||||
{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
|
||||
{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
|
||||
|
|
@ -1501,6 +1504,19 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
|||
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
|
||||
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
|
||||
|
||||
{ SYS_DESC(SYS_PMSCR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSNEVFR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSICR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSIRR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSFCR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSEVFR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSLATFR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMSIDR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMBLIMITR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMBPTR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_PMBSR_EL1), undef_access },
|
||||
/* PMBIDR_EL1 is not trapped */
|
||||
|
||||
{ PMU_SYS_REG(SYS_PMINTENSET_EL1),
|
||||
.access = access_pminten, .reg = PMINTENSET_EL1 },
|
||||
{ PMU_SYS_REG(SYS_PMINTENCLR_EL1),
|
||||
|
|
|
|||
|
|
@ -135,72 +135,6 @@ TRACE_EVENT(kvm_mmio_emulate,
|
|||
__entry->vcpu_pc, __entry->instr, __entry->cpsr)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_unmap_hva_range,
|
||||
TP_PROTO(unsigned long start, unsigned long end),
|
||||
TP_ARGS(start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, start )
|
||||
__field( unsigned long, end )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->start = start;
|
||||
__entry->end = end;
|
||||
),
|
||||
|
||||
TP_printk("mmu notifier unmap range: %#016lx -- %#016lx",
|
||||
__entry->start, __entry->end)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_set_spte_hva,
|
||||
TP_PROTO(unsigned long hva),
|
||||
TP_ARGS(hva),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, hva )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->hva = hva;
|
||||
),
|
||||
|
||||
TP_printk("mmu notifier set pte hva: %#016lx", __entry->hva)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_age_hva,
|
||||
TP_PROTO(unsigned long start, unsigned long end),
|
||||
TP_ARGS(start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, start )
|
||||
__field( unsigned long, end )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->start = start;
|
||||
__entry->end = end;
|
||||
),
|
||||
|
||||
TP_printk("mmu notifier age hva: %#016lx -- %#016lx",
|
||||
__entry->start, __entry->end)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_test_age_hva,
|
||||
TP_PROTO(unsigned long hva),
|
||||
TP_ARGS(hva),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( unsigned long, hva )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->hva = hva;
|
||||
),
|
||||
|
||||
TP_printk("mmu notifier test age hva: %#016lx", __entry->hva)
|
||||
);
|
||||
|
||||
TRACE_EVENT(kvm_set_way_flush,
|
||||
TP_PROTO(unsigned long vcpu_pc, bool cache),
|
||||
TP_ARGS(vcpu_pc, cache),
|
||||
|
|
|
|||
|
|
@ -288,3 +288,10 @@ void kvm_get_kimage_voffset(struct alt_instr *alt,
|
|||
{
|
||||
generate_mov_q(kimage_voffset, origptr, updptr, nr_inst);
|
||||
}
|
||||
|
||||
void kvm_compute_final_ctr_el0(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst)
|
||||
{
|
||||
generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0),
|
||||
origptr, updptr, nr_inst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -335,13 +335,14 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
|
|||
kfree(dist->spis);
|
||||
dist->spis = NULL;
|
||||
dist->nr_spis = 0;
|
||||
dist->vgic_dist_base = VGIC_ADDR_UNDEF;
|
||||
|
||||
if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
|
||||
list_del(&rdreg->list);
|
||||
kfree(rdreg);
|
||||
}
|
||||
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
|
||||
vgic_v3_free_redist_region(rdreg);
|
||||
INIT_LIST_HEAD(&dist->rd_regions);
|
||||
} else {
|
||||
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
|
||||
}
|
||||
|
||||
if (vgic_has_its(kvm))
|
||||
|
|
@ -362,6 +363,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|||
vgic_flush_pending_lpis(vcpu);
|
||||
|
||||
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
|
||||
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
|
||||
}
|
||||
|
||||
/* To be called with kvm->lock held */
|
||||
|
|
|
|||
|
|
@ -2218,10 +2218,10 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
|
|||
/*
|
||||
* If an LPI carries the HW bit, this means that this
|
||||
* interrupt is controlled by GICv4, and we do not
|
||||
* have direct access to that state. Let's simply fail
|
||||
* the save operation...
|
||||
* have direct access to that state without GICv4.1.
|
||||
* Let's simply fail the save operation...
|
||||
*/
|
||||
if (ite->irq->hw)
|
||||
if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1)
|
||||
return -EACCES;
|
||||
|
||||
ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
|
||||
|
|
|
|||
|
|
@ -87,8 +87,8 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
|
|||
r = vgic_v3_set_redist_base(kvm, 0, *addr, 0);
|
||||
goto out;
|
||||
}
|
||||
rdreg = list_first_entry(&vgic->rd_regions,
|
||||
struct vgic_redist_region, list);
|
||||
rdreg = list_first_entry_or_null(&vgic->rd_regions,
|
||||
struct vgic_redist_region, list);
|
||||
if (!rdreg)
|
||||
addr_ptr = &undef_value;
|
||||
else
|
||||
|
|
@ -226,6 +226,9 @@ static int vgic_get_common_attr(struct kvm_device *dev,
|
|||
u64 addr;
|
||||
unsigned long type = (unsigned long)attr->attr;
|
||||
|
||||
if (copy_from_user(&addr, uaddr, sizeof(addr)))
|
||||
return -EFAULT;
|
||||
|
||||
r = kvm_vgic_addr(dev->kvm, type, &addr, false);
|
||||
if (r)
|
||||
return (r == -ENODEV) ? -ENXIO : r;
|
||||
|
|
|
|||
|
|
@ -251,45 +251,52 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
|
|||
vgic_enable_lpis(vcpu);
|
||||
}
|
||||
|
||||
static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_redist_region *iter, *rdreg = vgic_cpu->rdreg;
|
||||
|
||||
if (!rdreg)
|
||||
return false;
|
||||
|
||||
if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
|
||||
return false;
|
||||
} else if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
|
||||
struct list_head *rd_regions = &vgic->rd_regions;
|
||||
gpa_t end = rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE;
|
||||
|
||||
/*
|
||||
* the rdist is the last one of the redist region,
|
||||
* check whether there is no other contiguous rdist region
|
||||
*/
|
||||
list_for_each_entry(iter, rd_regions, list) {
|
||||
if (iter->base == end && iter->free_index > 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
|
||||
int target_vcpu_id = vcpu->vcpu_id;
|
||||
gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
|
||||
(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
|
||||
u64 value;
|
||||
|
||||
value = (u64)(mpidr & GENMASK(23, 0)) << 32;
|
||||
value |= ((target_vcpu_id & 0xffff) << 8);
|
||||
|
||||
if (addr == last_rdist_typer)
|
||||
if (vgic_has_its(vcpu->kvm))
|
||||
value |= GICR_TYPER_PLPIS;
|
||||
|
||||
if (vgic_mmio_vcpu_rdist_is_last(vcpu))
|
||||
value |= GICR_TYPER_LAST;
|
||||
if (vgic_has_its(vcpu->kvm))
|
||||
value |= GICR_TYPER_PLPIS;
|
||||
|
||||
return extract_bytes(value, addr & 7, len);
|
||||
}
|
||||
|
||||
static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
|
||||
int target_vcpu_id = vcpu->vcpu_id;
|
||||
u64 value;
|
||||
|
||||
value = (u64)(mpidr & GENMASK(23, 0)) << 32;
|
||||
value |= ((target_vcpu_id & 0xffff) << 8);
|
||||
|
||||
if (vgic_has_its(vcpu->kvm))
|
||||
value |= GICR_TYPER_PLPIS;
|
||||
|
||||
/* reporting of the Last bit is not supported for userspace */
|
||||
return extract_bytes(value, addr & 7, len);
|
||||
}
|
||||
|
||||
static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
|
|
@ -612,7 +619,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
|
|||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
|
||||
vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
|
||||
vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
|
||||
NULL, vgic_mmio_uaccess_write_wi, 8,
|
||||
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
|
||||
|
|
@ -714,6 +721,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
|
|||
return -EINVAL;
|
||||
|
||||
vgic_cpu->rdreg = rdreg;
|
||||
vgic_cpu->rdreg_index = rdreg->free_index;
|
||||
|
||||
rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
|
||||
|
||||
|
|
@ -768,7 +776,7 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
|
|||
}
|
||||
|
||||
/**
|
||||
* vgic_v3_insert_redist_region - Insert a new redistributor region
|
||||
* vgic_v3_alloc_redist_region - Allocate a new redistributor region
|
||||
*
|
||||
* Performs various checks before inserting the rdist region in the list.
|
||||
* Those tests depend on whether the size of the rdist region is known
|
||||
|
|
@ -782,8 +790,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
|
|||
*
|
||||
* Return 0 on success, < 0 otherwise
|
||||
*/
|
||||
static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
|
||||
gpa_t base, uint32_t count)
|
||||
static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
|
||||
gpa_t base, uint32_t count)
|
||||
{
|
||||
struct vgic_dist *d = &kvm->arch.vgic;
|
||||
struct vgic_redist_region *rdreg;
|
||||
|
|
@ -791,10 +799,6 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
|
|||
size_t size = count * KVM_VGIC_V3_REDIST_SIZE;
|
||||
int ret;
|
||||
|
||||
/* single rdist region already set ?*/
|
||||
if (!count && !list_empty(rd_regions))
|
||||
return -EINVAL;
|
||||
|
||||
/* cross the end of memory ? */
|
||||
if (base + size < base)
|
||||
return -EINVAL;
|
||||
|
|
@ -805,11 +809,15 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
|
|||
} else {
|
||||
rdreg = list_last_entry(rd_regions,
|
||||
struct vgic_redist_region, list);
|
||||
if (index != rdreg->index + 1)
|
||||
|
||||
/* Don't mix single region and discrete redist regions */
|
||||
if (!count && rdreg->count)
|
||||
return -EINVAL;
|
||||
|
||||
/* Cannot add an explicitly sized regions after legacy region */
|
||||
if (!rdreg->count)
|
||||
if (!count)
|
||||
return -EEXIST;
|
||||
|
||||
if (index != rdreg->index + 1)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
|
@ -848,11 +856,17 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
|
|||
return ret;
|
||||
}
|
||||
|
||||
void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
|
||||
{
|
||||
list_del(&rdreg->list);
|
||||
kfree(rdreg);
|
||||
}
|
||||
|
||||
int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
|
||||
ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
|
@ -861,8 +875,13 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
|
|||
* afterwards will register the iodevs when needed.
|
||||
*/
|
||||
ret = vgic_register_all_redist_iodevs(kvm);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
struct vgic_redist_region *rdreg;
|
||||
|
||||
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
|
||||
vgic_v3_free_redist_region(rdreg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -938,10 +938,9 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
|||
return region;
|
||||
}
|
||||
|
||||
static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
||||
static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
||||
gpa_t addr, u32 *val)
|
||||
{
|
||||
struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
|
||||
const struct vgic_register_region *region;
|
||||
struct kvm_vcpu *r_vcpu;
|
||||
|
||||
|
|
@ -960,10 +959,9 @@ static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
||||
static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
||||
gpa_t addr, const u32 *val)
|
||||
{
|
||||
struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
|
||||
const struct vgic_register_region *region;
|
||||
struct kvm_vcpu *r_vcpu;
|
||||
|
||||
|
|
@ -986,9 +984,9 @@ int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
|
|||
bool is_write, int offset, u32 *val)
|
||||
{
|
||||
if (is_write)
|
||||
return vgic_uaccess_write(vcpu, &dev->dev, offset, val);
|
||||
return vgic_uaccess_write(vcpu, dev, offset, val);
|
||||
else
|
||||
return vgic_uaccess_read(vcpu, &dev->dev, offset, val);
|
||||
return vgic_uaccess_read(vcpu, dev, offset, val);
|
||||
}
|
||||
|
||||
static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/irqchip/arm-gic-v3.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/irqdomain.h>
|
||||
#include <linux/kvm.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <kvm/arm_vgic.h>
|
||||
|
|
@ -356,6 +358,32 @@ int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The deactivation of the doorbell interrupt will trigger the
|
||||
* unmapping of the associated vPE.
|
||||
*/
|
||||
static void unmap_all_vpes(struct vgic_dist *dist)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dist->its_vm.nr_vpes; i++) {
|
||||
desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
|
||||
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
|
||||
}
|
||||
}
|
||||
|
||||
static void map_all_vpes(struct vgic_dist *dist)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dist->its_vm.nr_vpes; i++) {
|
||||
desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
|
||||
irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* vgic_v3_save_pending_tables - Save the pending tables into guest RAM
|
||||
* kvm lock and all vcpu lock must be held
|
||||
|
|
@ -365,13 +393,28 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
|
|||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
struct vgic_irq *irq;
|
||||
gpa_t last_ptr = ~(gpa_t)0;
|
||||
int ret;
|
||||
bool vlpi_avail = false;
|
||||
int ret = 0;
|
||||
u8 val;
|
||||
|
||||
if (unlikely(!vgic_initialized(kvm)))
|
||||
return -ENXIO;
|
||||
|
||||
/*
|
||||
* A preparation for getting any VLPI states.
|
||||
* The above vgic initialized check also ensures that the allocation
|
||||
* and enabling of the doorbells have already been done.
|
||||
*/
|
||||
if (kvm_vgic_global_state.has_gicv4_1) {
|
||||
unmap_all_vpes(dist);
|
||||
vlpi_avail = true;
|
||||
}
|
||||
|
||||
list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
|
||||
int byte_offset, bit_nr;
|
||||
struct kvm_vcpu *vcpu;
|
||||
gpa_t pendbase, ptr;
|
||||
bool is_pending;
|
||||
bool stored;
|
||||
|
||||
vcpu = irq->target_vcpu;
|
||||
|
|
@ -387,24 +430,35 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
|
|||
if (ptr != last_ptr) {
|
||||
ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
last_ptr = ptr;
|
||||
}
|
||||
|
||||
stored = val & (1U << bit_nr);
|
||||
if (stored == irq->pending_latch)
|
||||
|
||||
is_pending = irq->pending_latch;
|
||||
|
||||
if (irq->hw && vlpi_avail)
|
||||
vgic_v4_get_vlpi_state(irq, &is_pending);
|
||||
|
||||
if (stored == is_pending)
|
||||
continue;
|
||||
|
||||
if (irq->pending_latch)
|
||||
if (is_pending)
|
||||
val |= 1 << bit_nr;
|
||||
else
|
||||
val &= ~(1 << bit_nr);
|
||||
|
||||
ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
return 0;
|
||||
|
||||
out:
|
||||
if (vlpi_avail)
|
||||
map_all_vpes(dist);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user