diff --git a/.mailmap b/.mailmap index b9d358217586..93458154ce7d 100644 --- a/.mailmap +++ b/.mailmap @@ -391,6 +391,10 @@ Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks +Vasily Averin +Vasily Averin +Vasily Averin +Vasily Averin Vinod Koul Vinod Koul Vinod Koul diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst index ad168d16968f..867a4bba6bf6 100644 --- a/Documentation/dev-tools/kunit/start.rst +++ b/Documentation/dev-tools/kunit/start.rst @@ -41,13 +41,18 @@ or ``VFAT_FS``. To run ``FAT_KUNIT_TEST``, the ``.kunitconfig`` has: CONFIG_MSDOS_FS=y CONFIG_FAT_KUNIT_TEST=y -1. A good starting point for the ``.kunitconfig``, is the KUnit default - config. Run the command: +1. A good starting point for the ``.kunitconfig`` is the KUnit default config. + You can generate it by running: .. code-block:: bash cd $PATH_TO_LINUX_REPO - cp tools/testing/kunit/configs/default.config .kunitconfig + tools/testing/kunit/kunit.py config + cat .kunit/.kunitconfig + +.. note :: + ``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is + ``.kunit`` by default. .. note :: You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as diff --git a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml index 62c3bd4cb28d..4f0b7c71313c 100644 --- a/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml +++ b/Documentation/devicetree/bindings/display/bridge/chipone,icn6211.yaml @@ -41,17 +41,32 @@ properties: properties: port@0: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: Video port for MIPI DSI input + properties: + endpoint: + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + + properties: + data-lanes: + description: array of physical DSI data lane indexes. + minItems: 1 + items: + - const: 1 + - const: 2 + - const: 3 + - const: 4 + port@1: $ref: /schemas/graph.yaml#/properties/port description: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml index 6ec1d5fbb8bc..c6e81f532215 100644 --- a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml @@ -38,6 +38,9 @@ properties: interrupts: maxItems: 1 + "#sound-dai-cells": + const: 0 + ports: $ref: /schemas/graph.yaml#/properties/ports diff --git a/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml b/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml new file mode 100644 index 000000000000..9a6e9b25d14a --- /dev/null +++ b/Documentation/devicetree/bindings/display/bridge/lontium,lt9211.yaml @@ -0,0 +1,117 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/bridge/lontium,lt9211.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Lontium LT9211 DSI/LVDS/DPI to DSI/LVDS/DPI bridge. + +maintainers: + - Marek Vasut + +description: | + The LT9211 are bridge devices which convert Single/Dual-Link DSI/LVDS + or Single DPI to Single/Dual-Link DSI/LVDS or Single DPI. + +properties: + compatible: + enum: + - lontium,lt9211 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + reset-gpios: + maxItems: 1 + description: GPIO connected to active high RESET pin. + + vccio-supply: + description: Regulator for 1.8V IO power. + + ports: + $ref: /schemas/graph.yaml#/properties/ports + + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Primary MIPI DSI port-1 for MIPI input or + LVDS port-1 for LVDS input or DPI input. + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: + Additional MIPI port-2 for MIPI input or LVDS port-2 + for LVDS input. Used in combination with primary + port-1 to drive higher resolution displays + + port@2: + $ref: /schemas/graph.yaml#/properties/port + description: + Primary MIPI DSI port-1 for MIPI output or + LVDS port-1 for LVDS output or DPI output. + + port@3: + $ref: /schemas/graph.yaml#/properties/port + description: + Additional MIPI port-2 for MIPI output or LVDS port-2 + for LVDS output. Used in combination with primary + port-1 to drive higher resolution displays. + + required: + - port@0 + - port@2 + +required: + - compatible + - reg + - vccio-supply + - ports + +additionalProperties: false + +examples: + - | + #include + #include + + i2c { + #address-cells = <1>; + #size-cells = <0>; + + hdmi-bridge@3b { + compatible = "lontium,lt9211"; + reg = <0x3b>; + + reset-gpios = <&tlmm 128 GPIO_ACTIVE_HIGH>; + interrupts-extended = <&tlmm 84 IRQ_TYPE_EDGE_FALLING>; + + vccio-supply = <<9211_1v8>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + endpoint { + remote-endpoint = <&dsi0_out>; + }; + }; + + port@2 { + reg = <2>; + + endpoint { + remote-endpoint = <&panel_in_lvds>; + }; + }; + }; + }; + }; + +... diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml index 5216c27fc0ad..a412a1da950f 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358762.yaml @@ -39,7 +39,6 @@ properties: Video port for MIPI DPI output (panel or connector). required: - - port@0 - port@1 required: diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml index f1541cc05297..ed280053ec62 100644 --- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml +++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml @@ -53,16 +53,32 @@ properties: properties: port@0: - $ref: /schemas/graph.yaml#/properties/port + $ref: /schemas/graph.yaml#/$defs/port-base + unevaluatedProperties: false description: | DSI input port. The remote endpoint phandle should be a reference to a valid DSI output endpoint node + properties: + endpoint: + $ref: /schemas/media/video-interfaces.yaml# + unevaluatedProperties: false + + properties: + data-lanes: + description: array of physical DSI data lane indexes. + minItems: 1 + items: + - const: 1 + - const: 2 + - const: 3 + - const: 4 + port@1: $ref: /schemas/graph.yaml#/properties/port description: | - DPI input port. The remote endpoint phandle should be a - reference to a valid DPI output endpoint node + DPI input/output port. The remote endpoint phandle should be a + reference to a valid DPI output or input endpoint node. port@2: $ref: /schemas/graph.yaml#/properties/port diff --git a/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml b/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml new file mode 100644 index 000000000000..817a9bed7d5a --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/leadtek,ltk035c5444t.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/leadtek,ltk035c5444t.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Leadtek ltk035c5444t 3.5" (640x480 pixels) 24-bit IPS LCD panel + +maintainers: + - Paul Cercueil + - Christophe Branchereau + +allOf: + - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# + +properties: + compatible: + const: leadtek,ltk035c5444t + + backlight: true + port: true + power-supply: true + reg: true + reset-gpios: true + +required: + - compatible + - power-supply + - reset-gpios + +unevaluatedProperties: false + +examples: + - | + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + panel@0 { + compatible = "leadtek,ltk035c5444t"; + reg = <0>; + + spi-3wire; + spi-max-frequency = <3125000>; + + reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>; + + backlight = <&backlight>; + power-supply = <&vcc>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml index f29789994b18..c2df8d28aaf5 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml +++ b/Documentation/devicetree/bindings/display/panel/panel-mipi-dbi-spi.yaml @@ -83,6 +83,8 @@ properties: required: - compatible - reg + - width-mm + - height-mm - panel-timing unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml index 0cebaaefda03..157b1a7b18f9 100644 --- a/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml +++ b/Documentation/devicetree/bindings/display/sitronix,st7735r.yaml @@ -32,15 +32,13 @@ properties: - okaya,rh128128t - const: sitronix,st7715r - spi-max-frequency: - maximum: 32000000 - dc-gpios: maxItems: 1 description: Display data/command selection (D/CX) backlight: true reg: true + spi-max-frequency: true reset-gpios: true rotation: true @@ -48,7 +46,6 @@ required: - compatible - reg - dc-gpios - - reset-gpios additionalProperties: false @@ -72,6 +69,7 @@ examples: dc-gpios = <&gpio 43 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio 80 GPIO_ACTIVE_HIGH>; rotation = <270>; + backlight = <&backlight>; }; }; diff --git a/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml index 9baafd0c42dd..3fbd87c2c120 100644 --- a/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml +++ b/Documentation/devicetree/bindings/display/solomon,ssd1307fb.yaml @@ -12,11 +12,22 @@ maintainers: properties: compatible: - enum: - - solomon,ssd1305fb-i2c - - solomon,ssd1306fb-i2c - - solomon,ssd1307fb-i2c - - solomon,ssd1309fb-i2c + oneOf: + # Deprecated compatible strings + - items: + - enum: + - solomon,ssd1305fb-i2c + - solomon,ssd1306fb-i2c + - solomon,ssd1307fb-i2c + - solomon,ssd1309fb-i2c + deprecated: true + - items: + - enum: + - sinowealth,sh1106 + - solomon,ssd1305 + - solomon,ssd1306 + - solomon,ssd1307 + - solomon,ssd1309 reg: maxItems: 1 @@ -27,9 +38,20 @@ properties: reset-gpios: maxItems: 1 + # Only required for SPI + dc-gpios: + description: + GPIO connected to the controller's D/C# (Data/Command) pin, + that is needed for 4-wire SPI to tell the controller if the + data sent is for a command register or the display data RAM + maxItems: 1 + vbat-supply: description: The supply for VBAT + # Only required for SPI + spi-max-frequency: true + solomon,height: $ref: /schemas/types.yaml#/definitions/uint32 default: 16 @@ -135,7 +157,21 @@ allOf: properties: compatible: contains: - const: solomon,ssd1305fb-i2c + const: sinowealth,sh1106 + then: + properties: + solomon,dclk-div: + default: 1 + solomon,dclk-frq: + default: 5 + + - if: + properties: + compatible: + contains: + enum: + - solomon,ssd1305-i2c + - solomon,ssd1305 then: properties: solomon,dclk-div: @@ -147,7 +183,9 @@ allOf: properties: compatible: contains: - const: solomon,ssd1306fb-i2c + enum: + - solomon,ssd1306-i2c + - solomon,ssd1306 then: properties: solomon,dclk-div: @@ -159,7 +197,9 @@ allOf: properties: compatible: contains: - const: solomon,ssd1307fb-i2c + enum: + - solomon,ssd1307-i2c + - solomon,ssd1307 then: properties: solomon,dclk-div: @@ -173,7 +213,9 @@ allOf: properties: compatible: contains: - const: solomon,ssd1309fb-i2c + enum: + - solomon,ssd1309-i2c + - solomon,ssd1309 then: properties: solomon,dclk-div: @@ -189,15 +231,15 @@ examples: #address-cells = <1>; #size-cells = <0>; - ssd1307: oled@3c { - compatible = "solomon,ssd1307fb-i2c"; + ssd1307_i2c: oled@3c { + compatible = "solomon,ssd1307"; reg = <0x3c>; pwms = <&pwm 4 3000>; reset-gpios = <&gpio2 7>; }; - ssd1306: oled@3d { - compatible = "solomon,ssd1306fb-i2c"; + ssd1306_i2c: oled@3d { + compatible = "solomon,ssd1306"; reg = <0x3c>; pwms = <&pwm 4 3000>; reset-gpios = <&gpio2 7>; @@ -207,3 +249,30 @@ examples: solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; }; }; + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + ssd1307_spi: oled@0 { + compatible = "solomon,ssd1307"; + reg = <0x0>; + pwms = <&pwm 4 3000>; + reset-gpios = <&gpio2 7>; + dc-gpios = <&gpio2 8>; + spi-max-frequency = <10000000>; + }; + + ssd1306_spi: oled@1 { + compatible = "solomon,ssd1306"; + reg = <0x1>; + pwms = <&pwm 4 3000>; + reset-gpios = <&gpio2 7>; + dc-gpios = <&gpio2 8>; + spi-max-frequency = <10000000>; + solomon,com-lrremap; + solomon,com-invdir; + solomon,com-offset = <32>; + solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; + }; + }; diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 817794e56227..4f15463611f8 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -106,6 +106,12 @@ properties: phy-mode: $ref: "#/properties/phy-connection-type" + pcs-handle: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing a PCS PHY device on a MDIO + bus to link with an external PHY (phy-handle) if exists. + phy-handle: $ref: /schemas/types.yaml#/definitions/phandle description: diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt index c5ab62c39133..8d157f0295a5 100644 --- a/Documentation/devicetree/bindings/net/micrel.txt +++ b/Documentation/devicetree/bindings/net/micrel.txt @@ -45,20 +45,3 @@ Optional properties: In fiber mode, auto-negotiation is disabled and the PHY can only work in 100base-fx (full and half duplex) modes. - - - lan8814,ignore-ts: If present the PHY will not support timestamping. - - This option acts as check whether Timestamping is supported by - hardware or not. LAN8814 phy support hardware tmestamping. - - - lan8814,latency_rx_10: Configures Latency value of phy in ingress at 10 Mbps. - - - lan8814,latency_tx_10: Configures Latency value of phy in egress at 10 Mbps. - - - lan8814,latency_rx_100: Configures Latency value of phy in ingress at 100 Mbps. - - - lan8814,latency_tx_100: Configures Latency value of phy in egress at 100 Mbps. - - - lan8814,latency_rx_1000: Configures Latency value of phy in ingress at 1000 Mbps. - - - lan8814,latency_tx_1000: Configures Latency value of phy in egress at 1000 Mbps. diff --git a/Documentation/devicetree/bindings/net/xilinx_axienet.txt b/Documentation/devicetree/bindings/net/xilinx_axienet.txt index b8e4894bc634..1aa4c6006cd0 100644 --- a/Documentation/devicetree/bindings/net/xilinx_axienet.txt +++ b/Documentation/devicetree/bindings/net/xilinx_axienet.txt @@ -26,7 +26,8 @@ Required properties: specified, the TX/RX DMA interrupts should be on that node instead, and only the Ethernet core interrupt is optionally specified here. -- phy-handle : Should point to the external phy device. +- phy-handle : Should point to the external phy device if exists. Pointing + this to the PCS/PMA PHY is deprecated and should be avoided. See ethernet.txt file in the same directory. - xlnx,rxmem : Set to allocated memory buffer for Rx/Tx in the hardware @@ -68,6 +69,11 @@ Optional properties: required through the core's MDIO interface (i.e. always, unless the PHY is accessed through a different bus). + - pcs-handle: Phandle to the internal PCS/PMA PHY in SGMII or 1000Base-X + modes, where "pcs-handle" should be used to point + to the PCS/PMA PHY, and "phy-handle" should point to an + external PHY if exists. + Example: axi_ethernet_eth: ethernet@40c00000 { compatible = "xlnx,axi-ethernet-1.00.a"; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index 01430973ecec..79b72e370ade 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -1130,6 +1130,8 @@ patternProperties: description: Sinlinx Electronics Technology Co., LTD "^sinovoip,.*": description: SinoVoip Co., Ltd + "^sinowealth,.*": + description: SINO WEALTH Electronic Ltd. "^sipeed,.*": description: Shenzhen Sipeed Technology Co., Ltd. "^sirf,.*": diff --git a/Documentation/driver-api/dma-buf.rst b/Documentation/driver-api/dma-buf.rst index 55006678394a..36a76cbe9095 100644 --- a/Documentation/driver-api/dma-buf.rst +++ b/Documentation/driver-api/dma-buf.rst @@ -185,6 +185,12 @@ DMA Fence Chain .. kernel-doc:: include/linux/dma-fence-chain.h :internal: +DMA Fence unwrap +~~~~~~~~~~~~~~~~ + +.. kernel-doc:: include/linux/dma-fence-unwrap.h + :internal: + DMA Fence uABI/Sync File ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/gpu/drm-mm.rst b/Documentation/gpu/drm-mm.rst index 198bcc1affa1..f32ccce5722d 100644 --- a/Documentation/gpu/drm-mm.rst +++ b/Documentation/gpu/drm-mm.rst @@ -466,6 +466,15 @@ DRM MM Range Allocator Function References .. kernel-doc:: drivers/gpu/drm/drm_mm.c :export: +DRM Buddy Allocator +=================== + +DRM Buddy Function References +----------------------------- + +.. kernel-doc:: drivers/gpu/drm/drm_buddy.c + :export: + DRM Cache Handling and Fast WC memcpy() ======================================= diff --git a/Documentation/gpu/drm-uapi.rst b/Documentation/gpu/drm-uapi.rst index 199afb503ab1..ce47b4292481 100644 --- a/Documentation/gpu/drm-uapi.rst +++ b/Documentation/gpu/drm-uapi.rst @@ -148,7 +148,9 @@ clients together with the legacy drmAuth authentication procedure. If a driver advertises render node support, DRM core will create a separate render node called renderD. There will be one render node per device. No ioctls except PRIME-related ioctls will be allowed on -this node. Especially GEM_OPEN will be explicitly prohibited. Render +this node. Especially GEM_OPEN will be explicitly prohibited. For a +complete list of driver-independent ioctls that can be used on render +nodes, see the ioctls marked DRM_RENDER_ALLOW in drm_ioctl.c Render nodes are designed to avoid the buffer-leaks, which occur if clients guess the flink names or mmap offsets on the legacy interface. Additionally to this basic interface, drivers must mark their diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index 89bb4fa4c362..ddc1dd039337 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -10,21 +10,21 @@ in joining the effort. Design principles ================= -The Distributed Switch Architecture is a subsystem which was primarily designed -to support Marvell Ethernet switches (MV88E6xxx, a.k.a Linkstreet product line) -using Linux, but has since evolved to support other vendors as well. +The Distributed Switch Architecture subsystem was primarily designed to +support Marvell Ethernet switches (MV88E6xxx, a.k.a. Link Street product +line) using Linux, but has since evolved to support other vendors as well. The original philosophy behind this design was to be able to use unmodified Linux tools such as bridge, iproute2, ifconfig to work transparently whether they configured/queried a switch port network device or a regular network device. -An Ethernet switch is typically comprised of multiple front-panel ports, and one -or more CPU or management port. The DSA subsystem currently relies on the +An Ethernet switch typically comprises multiple front-panel ports and one +or more CPU or management ports. The DSA subsystem currently relies on the presence of a management port connected to an Ethernet controller capable of receiving Ethernet frames from the switch. This is a very common setup for all kinds of Ethernet switches found in Small Home and Office products: routers, -gateways, or even top-of-the rack switches. This host Ethernet controller will +gateways, or even top-of-rack switches. This host Ethernet controller will be later referred to as "master" and "cpu" in DSA terminology and code. The D in DSA stands for Distributed, because the subsystem has been designed @@ -33,14 +33,14 @@ using upstream and downstream Ethernet links between switches. These specific ports are referred to as "dsa" ports in DSA terminology and code. A collection of multiple switches connected to each other is called a "switch tree". -For each front-panel port, DSA will create specialized network devices which are +For each front-panel port, DSA creates specialized network devices which are used as controlling and data-flowing endpoints for use by the Linux networking stack. These specialized network interfaces are referred to as "slave" network interfaces in DSA terminology and code. The ideal case for using DSA is when an Ethernet switch supports a "switch tag" which is a hardware feature making the switch insert a specific tag for each -Ethernet frames it received to/from specific ports to help the management +Ethernet frame it receives to/from specific ports to help the management interface figure out: - what port is this frame coming from @@ -125,7 +125,7 @@ other switches from the same fabric, and in this case, the outermost switch ports must decapsulate the packet. Note that in certain cases, it might be the case that the tagging format used -by a leaf switch (not connected directly to the CPU) to not be the same as what +by a leaf switch (not connected directly to the CPU) is not the same as what the network stack sees. This can be seen with Marvell switch trees, where the CPU port can be configured to use either the DSA or the Ethertype DSA (EDSA) format, but the DSA links are configured to use the shorter (without Ethertype) @@ -270,21 +270,21 @@ These interfaces are specialized in order to: to/from specific switch ports - query the switch for ethtool operations: statistics, link state, Wake-on-LAN, register dumps... -- external/internal PHY management: link, auto-negotiation etc. +- manage external/internal PHY: link, auto-negotiation, etc. These slave network devices have custom net_device_ops and ethtool_ops function pointers which allow DSA to introduce a level of layering between the networking -stack/ethtool, and the switch driver implementation. +stack/ethtool and the switch driver implementation. Upon frame transmission from these slave network devices, DSA will look up which -switch tagging protocol is currently registered with these network devices, and +switch tagging protocol is currently registered with these network devices and invoke a specific transmit routine which takes care of adding the relevant switch tag in the Ethernet frames. These frames are then queued for transmission using the master network device -``ndo_start_xmit()`` function, since they contain the appropriate switch tag, the +``ndo_start_xmit()`` function. Since they contain the appropriate switch tag, the Ethernet switch will be able to process these incoming frames from the -management interface and delivers these frames to the physical switch port. +management interface and deliver them to the physical switch port. Graphical representation ------------------------ @@ -330,9 +330,9 @@ MDIO reads/writes towards specific PHY addresses. In most MDIO-connected switches, these functions would utilize direct or indirect PHY addressing mode to return standard MII registers from the switch builtin PHYs, allowing the PHY library and/or to return link status, link partner pages, auto-negotiation -results etc.. +results, etc. -For Ethernet switches which have both external and internal MDIO busses, the +For Ethernet switches which have both external and internal MDIO buses, the slave MII bus can be utilized to mux/demux MDIO reads and writes towards either internal or external MDIO devices this switch might be connected to: internal PHYs, external PHYs, or even external switches. @@ -349,7 +349,7 @@ DSA data structures are defined in ``include/net/dsa.h`` as well as table indication (when cascading switches) - ``dsa_platform_data``: platform device configuration data which can reference - a collection of dsa_chip_data structure if multiples switches are cascaded, + a collection of dsa_chip_data structures if multiple switches are cascaded, the master network device this switch tree is attached to needs to be referenced @@ -426,7 +426,7 @@ logic basically looks like this: "phy-handle" property, if found, this PHY device is created and registered using ``of_phy_connect()`` -- if Device Tree is used, and the PHY device is "fixed", that is, conforms to +- if Device Tree is used and the PHY device is "fixed", that is, conforms to the definition of a non-MDIO managed PHY as defined in ``Documentation/devicetree/bindings/net/fixed-link.txt``, the PHY is registered and connected transparently using the special fixed MDIO bus driver @@ -481,7 +481,7 @@ Device Tree DSA features a standardized binding which is documented in ``Documentation/devicetree/bindings/net/dsa/dsa.txt``. PHY/MDIO library helper functions such as ``of_get_phy_mode()``, ``of_phy_connect()`` are also used to query -per-port PHY specific details: interface connection, MDIO bus location etc.. +per-port PHY specific details: interface connection, MDIO bus location, etc. Driver development ================== @@ -509,7 +509,7 @@ Switch configuration - ``setup``: setup function for the switch, this function is responsible for setting up the ``dsa_switch_ops`` private structure with all it needs: register maps, - interrupts, mutexes, locks etc.. This function is also expected to properly + interrupts, mutexes, locks, etc. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating a Port-based VLAN ID for each port and allowing only the CPU port and the @@ -526,13 +526,13 @@ PHY devices and link management - ``get_phy_flags``: Some switches are interfaced to various kinds of Ethernet PHYs, if the PHY library PHY driver needs to know about information it cannot obtain on its own (e.g.: coming from switch memory mapped registers), this function - should return a 32-bits bitmask of "flags", that is private between the switch + should return a 32-bit bitmask of "flags" that is private between the switch driver and the Ethernet PHY driver in ``drivers/net/phy/\*``. - ``phy_read``: Function invoked by the DSA slave MDIO bus when attempting to read the switch port MDIO registers. If unavailable, return 0xffff for each read. For builtin switch Ethernet PHYs, this function should allow reading the link - status, auto-negotiation results, link partner pages etc.. + status, auto-negotiation results, link partner pages, etc. - ``phy_write``: Function invoked by the DSA slave MDIO bus when attempting to write to the switch port MDIO registers. If unavailable return a negative error @@ -554,7 +554,7 @@ Ethtool operations ------------------ - ``get_strings``: ethtool function used to query the driver's strings, will - typically return statistics strings, private flags strings etc. + typically return statistics strings, private flags strings, etc. - ``get_ethtool_stats``: ethtool function used to query per-port statistics and return their values. DSA overlays slave network devices general statistics: @@ -564,7 +564,7 @@ Ethtool operations - ``get_sset_count``: ethtool function used to query the number of statistics items - ``get_wol``: ethtool function used to obtain Wake-on-LAN settings per-port, this - function may, for certain implementations also query the master network device + function may for certain implementations also query the master network device Wake-on-LAN settings if this interface needs to participate in Wake-on-LAN - ``set_wol``: ethtool function used to configure Wake-on-LAN settings per-port, @@ -607,14 +607,14 @@ Power management in a fully active state - ``port_enable``: function invoked by the DSA slave network device ndo_open - function when a port is administratively brought up, this function should be - fully enabling a given switch port. DSA takes care of marking the port with + function when a port is administratively brought up, this function should + fully enable a given switch port. DSA takes care of marking the port with ``BR_STATE_BLOCKING`` if the port is a bridge member, or ``BR_STATE_FORWARDING`` if it was not, and propagating these changes down to the hardware - ``port_disable``: function invoked by the DSA slave network device ndo_close - function when a port is administratively brought down, this function should be - fully disabling a given switch port. DSA takes care of marking the port with + function when a port is administratively brought down, this function should + fully disable a given switch port. DSA takes care of marking the port with ``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is disabled while being a bridge member @@ -622,12 +622,12 @@ Bridge layer ------------ - ``port_bridge_join``: bridge layer function invoked when a given switch port is - added to a bridge, this function should be doing the necessary at the switch - level to permit the joining port from being added to the relevant logical + added to a bridge, this function should do what's necessary at the switch + level to permit the joining port to be added to the relevant logical domain for it to ingress/egress traffic with other members of the bridge. - ``port_bridge_leave``: bridge layer function invoked when a given switch port is - removed from a bridge, this function should be doing the necessary at the + removed from a bridge, this function should do what's necessary at the switch level to deny the leaving port from ingress/egress traffic from the remaining bridge members. When the port leaves the bridge, it should be aged out at the switch hardware for the switch to (re) learn MAC addresses behind @@ -663,7 +663,7 @@ Bridge layer point for drivers that need to configure the hardware for enabling this feature. -- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoken when a driver +- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver leaves a bridge port which had the TX forwarding offload feature enabled. Bridge VLAN filtering diff --git a/MAINTAINERS b/MAINTAINERS index fd768d43e048..9b2b0dc44506 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4791,6 +4791,7 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor M: Nick Desaulniers +R: Tom Rix L: llvm@lists.linux.dev S: Supported W: https://clangbuiltlinux.github.io/ @@ -5715,7 +5716,7 @@ W: http://lanana.org/docs/device-list/index.html DEVICE RESOURCE MANAGEMENT HELPERS M: Hans de Goede -R: Matti Vaittinen +R: Matti Vaittinen S: Maintained F: include/linux/devm-helpers.h @@ -6307,6 +6308,11 @@ S: Maintained F: Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml F: drivers/gpu/drm/panel/panel-olimex-lcd-olinuxino.c +DRM DRIVER FOR PARADE PS8640 BRIDGE CHIP +R: Douglas Anderson +F: Documentation/devicetree/bindings/display/bridge/ps8640.yaml +F: drivers/gpu/drm/bridge/parade-ps8640.c + DRM DRIVER FOR PERVASIVE DISPLAYS REPAPER PANELS M: Noralf Trønnes S: Maintained @@ -6420,6 +6426,11 @@ DRM DRIVER FOR TDFX VIDEO CARDS S: Orphan / Obsolete F: drivers/gpu/drm/tdfx/ +DRM DRIVER FOR TI SN65DSI86 BRIDGE CHIP +R: Douglas Anderson +F: Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml +F: drivers/gpu/drm/bridge/ti-sn65dsi86.c + DRM DRIVER FOR TPO TPG110 PANELS M: Linus Walleij S: Maintained @@ -6539,6 +6550,7 @@ R: Jonas Karlman R: Jernej Skrabec S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc +F: Documentation/devicetree/bindings/display/bridge/ F: drivers/gpu/drm/bridge/ DRM DRIVERS FOR EXYNOS @@ -8675,7 +8687,6 @@ F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h HFI1 DRIVER -M: Mike Marciniszyn M: Dennis Dalessandro L: linux-rdma@vger.kernel.org S: Supported @@ -9598,6 +9609,7 @@ F: drivers/iio/pressure/dps310.c INFINIBAND SUBSYSTEM M: Jason Gunthorpe +M: Leon Romanovsky L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/linux-rdma/rdma-core @@ -11208,7 +11220,7 @@ F: scripts/spdxcheck.py LINEAR RANGES HELPERS M: Mark Brown -R: Matti Vaittinen +R: Matti Vaittinen F: lib/linear_ranges.c F: lib/test_linear_ranges.c F: include/linux/linear_range.h @@ -14656,7 +14668,6 @@ F: drivers/rtc/rtc-optee.c OPA-VNIC DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/ulp/opa_vnic @@ -16098,7 +16109,6 @@ F: include/uapi/linux/qemu_fw_cfg.h QIB DRIVER M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qib/ @@ -16616,7 +16626,6 @@ F: drivers/net/ethernet/rdc/r6040.c RDMAVT - RDMA verbs software M: Dennis Dalessandro -M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rdmavt @@ -17011,8 +17020,7 @@ S: Odd Fixes F: drivers/tty/serial/rp2.* ROHM BD99954 CHARGER IC -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/power/supply/bd99954-charger.c F: drivers/power/supply/bd99954-charger.h @@ -17035,8 +17043,7 @@ F: drivers/regulator/bd9571mwv-regulator.c F: include/linux/mfd/bd9571mwv.h ROHM POWER MANAGEMENT IC DEVICE DRIVERS -R: Matti Vaittinen -L: linux-power@fi.rohmeurope.com +R: Matti Vaittinen S: Supported F: drivers/clk/clk-bd718x7.c F: drivers/gpio/gpio-bd71815.c @@ -21118,7 +21125,7 @@ F: include/linux/regulator/ K: regulator_get_optional VOLTAGE AND CURRENT REGULATOR IRQ HELPERS -R: Matti Vaittinen +R: Matti Vaittinen F: drivers/regulator/irq_helpers.c VRF diff --git a/Makefile b/Makefile index 8c7de9a72ea2..29e273d3f8cc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Superb Owl # *DOCUMENTATION* diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 232b439cbaf3..ff8f4511df71 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_PART_CORTEX_A77 0xD0D #define ARM_CPU_PART_NEOVERSE_V1 0xD40 #define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_A78AE 0xD42 #define ARM_CPU_PART_CORTEX_X1 0xD44 #define ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 @@ -130,6 +131,7 @@ #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) #define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE) #define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) #define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 7f3c87f7a0ce..c31be7eda9df 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -107,7 +107,7 @@ isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, .Lskip_gicv3_\@ // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICH_HCR_EL2 to defaults .Lskip_gicv3_\@: .endm diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 3fb79b76e9d9..7bbf5104b7b7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -42,7 +42,7 @@ bool alternative_is_applied(u16 cpufeature) /* * Check if the target PC is within an alternative block. */ -static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) +static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) { unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt); return !(pc >= replptr && pc <= (replptr + alt->alt_len)); @@ -50,7 +50,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) +static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -95,7 +95,7 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp return insn; } -static void patch_alternative(struct alt_instr *alt, +static noinstr void patch_alternative(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { __le32 *replptr; diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 3ed39c61a510..2b3f3d0544b9 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,16 +8,9 @@ #include #include -#ifndef VMA_ITERATOR -#define VMA_ITERATOR(name, mm, addr) \ - struct mm_struct *name = mm -#define for_each_vma(vmi, vma) \ - for (vma = vmi->mmap; vma; vma = vma->vm_next) -#endif - -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(tsk, vma) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ + for (vma = tsk->mm->mmap; vma; vma = vma->vm_next) \ if (vma->vm_flags & VM_MTE) static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) @@ -32,10 +25,11 @@ static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) static int mte_dump_tag_range(struct coredump_params *cprm, unsigned long start, unsigned long end) { + int ret = 1; unsigned long addr; + void *tags = NULL; for (addr = start; addr < end; addr += PAGE_SIZE) { - char tags[MTE_PAGE_TAG_STORAGE]; struct page *page = get_dump_page(addr); /* @@ -59,22 +53,36 @@ static int mte_dump_tag_range(struct coredump_params *cprm, continue; } + if (!tags) { + tags = mte_allocate_tag_storage(); + if (!tags) { + put_page(page); + ret = 0; + break; + } + } + mte_save_page_tags(page_address(page), tags); put_page(page); - if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) - return 0; + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { + mte_free_tag_storage(tags); + ret = 0; + break; + } } - return 1; + if (tags) + mte_free_tag_storage(tags); + + return ret; } Elf_Half elf_core_extra_phdrs(void) { struct vm_area_struct *vma; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) vma_count++; return vma_count; @@ -83,9 +91,8 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { struct elf_phdr phdr; phdr.p_type = PT_ARM_MEMTAG_MTE; @@ -109,9 +116,8 @@ size_t elf_core_extra_data_size(void) { struct vm_area_struct *vma; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(current, vma) data_size += mte_vma_tag_dump_size(vma); return data_size; @@ -120,9 +126,8 @@ size_t elf_core_extra_data_size(void) int elf_core_write_extra_data(struct coredump_params *cprm) { struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(current, vma) { if (vma->vm_flags & VM_DONTDUMP) continue; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 712e97c03e54..cd868084e724 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -701,7 +701,7 @@ NOKPROBE_SYMBOL(breakpoint_handler); * addresses. There is no straight-forward way, short of disassembling the * offending instruction, to map that address back to the watchpoint. This * function computes the distance of the memory access from the watchpoint as a - * heuristic for the likelyhood that a given access triggered the watchpoint. + * heuristic for the likelihood that a given access triggered the watchpoint. * * See Section D2.10.5 "Determining the memory location that caused a Watchpoint * exception" of ARMv8 Architecture Reference Manual for details. diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index e53493d8b208..a3d0494f25a9 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -220,7 +220,7 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num, * increasing the section's alignment so that the * resulting address of this instruction is guaranteed * to equal the offset in that particular bit (as well - * as all less signficant bits). This ensures that the + * as all less significant bits). This ensures that the * address modulo 4 KB != 0xfff8 or 0xfffc (which would * have all ones in bits [11:3]) */ diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 771f543464e0..33e0fabc0b79 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -117,8 +117,8 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg) int i, ret = 0; struct aarch64_insn_patch *pp = arg; - /* The first CPU becomes master */ - if (atomic_inc_return(&pp->cpu_count) == 1) { + /* The last CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == num_online_cpus()) { for (i = 0; ret == 0 && i < pp->insn_cnt; i++) ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], pp->new_insns[i]); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 5777929d35bf..40be3a7c2c53 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -853,6 +853,7 @@ u8 spectre_bhb_loop_affected(int scope) if (scope == SCOPE_LOCAL_CPU) { static const struct midr_range spectre_bhb_k32_list[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 27df5c1e6baa..3b46041f2b97 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -234,6 +234,7 @@ asmlinkage notrace void secondary_start_kernel(void) * Log the CPU info before it is marked online and might get read. */ cpuinfo_store_cpu(); + store_cpu_topology(cpu); /* * Enable GIC and timers. @@ -242,7 +243,6 @@ asmlinkage notrace void secondary_start_kernel(void) ipi_setup(cpu); - store_cpu_topology(cpu); numa_add_cpu(cpu); /* diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 19ee7c33769d..2b0887e58a7c 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -140,7 +140,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) /* * Restore pstate flags. OS lock and mdscr have been already * restored, so from this point onwards, debugging is fully - * renabled if it was enabled when core started shutdown. + * reenabled if it was enabled when core started shutdown. */ local_daif_restore(flags); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8ac25f19084e..1e7b1550e2fc 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,7 +73,7 @@ EXPORT_SYMBOL(memstart_addr); * In this scheme a comparatively quicker boot is observed. * * If ZONE_DMA configs are defined, crash kernel memory reservation - * is delayed until DMA zone memory range size initilazation performed in + * is delayed until DMA zone memory range size initialization performed in * zone_sizes_init(). The defer is necessary to steer clear of DMA zone * memory range to avoid overlap allocation. So crash kernel memory boundaries * are not known when mapping all bank memory ranges, which otherwise means @@ -81,7 +81,7 @@ EXPORT_SYMBOL(memstart_addr); * so page-granularity mappings are created for the entire memory range. * Hence a slightly slower boot is observed. * - * Note: Page-granularity mapppings are necessary for crash kernel memory + * Note: Page-granularity mappings are necessary for crash kernel memory * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ #if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 827038a33064..4def2bd17b9b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -16,18 +16,6 @@ #include #include -#ifdef CONFIG_PPC_PSERIES -static inline bool kvmhv_on_pseries(void) -{ - return !cpu_has_feature(CPU_FTR_HVMODE); -} -#else -static inline bool kvmhv_on_pseries(void) -{ - return false; -} -#endif - /* * Structure for a nested guest, that is, for a guest that is managed by * one of our guests. diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c583d0c37f31..838d4cb460b7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -586,6 +586,18 @@ static inline bool kvm_hv_mode_active(void) { return false; } #endif +#ifdef CONFIG_PPC_PSERIES +static inline bool kvmhv_on_pseries(void) +{ + return !cpu_has_feature(CPU_FTR_HVMODE); +} +#else +static inline bool kvmhv_on_pseries(void) +{ + return false; +} +#endif + #ifdef CONFIG_KVM_XICS static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 254687258f42..f2c5c26869f1 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -132,7 +132,11 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn(_addr)); \ +}) /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 049ca26893e6..8fa37ef5da4d 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -28,11 +28,13 @@ void setup_panic(void); #define ARCH_PANIC_TIMEOUT 180 #ifdef CONFIG_PPC_PSERIES +extern bool pseries_reloc_on_exception(void); extern bool pseries_enable_reloc_on_exc(void); extern void pseries_disable_reloc_on_exc(void); extern void pseries_big_endian_exceptions(void); void __init pseries_little_endian_exceptions(void); #else +static inline bool pseries_reloc_on_exception(void) { return false; } static inline bool pseries_enable_reloc_on_exc(void) { return false; } static inline void pseries_disable_reloc_on_exc(void) {} static inline void pseries_big_endian_exceptions(void) {} diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h index 0a0bc79bd1fa..de1018cc522b 100644 --- a/arch/powerpc/include/asm/static_call.h +++ b/arch/powerpc/include/asm/static_call.h @@ -24,5 +24,6 @@ #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func) #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20") #endif /* _ASM_POWERPC_STATIC_CALL_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 55caeee37c08..b66dd6f775a4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -809,6 +809,10 @@ __start_interrupts: * - MSR_EE|MSR_RI is clear (no reentrant exceptions) * - Standard kernel environment is set up (stack, paca, etc) * + * KVM: + * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM + * ensures that FSCR[SCV] is disabled whenever it has to force AIL off. + * * Call convention: * * syscall register convention is in Documentation/powerpc/syscall64-abi.rst diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index e547066a06aa..a96f05063bc9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -196,6 +196,34 @@ static void __init configure_exceptions(void) /* Under a PAPR hypervisor, we need hypercalls */ if (firmware_has_feature(FW_FEATURE_SET_MODE)) { + /* + * - PR KVM does not support AIL mode interrupts in the host + * while a PR guest is running. + * + * - SCV system call interrupt vectors are only implemented for + * AIL mode interrupts. + * + * - On pseries, AIL mode can only be enabled and disabled + * system-wide so when a PR VM is created on a pseries host, + * all CPUs of the host are set to AIL=0 mode. + * + * - Therefore host CPUs must not execute scv while a PR VM + * exists. + * + * - SCV support can not be disabled dynamically because the + * feature is advertised to host userspace. Disabling the + * facility and emulating it would be possible but is not + * implemented. + * + * - So SCV support is blanket disabled if PR KVM could possibly + * run. That is, PR support compiled in, booting on pseries + * with hash MMU. + */ + if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) { + init_task.thread.fscr &= ~FSCR_SCV; + cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV; + } + /* Enable AIL if possible */ if (!pseries_enable_reloc_on_exc()) { init_task.thread.fscr &= ~FSCR_SCV; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 18e58085447c..ddd88179110a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,12 +112,21 @@ config KVM_BOOK3S_64_PR guest in user mode (problem state) and emulating all privileged instructions and registers. + This is only available for hash MMU mode and only supports + guests that use hash MMU mode. + This is not as fast as using hypervisor mode, but works on machines where hypervisor mode is not available or not usable, and can emulate processors that are different from the host processor, including emulating 32-bit processors on a 64-bit host. + Selecting this option will cause the SCV facility to be + disabled when the kernel is booted on the pseries platform in + hash MMU mode (regardless of PR VMs running). When any PR VMs + are running, "AIL" mode is disabled which may slow interrupts + and system calls on the host. + config KVM_BOOK3S_HV_EXIT_TIMING bool "Detailed timing for hypervisor real-mode code" depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 05e003eb5d90..e42d1c609e47 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -414,10 +414,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) */ ld r10,HSTATE_SCRATCH0(r13) cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK - beq machine_check_common + beq .Lcall_machine_check_common cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET - beq system_reset_common + beq .Lcall_system_reset_common b . + +.Lcall_machine_check_common: + b machine_check_common + +.Lcall_system_reset_common: + b system_reset_common #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c886557638a1..6fa518f6501d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -225,6 +225,13 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) int cpu; struct rcuwait *waitp; + /* + * rcuwait_wake_up contains smp_mb() which orders prior stores that + * create pending work vs below loads of cpu fields. The other side + * is the barrier in vcpu run that orders setting the cpu fields vs + * testing for pending work. + */ + waitp = kvm_arch_vcpu_get_wait(vcpu); if (rcuwait_wake_up(waitp)) ++vcpu->stat.generic.halt_wakeup; @@ -1089,7 +1096,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; } tvcpu->arch.prodded = 1; - smp_mb(); + smp_mb(); /* This orders prodded store vs ceded load */ if (tvcpu->arch.ceded) kvmppc_fast_vcpu_kick_hv(tvcpu); break; @@ -3766,6 +3773,14 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) pvc = core_info.vc[sub]; pvc->pcpu = pcpu + thr; for_each_runnable_thread(i, vcpu, pvc) { + /* + * XXX: is kvmppc_start_thread called too late here? + * It updates vcpu->cpu and vcpu->arch.thread_cpu + * which are used by kvmppc_fast_vcpu_kick_hv(), but + * kick is called after new exceptions become available + * and exceptions are checked earlier than here, by + * kvmppc_core_prepare_to_enter. + */ kvmppc_start_thread(vcpu, pvc); kvmppc_create_dtl_entry(vcpu, pvc); trace_kvm_guest_enter(vcpu); @@ -4487,6 +4502,21 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, if (need_resched() || !kvm->arch.mmu_ready) goto out; + vcpu->cpu = pcpu; + vcpu->arch.thread_cpu = pcpu; + vc->pcpu = pcpu; + local_paca->kvm_hstate.kvm_vcpu = vcpu; + local_paca->kvm_hstate.ptid = 0; + local_paca->kvm_hstate.fake_suspend = 0; + + /* + * Orders set cpu/thread_cpu vs testing for pending interrupts and + * doorbells below. The other side is when these fields are set vs + * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to + * kick a vCPU to notice the pending interrupt. + */ + smp_mb(); + if (!nested) { kvmppc_core_prepare_to_enter(vcpu); if (test_bit(BOOK3S_IRQPRIO_EXTERNAL, @@ -4506,13 +4536,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, tb = mftb(); - vcpu->cpu = pcpu; - vcpu->arch.thread_cpu = pcpu; - vc->pcpu = pcpu; - local_paca->kvm_hstate.kvm_vcpu = vcpu; - local_paca->kvm_hstate.ptid = 0; - local_paca->kvm_hstate.fake_suspend = 0; - __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0); trace_kvm_guest_enter(vcpu); @@ -4614,6 +4637,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, run->exit_reason = KVM_EXIT_INTR; vcpu->arch.ret = -EINTR; out: + vcpu->cpu = -1; + vcpu->arch.thread_cpu = -1; powerpc_local_irq_pmu_restore(flags); preempt_enable(); goto done; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 34a801c3604a..7bf9e6ca5c2d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -137,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max; svcpu->in_use = 0; svcpu_put(svcpu); -#endif /* Disable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV); + } +#endif vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 @@ -165,6 +168,14 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb)); to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max; svcpu_put(svcpu); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV)) + mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV); + } #endif if (kvmppc_is_split_real(vcpu)) @@ -174,11 +185,6 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); kvmppc_save_tm_pr(vcpu); - /* Enable AIL if supported */ - if (cpu_has_feature(CPU_FTR_HVMODE) && - cpu_has_feature(CPU_FTR_ARCH_207S)) - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); - vcpu->cpu = -1; } @@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) { + if (fscr & FSCR_SCV) + fscr &= ~FSCR_SCV; /* SCV must not be enabled */ if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { /* TAR got dropped, drop it in shadow too */ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 1f10e7dfcdd0..dc4f51ac84bc 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -281,6 +281,22 @@ static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu) +{ + unsigned long mflags = kvmppc_get_gpr(vcpu, 4); + unsigned long resource = kvmppc_get_gpr(vcpu, 5); + + if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) { + /* KVM PR does not provide AIL!=0 to guests */ + if (mflags == 0) + kvmppc_set_gpr(vcpu, 3, H_SUCCESS); + else + kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63); + return EMULATE_DONE; + } + return EMULATE_FAIL; +} + #ifdef CONFIG_SPAPR_TCE_IOMMU static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu) { @@ -384,6 +400,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return kvmppc_h_pr_logical_ci_load(vcpu); case H_LOGICAL_CI_STORE: return kvmppc_h_pr_logical_ci_store(vcpu); + case H_SET_MODE: + return kvmppc_h_pr_set_mode(vcpu); case H_XIRR: case H_CPPR: case H_EOI: @@ -421,6 +439,7 @@ int kvmppc_hcall_impl_pr(unsigned long cmd) case H_CEDE: case H_LOGICAL_CI_LOAD: case H_LOGICAL_CI_STORE: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: @@ -447,6 +466,7 @@ static unsigned int default_hcall_list[] = { H_BULK_REMOVE, H_PUT_TCE, H_CEDE, + H_SET_MODE, #ifdef CONFIG_KVM_XICS H_XIRR, H_CPPR, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9772b176e406..875c30c12db0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -705,6 +705,23 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_PPC_AIL_MODE_3: + r = 0; + /* + * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode. + * The POWER9s can support it if the guest runs in hash mode, + * but QEMU doesn't necessarily query the capability in time. + */ + if (hv_enabled) { + if (kvmhv_on_pseries()) { + if (pseries_reloc_on_exception()) + r = 1; + } else if (cpu_has_feature(CPU_FTR_ARCH_207S) && + !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) { + r = 1; + } + } + break; default: r = 0; break; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8e301cd8925b..4d221d033804 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -255,7 +255,7 @@ void __init mem_init(void) #endif high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - set_max_mapnr(max_low_pfn); + set_max_mapnr(max_pfn); kasan_late_init(); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b9b7fefbb64b..13022d734951 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1436,7 +1436,7 @@ int find_and_online_cpu_nid(int cpu) if (new_nid < 0 || !node_possible(new_nid)) new_nid = first_online_node; - if (NODE_DATA(new_nid) == NULL) { + if (!node_online(new_nid)) { #ifdef CONFIG_MEMORY_HOTPLUG /* * Need to ensure that NODE_DATA is initialized for a node from diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 069d7b3bb142..955ff8aa1644 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -353,6 +353,14 @@ static void pseries_lpar_idle(void) pseries_idle_epilog(); } +static bool pseries_reloc_on_exception_enabled; + +bool pseries_reloc_on_exception(void) +{ + return pseries_reloc_on_exception_enabled; +} +EXPORT_SYMBOL_GPL(pseries_reloc_on_exception); + /* * Enable relocation on during exceptions. This has partition wide scope and * may take a while to complete, if it takes longer than one second we will @@ -377,6 +385,7 @@ bool pseries_enable_reloc_on_exc(void) " on exceptions: %ld\n", rc); return false; } + pseries_reloc_on_exception_enabled = true; return true; } @@ -404,7 +413,9 @@ void pseries_disable_reloc_on_exc(void) break; mdelay(get_longbusy_msecs(rc)); } - if (rc != H_SUCCESS) + if (rc == H_SUCCESS) + pseries_reloc_on_exception_enabled = false; + else pr_warn("Warning: Failed to disable relocation on exceptions: %ld\n", rc); } diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c b/arch/powerpc/platforms/pseries/vas-sysfs.c index 4a7fcde5afc0..909535ca513a 100644 --- a/arch/powerpc/platforms/pseries/vas-sysfs.c +++ b/arch/powerpc/platforms/pseries/vas-sysfs.c @@ -99,6 +99,7 @@ static struct attribute *vas_def_capab_attrs[] = { &nr_used_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_def_capab); static struct attribute *vas_qos_capab_attrs[] = { &nr_total_credits_attribute.attr, @@ -106,6 +107,7 @@ static struct attribute *vas_qos_capab_attrs[] = { &update_total_credits_attribute.attr, NULL, }; +ATTRIBUTE_GROUPS(vas_qos_capab); static ssize_t vas_type_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -154,13 +156,13 @@ static const struct sysfs_ops vas_sysfs_ops = { static struct kobj_type vas_def_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_def_capab_attrs, + .default_groups = vas_def_capab_groups, }; static struct kobj_type vas_qos_attr_type = { .release = vas_type_release, .sysfs_ops = &vas_sysfs_ops, - .default_attrs = vas_qos_capab_attrs, + .default_groups = vas_qos_capab_groups, }; static char *vas_caps_kobj_name(struct vas_caps_entry *centry, diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b0142e01002e..374d3e85c1f1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -940,6 +940,12 @@ config GART_IOMMU If unsure, say Y. +config BOOT_VESA_SUPPORT + bool + help + If true, at least one selected framebuffer driver can take advantage + of VESA video modes set at an early boot stage via the vga= parameter. + config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 7e185977a984..c2c6d35e3a43 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -83,7 +83,7 @@ static int vesa_probe(void) (vminfo.memory_layout == 4 || vminfo.memory_layout == 6) && vminfo.memory_planes == 1) { -#ifdef CONFIG_FB_BOOT_VESA_SUPPORT +#ifdef CONFIG_BOOT_VESA_SUPPORT /* Graphics mode, color, linear frame buffer supported. Only register the mode if if framebuffer is configured, however, @@ -121,7 +121,7 @@ static int vesa_set_mode(struct mode_info *mode) if ((vminfo.mode_attr & 0x15) == 0x05) { /* It's a supported text mode */ is_graphic = 0; -#ifdef CONFIG_FB_BOOT_VESA_SUPPORT +#ifdef CONFIG_BOOT_VESA_SUPPORT } else if ((vminfo.mode_attr & 0x99) == 0x99) { /* It's a graphics mode with linear frame buffer */ is_graphic = 1; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e88791b420ee..fc7f458eb3de 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -302,7 +302,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), - INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff1f, FE), INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), EVENT_EXTRA_END @@ -5536,7 +5536,11 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con /* Disabled fixed counters which are not in CPUID */ c->idxmsk64 &= intel_ctrl; - if (c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) + /* + * Don't extend the pseudo-encoding to the + * generic counters + */ + if (!use_fixed_pseudo_encoding(c->code)) c->idxmsk64 |= (1ULL << num_counters) - 1; } c->idxmsk64 &= @@ -6212,6 +6216,7 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: /* * Alder Lake has 2 types of CPU, core and atom. * diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index c6262b154c3a..5d7762288a24 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -40,7 +40,7 @@ * Model specific counters: * MSR_CORE_C1_RES: CORE C1 Residency Counter * perf code: 0x00 - * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL + * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL * Scope: Core (each processor core has a MSR) * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter * perf code: 0x01 @@ -51,49 +51,50 @@ * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML, - * ICL,TGL,RKL,ADL + * ICL,TGL,RKL,ADL,RPL * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL, - * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL + * KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL, + * RPL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL, * GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, - * TGL,TNT,RKL,ADL + * TGL,TNT,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL, - * KBL,CML,ICL,TGL,RKL,ADL + * KBL,CML,ICL,TGL,RKL,ADL,RPL * Scope: Package (physical package) * MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter. * perf code: 0x04 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter. * perf code: 0x05 * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL,RKL, - * ADL + * ADL,RPL * Scope: Package (physical package) * MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter. * perf code: 0x06 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, - * TNT,RKL,ADL + * TNT,RKL,ADL,RPL * Scope: Package (physical package) * */ @@ -680,6 +681,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index e497da9bf427..7695dcae280e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1828,6 +1828,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), {}, diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index f698a55bde81..4262351f52b6 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -79,6 +79,10 @@ #define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 #define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 #define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 +#define PCI_DEVICE_ID_INTEL_RPL_1_IMC 0xA700 +#define PCI_DEVICE_ID_INTEL_RPL_2_IMC 0xA702 +#define PCI_DEVICE_ID_INTEL_RPL_3_IMC 0xA706 +#define PCI_DEVICE_ID_INTEL_RPL_4_IMC 0xA709 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -1406,6 +1410,22 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_1_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_2_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_RPL_4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ } }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 96c775abe31f..6d759f88315c 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -103,6 +103,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ROCKETLAKE: case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_RAPTORLAKE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c878fed3056f..fbcfec4dc4cc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -154,24 +154,24 @@ # define DEFINE_EXTABLE_TYPE_REG \ ".macro extable_type_reg type:req reg:req\n" \ - ".set found, 0\n" \ - ".set regnr, 0\n" \ + ".set .Lfound, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,r14,r15\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".set regnr, 0\n" \ + ".set .Lregnr, 0\n" \ ".irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d,r13d,r14d,r15d\n" \ ".ifc \\reg, %%\\rs\n" \ - ".set found, found+1\n" \ - ".long \\type + (regnr << 8)\n" \ + ".set .Lfound, .Lfound+1\n" \ + ".long \\type + (.Lregnr << 8)\n" \ ".endif\n" \ - ".set regnr, regnr+1\n" \ + ".set .Lregnr, .Lregnr+1\n" \ ".endr\n" \ - ".if (found != 1)\n" \ + ".if (.Lfound != 1)\n" \ ".error \"extable_type_reg: bad register argument\"\n" \ ".endif\n" \ ".endm\n" diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 4d20a293c6fd..aaf0cb0db4ae 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -78,9 +78,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ - __auto_type f = BUGFLAG_WARNING|(flags); \ + __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index b85147d75626..d71c7e8b738d 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -12,14 +12,17 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, /* Structs and defines for the X86 specific MSI message format */ typedef struct x86_msi_data { - u32 vector : 8, - delivery_mode : 3, - dest_mode_logical : 1, - reserved : 2, - active_low : 1, - is_level : 1; - - u32 dmar_subhandle; + union { + struct { + u32 vector : 8, + delivery_mode : 3, + dest_mode_logical : 1, + reserved : 2, + active_low : 1, + is_level : 1; + }; + u32 dmar_subhandle; + }; } __attribute__ ((packed)) arch_msi_msg_data_t; #define arch_msi_msg_data x86_msi_data diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index a3c33b79fb86..13c0d63ed55e 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -38,9 +38,9 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - asm volatile("add " __percpu_arg(1) ", %0" \ - : "=r" (tcp_ptr__) \ - : "m" (this_cpu_off), "0" (ptr)); \ + asm ("add " __percpu_arg(1) ", %0" \ + : "=r" (tcp_ptr__) \ + : "m" (this_cpu_off), "0" (ptr)); \ (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \ }) #else diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 58d9e4b1fa0a..b06e4c573add 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -241,6 +241,11 @@ struct x86_pmu_capability { #define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3) #define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS) +static inline bool use_fixed_pseudo_encoding(u64 code) +{ + return !(code & 0xff); +} + /* * We model BTS tracing as another fixed-mode PMC. * diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index ed4f8bb6c2d9..2455d721503e 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -38,6 +38,8 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") +#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) #define ARCH_ADD_TRAMP_KEY(name) \ asm(".pushsection .static_call_tramp_key, \"a\" \n" \ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index 531fb4cbb63f..aa72cefdd5be 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -12,10 +12,9 @@ enum insn_type { }; /* - * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax - * The REX.W cancels the effect of any data16. + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax */ -static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; +static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6eb4d91d5365..d400b6d9d246 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -855,13 +855,11 @@ static void flush_tlb_func(void *info) nr_invalidate); } -static bool tlb_is_not_lazy(int cpu) +static bool tlb_is_not_lazy(int cpu, void *data) { return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu); } -static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask); - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared); EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared); @@ -890,36 +888,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask, * up on the new contents of what used to be page tables, while * doing a speculative memory access. */ - if (info->freed_tables) { + if (info->freed_tables) on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true); - } else { - /* - * Although we could have used on_each_cpu_cond_mask(), - * open-coding it has performance advantages, as it eliminates - * the need for indirect calls or retpolines. In addition, it - * allows to use a designated cpumask for evaluating the - * condition, instead of allocating one. - * - * This code works under the assumption that there are no nested - * TLB flushes, an assumption that is already made in - * flush_tlb_mm_range(). - * - * cond_cpumask is logically a stack-local variable, but it is - * more efficient to have it off the stack and not to allocate - * it on demand. Preemption is disabled and this code is - * non-reentrant. - */ - struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask); - int cpu; - - cpumask_clear(cond_cpumask); - - for_each_cpu(cpu, cpumask) { - if (tlb_is_not_lazy(cpu)) - __cpumask_set_cpu(cpu, cond_cpumask); - } - on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true); - } + else + on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func, + (void *)info, 1, cpumask); } void flush_tlb_multi(const struct cpumask *cpumask, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8fe35ed11fd6..16b6efacf7c6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -412,6 +412,7 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else #endif diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 9f2b251e83c5..3822666fb73d 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -40,7 +40,8 @@ static void msr_save_context(struct saved_context *ctxt) struct saved_msr *end = msr + ctxt->saved_msrs.num; while (msr < end) { - msr->valid = !rdmsrl_safe(msr->info.msr_no, &msr->info.reg.q); + if (msr->valid) + rdmsrl(msr->info.msr_no, msr->info.reg.q); msr++; } } @@ -424,8 +425,10 @@ static int msr_build_context(const u32 *msr_id, const int num) } for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) { + u64 dummy; + msr_array[i].info.msr_no = msr_id[j]; - msr_array[i].valid = false; + msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy); msr_array[i].info.reg.q = 0; } saved_msrs->num = total_num; @@ -500,10 +503,24 @@ static int pm_cpu_check(const struct x86_cpu_id *c) return ret; } +static void pm_save_spec_msr(void) +{ + u32 spec_msr_id[] = { + MSR_IA32_SPEC_CTRL, + MSR_IA32_TSX_CTRL, + MSR_TSX_FORCE_ABORT, + MSR_IA32_MCU_OPT_CTRL, + MSR_AMD64_LS_CFG, + }; + + msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); +} + static int pm_check_save_msr(void) { dmi_check_system(msr_save_dmi_table); pm_cpu_check(msr_save_cpu_table); + pm_save_spec_msr(); return 0; } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 32b20efff5f8..4556c86c3465 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -570,8 +570,7 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); - if (cx->type == ACPI_STATE_C3) - ACPI_FLUSH_CPU_CACHE(); + ACPI_FLUSH_CPU_CACHE(); while (1) { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 9efbfe087de7..762b61f67e6c 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -588,19 +588,6 @@ static struct acpi_device *handle_to_device(acpi_handle handle, return adev; } -int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device) -{ - if (!device) - return -EINVAL; - - *device = handle_to_device(handle, NULL); - if (!*device) - return -ENODEV; - - return 0; -} -EXPORT_SYMBOL(acpi_bus_get_device); - /** * acpi_fetch_acpi_dev - Retrieve ACPI device object. * @handle: ACPI handle associated with the requested ACPI device object. diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e5641e6c52ee..bb45a9c00514 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -115,14 +115,16 @@ config SATA_AHCI If unsure, say N. -config SATA_LPM_POLICY +config SATA_MOBILE_LPM_POLICY int "Default SATA Link Power Management policy for low power chipsets" range 0 4 default 0 depends on SATA_AHCI help Select the Default SATA Link Power Management (LPM) policy to use - for chipsets / "South Bridges" designated as supporting low power. + for chipsets / "South Bridges" supporting low-power modes. Such + chipsets are typically found on most laptops but desktops and + servers now also widely use chipsets supporting low power modes. The value set has the following meanings: 0 => Keep firmware settings diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 84456c05e845..397dfd27c90d 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1595,7 +1595,7 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports, static void ahci_update_initial_lpm_policy(struct ata_port *ap, struct ahci_host_priv *hpriv) { - int policy = CONFIG_SATA_LPM_POLICY; + int policy = CONFIG_SATA_MOBILE_LPM_POLICY; /* Ignore processing for chipsets that don't use policy */ diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 6ead58c1b6e5..ad11a4c52fbe 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -236,7 +236,7 @@ enum { AHCI_HFLAG_NO_WRITE_TO_RO = (1 << 24), /* don't write to read only registers */ AHCI_HFLAG_USE_LPM_POLICY = (1 << 25), /* chipset that should use - SATA_LPM_POLICY + SATA_MOBILE_LPM_POLICY as default lpm_policy */ AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during suspend/resume */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cceedde51126..ca64837641be 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4014,6 +4014,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_NO_DMA_LOG | + ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index b3be7a8f5bea..b1666adc1c3a 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1634,7 +1634,7 @@ EXPORT_SYMBOL_GPL(ata_sff_interrupt); void ata_sff_lost_interrupt(struct ata_port *ap) { - u8 status; + u8 status = 0; struct ata_queued_cmd *qc; /* Only one outstanding command per SFF channel */ diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index bec33d781ae0..e3263e961045 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -137,7 +137,11 @@ struct sata_dwc_device { #endif }; -#define SATA_DWC_QCMD_MAX 32 +/* + * Allow one extra special slot for commands and DMA management + * to account for libata internal commands. + */ +#define SATA_DWC_QCMD_MAX (ATA_MAX_QUEUE + 1) struct sata_dwc_device_port { struct sata_dwc_device *hsdev; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4b55e864a0a3..4d3efaa20b7b 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1638,22 +1638,22 @@ struct sib_info { }; void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib); -extern void notify_resource_state(struct sk_buff *, +extern int notify_resource_state(struct sk_buff *, unsigned int, struct drbd_resource *, struct resource_info *, enum drbd_notification_type); -extern void notify_device_state(struct sk_buff *, +extern int notify_device_state(struct sk_buff *, unsigned int, struct drbd_device *, struct device_info *, enum drbd_notification_type); -extern void notify_connection_state(struct sk_buff *, +extern int notify_connection_state(struct sk_buff *, unsigned int, struct drbd_connection *, struct connection_info *, enum drbd_notification_type); -extern void notify_peer_device_state(struct sk_buff *, +extern int notify_peer_device_state(struct sk_buff *, unsigned int, struct drbd_peer_device *, struct peer_device_info *, diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 9676a1d214bc..4b0b25cc916e 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2719,6 +2719,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig sprintf(disk->disk_name, "drbd%d", minor); disk->private_data = device; + blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); blk_queue_write_cache(disk->queue, true, true); /* Setting the max_hw_sectors to an odd value of 8kibyte here This triggers a max_bio_size message upon first attach or connect */ @@ -2773,12 +2774,12 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig if (init_submitter(device)) { err = ERR_NOMEM; - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; } err = add_disk(disk); if (err) - goto out_idr_remove_vol; + goto out_idr_remove_from_resource; /* inherit the connection state */ device->state.conn = first_connection(resource)->cstate; @@ -2792,8 +2793,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig drbd_debugfs_device_add(device); return NO_ERROR; -out_idr_remove_vol: - idr_remove(&connection->peer_devices, vnr); out_idr_remove_from_resource: for_each_connection(connection, resource) { peer_device = idr_remove(&connection->peer_devices, vnr); diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 02030c9c4d3b..b7216c186ba4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -4549,7 +4549,7 @@ static int nla_put_notification_header(struct sk_buff *msg, return drbd_notification_header_to_skb(msg, &nh, true); } -void notify_resource_state(struct sk_buff *skb, +int notify_resource_state(struct sk_buff *skb, unsigned int seq, struct drbd_resource *resource, struct resource_info *resource_info, @@ -4591,16 +4591,17 @@ void notify_resource_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_device_state(struct sk_buff *skb, +int notify_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_device *device, struct device_info *device_info, @@ -4640,16 +4641,17 @@ void notify_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_connection_state(struct sk_buff *skb, +int notify_connection_state(struct sk_buff *skb, unsigned int seq, struct drbd_connection *connection, struct connection_info *connection_info, @@ -4689,16 +4691,17 @@ void notify_connection_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } -void notify_peer_device_state(struct sk_buff *skb, +int notify_peer_device_state(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device *peer_device, struct peer_device_info *peer_device_info, @@ -4739,13 +4742,14 @@ void notify_peer_device_state(struct sk_buff *skb, if (err && err != -ESRCH) goto failed; } - return; + return 0; nla_put_failure: nlmsg_free(skb); failed: drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n", err, seq); + return err; } void notify_helper(enum drbd_notification_type type, @@ -4796,7 +4800,7 @@ void notify_helper(enum drbd_notification_type type, err, seq); } -static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) +static int notify_initial_state_done(struct sk_buff *skb, unsigned int seq) { struct drbd_genlmsghdr *dh; int err; @@ -4810,11 +4814,12 @@ static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq) if (nla_put_notification_header(skb, NOTIFY_EXISTS)) goto nla_put_failure; genlmsg_end(skb, dh); - return; + return 0; nla_put_failure: nlmsg_free(skb); pr_err("Error %d sending event. Event seq:%u\n", err, seq); + return err; } static void free_state_changes(struct list_head *list) @@ -4841,6 +4846,7 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) unsigned int seq = cb->args[2]; unsigned int n; enum drbd_notification_type flags = 0; + int err = 0; /* There is no need for taking notification_mutex here: it doesn't matter if the initial state events mix with later state chage @@ -4849,32 +4855,32 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[5]--; if (cb->args[5] == 1) { - notify_initial_state_done(skb, seq); + err = notify_initial_state_done(skb, seq); goto out; } n = cb->args[4]++; if (cb->args[4] < cb->args[3]) flags |= NOTIFY_CONTINUES; if (n < 1) { - notify_resource_state_change(skb, seq, state_change->resource, + err = notify_resource_state_change(skb, seq, state_change->resource, NOTIFY_EXISTS | flags); goto next; } n--; if (n < state_change->n_connections) { - notify_connection_state_change(skb, seq, &state_change->connections[n], + err = notify_connection_state_change(skb, seq, &state_change->connections[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_connections; if (n < state_change->n_devices) { - notify_device_state_change(skb, seq, &state_change->devices[n], + err = notify_device_state_change(skb, seq, &state_change->devices[n], NOTIFY_EXISTS | flags); goto next; } n -= state_change->n_devices; if (n < state_change->n_devices * state_change->n_connections) { - notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], + err = notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n], NOTIFY_EXISTS | flags); goto next; } @@ -4889,7 +4895,10 @@ static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) cb->args[4] = 0; } out: - return skb->len; + if (err) + return err; + else + return skb->len; } int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index b8a27818ab3f..4ee11aef6672 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1537,7 +1537,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device, return rv; } -void notify_resource_state_change(struct sk_buff *skb, +int notify_resource_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_resource_state_change *resource_state_change, enum drbd_notification_type type) @@ -1550,10 +1550,10 @@ void notify_resource_state_change(struct sk_buff *skb, .res_susp_fen = resource_state_change->susp_fen[NEW], }; - notify_resource_state(skb, seq, resource, &resource_info, type); + return notify_resource_state(skb, seq, resource, &resource_info, type); } -void notify_connection_state_change(struct sk_buff *skb, +int notify_connection_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_connection_state_change *connection_state_change, enum drbd_notification_type type) @@ -1564,10 +1564,10 @@ void notify_connection_state_change(struct sk_buff *skb, .conn_role = connection_state_change->peer_role[NEW], }; - notify_connection_state(skb, seq, connection, &connection_info, type); + return notify_connection_state(skb, seq, connection, &connection_info, type); } -void notify_device_state_change(struct sk_buff *skb, +int notify_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_device_state_change *device_state_change, enum drbd_notification_type type) @@ -1577,10 +1577,10 @@ void notify_device_state_change(struct sk_buff *skb, .dev_disk_state = device_state_change->disk_state[NEW], }; - notify_device_state(skb, seq, device, &device_info, type); + return notify_device_state(skb, seq, device, &device_info, type); } -void notify_peer_device_state_change(struct sk_buff *skb, +int notify_peer_device_state_change(struct sk_buff *skb, unsigned int seq, struct drbd_peer_device_state_change *p, enum drbd_notification_type type) @@ -1594,7 +1594,7 @@ void notify_peer_device_state_change(struct sk_buff *skb, .peer_resync_susp_dependency = p->resync_susp_dependency[NEW], }; - notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); + return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type); } static void broadcast_state_change(struct drbd_state_change *state_change) @@ -1602,7 +1602,7 @@ static void broadcast_state_change(struct drbd_state_change *state_change) struct drbd_resource_state_change *resource_state_change = &state_change->resource[0]; bool resource_state_has_changed; unsigned int n_device, n_connection, n_peer_device, n_peer_devices; - void (*last_func)(struct sk_buff *, unsigned int, void *, + int (*last_func)(struct sk_buff *, unsigned int, void *, enum drbd_notification_type) = NULL; void *last_arg = NULL; diff --git a/drivers/block/drbd/drbd_state_change.h b/drivers/block/drbd/drbd_state_change.h index ba80f612d6ab..d5b0479bc9a6 100644 --- a/drivers/block/drbd/drbd_state_change.h +++ b/drivers/block/drbd/drbd_state_change.h @@ -44,19 +44,19 @@ extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_ extern void copy_old_to_new_state_change(struct drbd_state_change *); extern void forget_state_change(struct drbd_state_change *); -extern void notify_resource_state_change(struct sk_buff *, +extern int notify_resource_state_change(struct sk_buff *, unsigned int, struct drbd_resource_state_change *, enum drbd_notification_type type); -extern void notify_connection_state_change(struct sk_buff *, +extern int notify_connection_state_change(struct sk_buff *, unsigned int, struct drbd_connection_state_change *, enum drbd_notification_type type); -extern void notify_device_state_change(struct sk_buff *, +extern int notify_device_state_change(struct sk_buff *, unsigned int, struct drbd_device_state_change *, enum drbd_notification_type type); -extern void notify_peer_device_state_change(struct sk_buff *, +extern int notify_peer_device_state_change(struct sk_buff *, unsigned int, struct drbd_peer_device_state_change *, enum drbd_notification_type type); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 7bd10d63ddbe..2dc9da683a13 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1365,7 +1365,6 @@ static int cdrom_slot_status(struct cdrom_device_info *cdi, int slot) */ int cdrom_number_of_slots(struct cdrom_device_info *cdi) { - int status; int nslots = 1; struct cdrom_changer_info *info; @@ -1377,7 +1376,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi) if (!info) return -ENOMEM; - if ((status = cdrom_read_mech_status(cdi, info)) == 0) + if (cdrom_read_mech_status(cdi, info) == 0) nslots = info->hdr.nslots; kfree(info); diff --git a/drivers/char/random.c b/drivers/char/random.c index 1d8242969751..e15063d61460 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -437,11 +437,8 @@ static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], * This shouldn't be set by functions like add_device_randomness(), * where we can't trust the buffer passed to it is guaranteed to be * unpredictable (so it might not have any entropy at all). - * - * Returns the number of bytes processed from input, which is bounded - * by CRNG_INIT_CNT_THRESH if account is true. */ -static size_t crng_pre_init_inject(const void *input, size_t len, bool account) +static void crng_pre_init_inject(const void *input, size_t len, bool account) { static int crng_init_cnt = 0; struct blake2s_state hash; @@ -452,18 +449,15 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) spin_lock_irqsave(&base_crng.lock, flags); if (crng_init != 0) { spin_unlock_irqrestore(&base_crng.lock, flags); - return 0; + return; } - if (account) - len = min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); - blake2s_update(&hash, base_crng.key, sizeof(base_crng.key)); blake2s_update(&hash, input, len); blake2s_final(&hash, base_crng.key); if (account) { - crng_init_cnt += len; + crng_init_cnt += min_t(size_t, len, CRNG_INIT_CNT_THRESH - crng_init_cnt); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { ++base_crng.generation; crng_init = 1; @@ -474,8 +468,6 @@ static size_t crng_pre_init_inject(const void *input, size_t len, bool account) if (crng_init == 1) pr_notice("fast init done\n"); - - return len; } static void _get_random_bytes(void *buf, size_t nbytes) @@ -531,7 +523,6 @@ EXPORT_SYMBOL(get_random_bytes); static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) { - bool large_request = nbytes > 256; ssize_t ret = 0; size_t len; u32 chacha_state[CHACHA_STATE_WORDS]; @@ -540,22 +531,23 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) if (!nbytes) return 0; - len = min_t(size_t, 32, nbytes); - crng_make_state(chacha_state, output, len); - - if (copy_to_user(buf, output, len)) - return -EFAULT; - nbytes -= len; - buf += len; - ret += len; - - while (nbytes) { - if (large_request && need_resched()) { - if (signal_pending(current)) - break; - schedule(); - } + /* + * Immediately overwrite the ChaCha key at index 4 with random + * bytes, in case userspace causes copy_to_user() below to sleep + * forever, so that we still retain forward secrecy in that case. + */ + crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE); + /* + * However, if we're doing a read of len <= 32, we don't need to + * use chacha_state after, so we can simply return those bytes to + * the user directly. + */ + if (nbytes <= CHACHA_KEY_SIZE) { + ret = copy_to_user(buf, &chacha_state[4], nbytes) ? -EFAULT : nbytes; + goto out_zero_chacha; + } + do { chacha20_block(chacha_state, output); if (unlikely(chacha_state[12] == 0)) ++chacha_state[13]; @@ -569,10 +561,18 @@ static ssize_t get_random_bytes_user(void __user *buf, size_t nbytes) nbytes -= len; buf += len; ret += len; - } - memzero_explicit(chacha_state, sizeof(chacha_state)); + BUILD_BUG_ON(PAGE_SIZE % CHACHA_BLOCK_SIZE != 0); + if (!(ret % PAGE_SIZE) && nbytes) { + if (signal_pending(current)) + break; + cond_resched(); + } + } while (nbytes); + memzero_explicit(output, sizeof(output)); +out_zero_chacha: + memzero_explicit(chacha_state, sizeof(chacha_state)); return ret; } @@ -1141,12 +1141,9 @@ void add_hwgenerator_randomness(const void *buffer, size_t count, size_t entropy) { if (unlikely(crng_init == 0 && entropy < POOL_MIN_BITS)) { - size_t ret = crng_pre_init_inject(buffer, count, true); - mix_pool_bytes(buffer, ret); - count -= ret; - buffer += ret; - if (!count || crng_init == 0) - return; + crng_pre_init_inject(buffer, count, true); + mix_pool_bytes(buffer, count); + return; } /* @@ -1545,6 +1542,13 @@ static ssize_t urandom_read(struct file *file, char __user *buf, size_t nbytes, { static int maxwarn = 10; + /* + * Opportunistically attempt to initialize the RNG on platforms that + * have fast cycle counters, but don't (for now) require it to succeed. + */ + if (!crng_ready()) + try_to_generate_entropy(); + if (!crng_ready() && maxwarn > 0) { maxwarn--; if (__ratelimit(&urandom_warning)) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8a7267d116b7..3f2182d66829 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -436,7 +436,6 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) for (i = mbox_ready_timeout; i; i--) { u32 temp; - int rc; rc = pci_read_config_dword( pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile index 511805dbeb75..4c9eb53ba3f8 100644 --- a/drivers/dma-buf/Makefile +++ b/drivers/dma-buf/Makefile @@ -12,6 +12,7 @@ dmabuf_selftests-y := \ selftest.o \ st-dma-fence.o \ st-dma-fence-chain.o \ + st-dma-fence-unwrap.o \ st-dma-resv.o obj-$(CONFIG_DMABUF_SELFTESTS) += dmabuf_selftests.o diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index df23239b04fc..79795857be3e 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -216,7 +216,8 @@ static bool dma_buf_poll_add_cb(struct dma_resv *resv, bool write, struct dma_fence *fence; int r; - dma_resv_for_each_fence(&cursor, resv, write, fence) { + dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(write), + fence) { dma_fence_get(fence); r = dma_fence_add_callback(fence, &dcb->cb, dma_buf_poll_cb); if (!r) @@ -443,7 +444,7 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) * as a file descriptor by calling dma_buf_fd(). * * 2. Userspace passes this file-descriptors to all drivers it wants this buffer - * to share with: First the filedescriptor is converted to a &dma_buf using + * to share with: First the file descriptor is converted to a &dma_buf using * dma_buf_get(). Then the buffer is attached to the device using * dma_buf_attach(). * @@ -660,12 +661,24 @@ static struct sg_table * __map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction direction) { struct sg_table *sg_table; + signed long ret; sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction); + if (IS_ERR_OR_NULL(sg_table)) + return sg_table; - if (!IS_ERR_OR_NULL(sg_table)) - mangle_sg_table(sg_table); + if (!dma_buf_attachment_is_dynamic(attach)) { + ret = dma_resv_wait_timeout(attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) { + attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, + direction); + return ERR_PTR(ret); + } + } + mangle_sg_table(sg_table); return sg_table; } @@ -1124,7 +1137,8 @@ static int __dma_buf_begin_cpu_access(struct dma_buf *dmabuf, long ret; /* Wait on any implicit rendering fences */ - ret = dma_resv_wait_timeout(resv, write, true, MAX_SCHEDULE_TIMEOUT); + ret = dma_resv_wait_timeout(resv, dma_resv_usage_rw(write), + true, MAX_SCHEDULE_TIMEOUT); if (ret < 0) return ret; diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index cb1bacb5a42b..5c8a7084577b 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -159,6 +159,8 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence_array *array; size_t size = sizeof(*array); + WARN_ON(!num_fences || !fences); + /* Allocate the callback structures behind the array. */ size += num_fences * sizeof(struct dma_fence_array_cb); array = kzalloc(size, GFP_KERNEL); @@ -219,3 +221,33 @@ bool dma_fence_match_context(struct dma_fence *fence, u64 context) return true; } EXPORT_SYMBOL(dma_fence_match_context); + +struct dma_fence *dma_fence_array_first(struct dma_fence *head) +{ + struct dma_fence_array *array; + + if (!head) + return NULL; + + array = to_dma_fence_array(head); + if (!array) + return head; + + if (!array->num_fences) + return NULL; + + return array->fences[0]; +} +EXPORT_SYMBOL(dma_fence_array_first); + +struct dma_fence *dma_fence_array_next(struct dma_fence *head, + unsigned int index) +{ + struct dma_fence_array *array = to_dma_fence_array(head); + + if (!array || index >= array->num_fences) + return NULL; + + return array->fences[index]; +} +EXPORT_SYMBOL(dma_fence_array_next); diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index b51416405e86..0cce6e4ec946 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -34,6 +34,7 @@ */ #include +#include #include #include #include @@ -43,12 +44,12 @@ /** * DOC: Reservation Object Overview * - * The reservation object provides a mechanism to manage shared and - * exclusive fences associated with a buffer. A reservation object - * can have attached one exclusive fence (normally associated with - * write operations) or N shared fences (read operations). The RCU - * mechanism is used to protect read access to fences from locked - * write-side updates. + * The reservation object provides a mechanism to manage a container of + * dma_fence object associated with a resource. A reservation object + * can have any number of fences attaches to it. Each fence carries an usage + * parameter determining how the operation represented by the fence is using the + * resource. The RCU mechanism is used to protect read access to fences from + * locked write-side updates. * * See struct dma_resv for more details. */ @@ -56,33 +57,59 @@ DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); -/** - * dma_resv_list_alloc - allocate fence list - * @shared_max: number of fences we need space for - * +/* Mask for the lower fence pointer bits */ +#define DMA_RESV_LIST_MASK 0x3 + +struct dma_resv_list { + struct rcu_head rcu; + u32 num_fences, max_fences; + struct dma_fence __rcu *table[]; +}; + +/* Extract the fence and usage flags from an RCU protected entry in the list. */ +static void dma_resv_list_entry(struct dma_resv_list *list, unsigned int index, + struct dma_resv *resv, struct dma_fence **fence, + enum dma_resv_usage *usage) +{ + long tmp; + + tmp = (long)rcu_dereference_check(list->table[index], + resv ? dma_resv_held(resv) : true); + *fence = (struct dma_fence *)(tmp & ~DMA_RESV_LIST_MASK); + if (usage) + *usage = tmp & DMA_RESV_LIST_MASK; +} + +/* Set the fence and usage flags at the specific index in the list. */ +static void dma_resv_list_set(struct dma_resv_list *list, + unsigned int index, + struct dma_fence *fence, + enum dma_resv_usage usage) +{ + long tmp = ((long)fence) | usage; + + RCU_INIT_POINTER(list->table[index], (struct dma_fence *)tmp); +} + +/* * Allocate a new dma_resv_list and make sure to correctly initialize - * shared_max. + * max_fences. */ -static struct dma_resv_list *dma_resv_list_alloc(unsigned int shared_max) +static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences) { struct dma_resv_list *list; - list = kmalloc(struct_size(list, shared, shared_max), GFP_KERNEL); + list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL); if (!list) return NULL; - list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) / - sizeof(*list->shared); + list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) / + sizeof(*list->table); return list; } -/** - * dma_resv_list_free - free fence list - * @list: list to free - * - * Free a dma_resv_list and make sure to drop all references. - */ +/* Free a dma_resv_list and make sure to drop all references. */ static void dma_resv_list_free(struct dma_resv_list *list) { unsigned int i; @@ -90,9 +117,12 @@ static void dma_resv_list_free(struct dma_resv_list *list) if (!list) return; - for (i = 0; i < list->shared_count; ++i) - dma_fence_put(rcu_dereference_protected(list->shared[i], true)); + for (i = 0; i < list->num_fences; ++i) { + struct dma_fence *fence; + dma_resv_list_entry(list, i, NULL, &fence, NULL); + dma_fence_put(fence); + } kfree_rcu(list, rcu); } @@ -103,10 +133,8 @@ static void dma_resv_list_free(struct dma_resv_list *list) void dma_resv_init(struct dma_resv *obj) { ww_mutex_init(&obj->lock, &reservation_ww_class); - seqcount_ww_mutex_init(&obj->seq, &obj->lock); - RCU_INIT_POINTER(obj->fence, NULL); - RCU_INIT_POINTER(obj->fence_excl, NULL); + RCU_INIT_POINTER(obj->fences, NULL); } EXPORT_SYMBOL(dma_resv_init); @@ -116,51 +144,48 @@ EXPORT_SYMBOL(dma_resv_init); */ void dma_resv_fini(struct dma_resv *obj) { - struct dma_resv_list *fobj; - struct dma_fence *excl; - /* * This object should be dead and all references must have * been released to it, so no need to be protected with rcu. */ - excl = rcu_dereference_protected(obj->fence_excl, 1); - if (excl) - dma_fence_put(excl); - - fobj = rcu_dereference_protected(obj->fence, 1); - dma_resv_list_free(fobj); + dma_resv_list_free(rcu_dereference_protected(obj->fences, true)); ww_mutex_destroy(&obj->lock); } EXPORT_SYMBOL(dma_resv_fini); +/* Dereference the fences while ensuring RCU rules */ +static inline struct dma_resv_list *dma_resv_fences_list(struct dma_resv *obj) +{ + return rcu_dereference_check(obj->fences, dma_resv_held(obj)); +} + /** - * dma_resv_reserve_shared - Reserve space to add shared fences to - * a dma_resv. + * dma_resv_reserve_fences - Reserve space to add fences to a dma_resv object. * @obj: reservation object * @num_fences: number of fences we want to add * - * Should be called before dma_resv_add_shared_fence(). Must - * be called with @obj locked through dma_resv_lock(). + * Should be called before dma_resv_add_fence(). Must be called with @obj + * locked through dma_resv_lock(). * * Note that the preallocated slots need to be re-reserved if @obj is unlocked - * at any time before calling dma_resv_add_shared_fence(). This is validated - * when CONFIG_DEBUG_MUTEXES is enabled. + * at any time before calling dma_resv_add_fence(). This is validated when + * CONFIG_DEBUG_MUTEXES is enabled. * * RETURNS * Zero for success, or -errno */ -int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) +int dma_resv_reserve_fences(struct dma_resv *obj, unsigned int num_fences) { struct dma_resv_list *old, *new; unsigned int i, j, k, max; dma_resv_assert_held(obj); - old = dma_resv_shared_list(obj); - if (old && old->shared_max) { - if ((old->shared_count + num_fences) <= old->shared_max) + old = dma_resv_fences_list(obj); + if (old && old->max_fences) { + if ((old->num_fences + num_fences) <= old->max_fences) return 0; - max = max(old->shared_count + num_fences, old->shared_max * 2); + max = max(old->num_fences + num_fences, old->max_fences * 2); } else { max = max(4ul, roundup_pow_of_two(num_fences)); } @@ -175,27 +200,27 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) * references from the old struct are carried over to * the new. */ - for (i = 0, j = 0, k = max; i < (old ? old->shared_count : 0); ++i) { + for (i = 0, j = 0, k = max; i < (old ? old->num_fences : 0); ++i) { + enum dma_resv_usage usage; struct dma_fence *fence; - fence = rcu_dereference_protected(old->shared[i], - dma_resv_held(obj)); + dma_resv_list_entry(old, i, obj, &fence, &usage); if (dma_fence_is_signaled(fence)) - RCU_INIT_POINTER(new->shared[--k], fence); + RCU_INIT_POINTER(new->table[--k], fence); else - RCU_INIT_POINTER(new->shared[j++], fence); + dma_resv_list_set(new, j++, fence, usage); } - new->shared_count = j; + new->num_fences = j; /* * We are not changing the effective set of fences here so can * merely update the pointer to the new array; both existing * readers and new readers will see exactly the same set of - * active (unsignaled) shared fences. Individual fences and the + * active (unsignaled) fences. Individual fences and the * old array are protected by RCU and so will not vanish under * the gaze of the rcu_read_lock() readers. */ - rcu_assign_pointer(obj->fence, new); + rcu_assign_pointer(obj->fences, new); if (!old) return 0; @@ -204,7 +229,7 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) for (i = k; i < max; ++i) { struct dma_fence *fence; - fence = rcu_dereference_protected(new->shared[i], + fence = rcu_dereference_protected(new->table[i], dma_resv_held(obj)); dma_fence_put(fence); } @@ -212,41 +237,43 @@ int dma_resv_reserve_shared(struct dma_resv *obj, unsigned int num_fences) return 0; } -EXPORT_SYMBOL(dma_resv_reserve_shared); +EXPORT_SYMBOL(dma_resv_reserve_fences); #ifdef CONFIG_DEBUG_MUTEXES /** - * dma_resv_reset_shared_max - reset shared fences for debugging + * dma_resv_reset_max_fences - reset fences for debugging * @obj: the dma_resv object to reset * - * Reset the number of pre-reserved shared slots to test that drivers do - * correct slot allocation using dma_resv_reserve_shared(). See also - * &dma_resv_list.shared_max. + * Reset the number of pre-reserved fence slots to test that drivers do + * correct slot allocation using dma_resv_reserve_fences(). See also + * &dma_resv_list.max_fences. */ -void dma_resv_reset_shared_max(struct dma_resv *obj) +void dma_resv_reset_max_fences(struct dma_resv *obj) { - struct dma_resv_list *fences = dma_resv_shared_list(obj); + struct dma_resv_list *fences = dma_resv_fences_list(obj); dma_resv_assert_held(obj); - /* Test shared fence slot reservation */ + /* Test fence slot reservation */ if (fences) - fences->shared_max = fences->shared_count; + fences->max_fences = fences->num_fences; } -EXPORT_SYMBOL(dma_resv_reset_shared_max); +EXPORT_SYMBOL(dma_resv_reset_max_fences); #endif /** - * dma_resv_add_shared_fence - Add a fence to a shared slot + * dma_resv_add_fence - Add a fence to the dma_resv obj * @obj: the reservation object - * @fence: the shared fence to add + * @fence: the fence to add + * @usage: how the fence is used, see enum dma_resv_usage * - * Add a fence to a shared slot, @obj must be locked with dma_resv_lock(), and - * dma_resv_reserve_shared() has been called. + * Add a fence to a slot, @obj must be locked with dma_resv_lock(), and + * dma_resv_reserve_fences() has been called. * * See also &dma_resv.fence for a discussion of the semantics. */ -void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) +void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence, + enum dma_resv_usage usage) { struct dma_resv_list *fobj; struct dma_fence *old; @@ -261,118 +288,105 @@ void dma_resv_add_shared_fence(struct dma_resv *obj, struct dma_fence *fence) */ WARN_ON(dma_fence_is_container(fence)); - fobj = dma_resv_shared_list(obj); - count = fobj->shared_count; - - write_seqcount_begin(&obj->seq); + fobj = dma_resv_fences_list(obj); + count = fobj->num_fences; for (i = 0; i < count; ++i) { + enum dma_resv_usage old_usage; - old = rcu_dereference_protected(fobj->shared[i], - dma_resv_held(obj)); - if (old->context == fence->context || - dma_fence_is_signaled(old)) - goto replace; + dma_resv_list_entry(fobj, i, obj, &old, &old_usage); + if ((old->context == fence->context && old_usage >= usage) || + dma_fence_is_signaled(old)) { + dma_resv_list_set(fobj, i, fence, usage); + dma_fence_put(old); + return; + } } - BUG_ON(fobj->shared_count >= fobj->shared_max); - old = NULL; + BUG_ON(fobj->num_fences >= fobj->max_fences); count++; -replace: - RCU_INIT_POINTER(fobj->shared[i], fence); - /* pointer update must be visible before we extend the shared_count */ - smp_store_mb(fobj->shared_count, count); - - write_seqcount_end(&obj->seq); - dma_fence_put(old); + dma_resv_list_set(fobj, i, fence, usage); + /* pointer update must be visible before we extend the num_fences */ + smp_store_mb(fobj->num_fences, count); } -EXPORT_SYMBOL(dma_resv_add_shared_fence); +EXPORT_SYMBOL(dma_resv_add_fence); /** - * dma_resv_add_excl_fence - Add an exclusive fence. + * dma_resv_replace_fences - replace fences in the dma_resv obj * @obj: the reservation object - * @fence: the exclusive fence to add + * @context: the context of the fences to replace + * @replacement: the new fence to use instead + * @usage: how the new fence is used, see enum dma_resv_usage * - * Add a fence to the exclusive slot. @obj must be locked with dma_resv_lock(). - * Note that this function replaces all fences attached to @obj, see also - * &dma_resv.fence_excl for a discussion of the semantics. + * Replace fences with a specified context with a new fence. Only valid if the + * operation represented by the original fence has no longer access to the + * resources represented by the dma_resv object when the new fence completes. + * + * And example for using this is replacing a preemption fence with a page table + * update fence which makes the resource inaccessible. */ -void dma_resv_add_excl_fence(struct dma_resv *obj, struct dma_fence *fence) +void dma_resv_replace_fences(struct dma_resv *obj, uint64_t context, + struct dma_fence *replacement, + enum dma_resv_usage usage) { - struct dma_fence *old_fence = dma_resv_excl_fence(obj); - struct dma_resv_list *old; - u32 i = 0; + struct dma_resv_list *list; + unsigned int i; dma_resv_assert_held(obj); - old = dma_resv_shared_list(obj); - if (old) - i = old->shared_count; + list = dma_resv_fences_list(obj); + for (i = 0; list && i < list->num_fences; ++i) { + struct dma_fence *old; - dma_fence_get(fence); + dma_resv_list_entry(list, i, obj, &old, NULL); + if (old->context != context) + continue; - write_seqcount_begin(&obj->seq); - /* write_seqcount_begin provides the necessary memory barrier */ - RCU_INIT_POINTER(obj->fence_excl, fence); - if (old) - old->shared_count = 0; - write_seqcount_end(&obj->seq); - - /* inplace update, no shared fences */ - while (i--) - dma_fence_put(rcu_dereference_protected(old->shared[i], - dma_resv_held(obj))); - - dma_fence_put(old_fence); + dma_resv_list_set(list, i, replacement, usage); + dma_fence_put(old); + } } -EXPORT_SYMBOL(dma_resv_add_excl_fence); +EXPORT_SYMBOL(dma_resv_replace_fences); -/* Restart the iterator by initializing all the necessary fields, but not the - * relation to the dma_resv object. */ +/* Restart the unlocked iteration by initializing the cursor object. */ static void dma_resv_iter_restart_unlocked(struct dma_resv_iter *cursor) { - cursor->seq = read_seqcount_begin(&cursor->obj->seq); - cursor->index = -1; - cursor->shared_count = 0; - if (cursor->all_fences) { - cursor->fences = dma_resv_shared_list(cursor->obj); - if (cursor->fences) - cursor->shared_count = cursor->fences->shared_count; - } else { - cursor->fences = NULL; - } + cursor->index = 0; + cursor->num_fences = 0; + cursor->fences = dma_resv_fences_list(cursor->obj); + if (cursor->fences) + cursor->num_fences = cursor->fences->num_fences; cursor->is_restarted = true; } /* Walk to the next not signaled fence and grab a reference to it */ static void dma_resv_iter_walk_unlocked(struct dma_resv_iter *cursor) { - struct dma_resv *obj = cursor->obj; + if (!cursor->fences) + return; do { /* Drop the reference from the previous round */ dma_fence_put(cursor->fence); - if (cursor->index == -1) { - cursor->fence = dma_resv_excl_fence(obj); - cursor->index++; - if (!cursor->fence) - continue; - - } else if (!cursor->fences || - cursor->index >= cursor->shared_count) { + if (cursor->index >= cursor->num_fences) { cursor->fence = NULL; break; - } else { - struct dma_resv_list *fences = cursor->fences; - unsigned int idx = cursor->index++; - - cursor->fence = rcu_dereference(fences->shared[idx]); } + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &cursor->fence, + &cursor->fence_usage); cursor->fence = dma_fence_get_rcu(cursor->fence); - if (!cursor->fence || !dma_fence_is_signaled(cursor->fence)) + if (!cursor->fence) { + dma_resv_iter_restart_unlocked(cursor); + continue; + } + + if (!dma_fence_is_signaled(cursor->fence) && + cursor->usage >= cursor->fence_usage) break; } while (true); } @@ -395,7 +409,7 @@ struct dma_fence *dma_resv_iter_first_unlocked(struct dma_resv_iter *cursor) do { dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock(); return cursor->fence; @@ -418,13 +432,13 @@ struct dma_fence *dma_resv_iter_next_unlocked(struct dma_resv_iter *cursor) rcu_read_lock(); cursor->is_restarted = false; - restart = read_seqcount_retry(&cursor->obj->seq, cursor->seq); + restart = dma_resv_fences_list(cursor->obj) != cursor->fences; do { if (restart) dma_resv_iter_restart_unlocked(cursor); dma_resv_iter_walk_unlocked(cursor); restart = true; - } while (read_seqcount_retry(&cursor->obj->seq, cursor->seq)); + } while (dma_resv_fences_list(cursor->obj) != cursor->fences); rcu_read_unlock(); return cursor->fence; @@ -447,15 +461,9 @@ struct dma_fence *dma_resv_iter_first(struct dma_resv_iter *cursor) dma_resv_assert_held(cursor->obj); cursor->index = 0; - if (cursor->all_fences) - cursor->fences = dma_resv_shared_list(cursor->obj); - else - cursor->fences = NULL; - - fence = dma_resv_excl_fence(cursor->obj); - if (!fence) - fence = dma_resv_iter_next(cursor); + cursor->fences = dma_resv_fences_list(cursor->obj); + fence = dma_resv_iter_next(cursor); cursor->is_restarted = true; return fence; } @@ -470,17 +478,22 @@ EXPORT_SYMBOL_GPL(dma_resv_iter_first); */ struct dma_fence *dma_resv_iter_next(struct dma_resv_iter *cursor) { - unsigned int idx; + struct dma_fence *fence; dma_resv_assert_held(cursor->obj); cursor->is_restarted = false; - if (!cursor->fences || cursor->index >= cursor->fences->shared_count) - return NULL; - idx = cursor->index++; - return rcu_dereference_protected(cursor->fences->shared[idx], - dma_resv_held(cursor->obj)); + do { + if (!cursor->fences || + cursor->index >= cursor->fences->num_fences) + return NULL; + + dma_resv_list_entry(cursor->fences, cursor->index++, + cursor->obj, &fence, &cursor->fence_usage); + } while (cursor->fence_usage > cursor->usage); + + return fence; } EXPORT_SYMBOL_GPL(dma_resv_iter_next); @@ -495,60 +508,43 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src) { struct dma_resv_iter cursor; struct dma_resv_list *list; - struct dma_fence *f, *excl; + struct dma_fence *f; dma_resv_assert_held(dst); list = NULL; - excl = NULL; - dma_resv_iter_begin(&cursor, src, true); + dma_resv_iter_begin(&cursor, src, DMA_RESV_USAGE_BOOKKEEP); dma_resv_for_each_fence_unlocked(&cursor, f) { if (dma_resv_iter_is_restarted(&cursor)) { dma_resv_list_free(list); - dma_fence_put(excl); - if (cursor.shared_count) { - list = dma_resv_list_alloc(cursor.shared_count); - if (!list) { - dma_resv_iter_end(&cursor); - return -ENOMEM; - } - - list->shared_count = 0; - - } else { - list = NULL; + list = dma_resv_list_alloc(cursor.num_fences); + if (!list) { + dma_resv_iter_end(&cursor); + return -ENOMEM; } - excl = NULL; + list->num_fences = 0; } dma_fence_get(f); - if (dma_resv_iter_is_exclusive(&cursor)) - excl = f; - else - RCU_INIT_POINTER(list->shared[list->shared_count++], f); + dma_resv_list_set(list, list->num_fences++, f, + dma_resv_iter_usage(&cursor)); } dma_resv_iter_end(&cursor); - write_seqcount_begin(&dst->seq); - excl = rcu_replace_pointer(dst->fence_excl, excl, dma_resv_held(dst)); - list = rcu_replace_pointer(dst->fence, list, dma_resv_held(dst)); - write_seqcount_end(&dst->seq); - + list = rcu_replace_pointer(dst->fences, list, dma_resv_held(dst)); dma_resv_list_free(list); - dma_fence_put(excl); - return 0; } EXPORT_SYMBOL(dma_resv_copy_fences); /** - * dma_resv_get_fences - Get an object's shared and exclusive + * dma_resv_get_fences - Get an object's fences * fences without update side lock held * @obj: the reservation object - * @write: true if we should return all fences + * @usage: controls which fences to include, see enum dma_resv_usage. * @num_fences: the number of fences returned * @fences: the array of fence ptrs returned (array is krealloc'd to the * required size, and must be freed by caller) @@ -556,7 +552,7 @@ EXPORT_SYMBOL(dma_resv_copy_fences); * Retrieve all fences from the reservation object. * Returns either zero or -ENOMEM. */ -int dma_resv_get_fences(struct dma_resv *obj, bool write, +int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage, unsigned int *num_fences, struct dma_fence ***fences) { struct dma_resv_iter cursor; @@ -565,7 +561,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, *num_fences = 0; *fences = NULL; - dma_resv_iter_begin(&cursor, obj, write); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (dma_resv_iter_is_restarted(&cursor)) { @@ -574,7 +570,7 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, while (*num_fences) dma_fence_put((*fences)[--(*num_fences)]); - count = cursor.shared_count + 1; + count = cursor.num_fences + 1; /* Eventually re-allocate the array */ *fences = krealloc_array(*fences, count, @@ -595,10 +591,62 @@ int dma_resv_get_fences(struct dma_resv *obj, bool write, EXPORT_SYMBOL_GPL(dma_resv_get_fences); /** - * dma_resv_wait_timeout - Wait on reservation's objects - * shared and/or exclusive fences. + * dma_resv_get_singleton - Get a single fence for all the fences * @obj: the reservation object - * @wait_all: if true, wait on all fences, else wait on just exclusive fence + * @usage: controls which fences to include, see enum dma_resv_usage. + * @fence: the resulting fence + * + * Get a single fence representing all the fences inside the resv object. + * Returns either 0 for success or -ENOMEM. + * + * Warning: This can't be used like this when adding the fence back to the resv + * object since that can lead to stack corruption when finalizing the + * dma_fence_array. + * + * Returns 0 on success and negative error values on failure. + */ +int dma_resv_get_singleton(struct dma_resv *obj, enum dma_resv_usage usage, + struct dma_fence **fence) +{ + struct dma_fence_array *array; + struct dma_fence **fences; + unsigned count; + int r; + + r = dma_resv_get_fences(obj, usage, &count, &fences); + if (r) + return r; + + if (count == 0) { + *fence = NULL; + return 0; + } + + if (count == 1) { + *fence = fences[0]; + kfree(fences); + return 0; + } + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) { + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; + } + + *fence = &array->base; + return 0; +} +EXPORT_SYMBOL_GPL(dma_resv_get_singleton); + +/** + * dma_resv_wait_timeout - Wait on reservation's objects fences + * @obj: the reservation object + * @usage: controls which fences to include, see enum dma_resv_usage. * @intr: if true, do interruptible wait * @timeout: timeout value in jiffies or zero to return immediately * @@ -608,14 +656,14 @@ EXPORT_SYMBOL_GPL(dma_resv_get_fences); * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or * greater than zer on success. */ -long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr, - unsigned long timeout) +long dma_resv_wait_timeout(struct dma_resv *obj, enum dma_resv_usage usage, + bool intr, unsigned long timeout) { long ret = timeout ? timeout : 1; struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, wait_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { ret = dma_fence_wait_timeout(fence, intr, ret); @@ -635,8 +683,7 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * dma_resv_test_signaled - Test if a reservation object's fences have been * signaled. * @obj: the reservation object - * @test_all: if true, test all fences, otherwise only test the exclusive - * fence + * @usage: controls which fences to include, see enum dma_resv_usage. * * Callers are not required to hold specific locks, but maybe hold * dma_resv_lock() already. @@ -645,12 +692,12 @@ EXPORT_SYMBOL_GPL(dma_resv_wait_timeout); * * True if all fences signaled, else false. */ -bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all) +bool dma_resv_test_signaled(struct dma_resv *obj, enum dma_resv_usage usage) { struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_iter_begin(&cursor, obj, test_all); + dma_resv_iter_begin(&cursor, obj, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { dma_resv_iter_end(&cursor); return false; @@ -670,13 +717,13 @@ EXPORT_SYMBOL_GPL(dma_resv_test_signaled); */ void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq) { + static const char *usage[] = { "kernel", "write", "read", "bookkeep" }; struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, obj, true, fence) { + dma_resv_for_each_fence(&cursor, obj, DMA_RESV_USAGE_READ, fence) { seq_printf(seq, "\t%s fence:", - dma_resv_iter_is_exclusive(&cursor) ? - "Exclusive" : "Shared"); + usage[dma_resv_iter_usage(&cursor)]); dma_fence_describe(fence, seq); } } diff --git a/drivers/dma-buf/selftests.h b/drivers/dma-buf/selftests.h index 97d73aaa31da..851965867d9c 100644 --- a/drivers/dma-buf/selftests.h +++ b/drivers/dma-buf/selftests.h @@ -12,4 +12,5 @@ selftest(sanitycheck, __sanitycheck__) /* keep first (igt selfcheck) */ selftest(dma_fence, dma_fence) selftest(dma_fence_chain, dma_fence_chain) +selftest(dma_fence_unwrap, dma_fence_unwrap) selftest(dma_resv, dma_resv) diff --git a/drivers/dma-buf/st-dma-fence-unwrap.c b/drivers/dma-buf/st-dma-fence-unwrap.c new file mode 100644 index 000000000000..039f016b57be --- /dev/null +++ b/drivers/dma-buf/st-dma-fence-unwrap.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: MIT + +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + */ + +#include +#if 0 +#include +#include +#include +#include +#include +#include +#include +#endif + +#include "selftest.h" + +#define CHAIN_SZ (4 << 10) + +static inline struct mock_fence { + struct dma_fence base; + spinlock_t lock; +} *to_mock_fence(struct dma_fence *f) { + return container_of(f, struct mock_fence, base); +} + +static const char *mock_name(struct dma_fence *f) +{ + return "mock"; +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, +}; + +static struct dma_fence *mock_fence(void) +{ + struct mock_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + spin_lock_init(&f->lock); + dma_fence_init(&f->base, &mock_ops, &f->lock, 0, 0); + + return &f->base; +} + +static struct dma_fence *mock_array(unsigned int num_fences, ...) +{ + struct dma_fence_array *array; + struct dma_fence **fences; + va_list valist; + int i; + + fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); + if (!fences) + return NULL; + + va_start(valist, num_fences); + for (i = 0; i < num_fences; ++i) + fences[i] = va_arg(valist, typeof(*fences)); + va_end(valist); + + array = dma_fence_array_create(num_fences, fences, + dma_fence_context_alloc(1), + 1, false); + if (!array) + goto cleanup; + return &array->base; + +cleanup: + for (i = 0; i < num_fences; ++i) + dma_fence_put(fences[i]); + kfree(fences); + return NULL; +} + +static struct dma_fence *mock_chain(struct dma_fence *prev, + struct dma_fence *fence) +{ + struct dma_fence_chain *f; + + f = dma_fence_chain_alloc(); + if (!f) { + dma_fence_put(prev); + dma_fence_put(fence); + return NULL; + } + + dma_fence_chain_init(f, prev, fence, 1); + return &f->base; +} + +static int sanitycheck(void *arg) +{ + struct dma_fence *f, *chain, *array; + int err = 0; + + f = mock_fence(); + if (!f) + return -ENOMEM; + + array = mock_array(1, f); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_signal(f); + dma_fence_put(chain); + return err; +} + +static int unwrap_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, array) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(array); + return 0; +} + +static int unwrap_chain(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + chain = mock_chain(f1, f2); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +static int unwrap_chain_array(void *arg) +{ + struct dma_fence *fence, *f1, *f2, *array, *chain; + struct dma_fence_unwrap iter; + int err = 0; + + f1 = mock_fence(); + if (!f1) + return -ENOMEM; + + f2 = mock_fence(); + if (!f2) { + dma_fence_put(f1); + return -ENOMEM; + } + + array = mock_array(2, f1, f2); + if (!array) + return -ENOMEM; + + chain = mock_chain(NULL, array); + if (!chain) + return -ENOMEM; + + dma_fence_unwrap_for_each(fence, &iter, chain) { + if (fence == f1) { + f1 = NULL; + } else if (fence == f2) { + f2 = NULL; + } else { + pr_err("Unexpected fence!\n"); + err = -EINVAL; + } + } + + if (f1 || f2) { + pr_err("Not all fences seen!\n"); + err = -EINVAL; + } + + dma_fence_signal(f1); + dma_fence_signal(f2); + dma_fence_put(chain); + return 0; +} + +int dma_fence_unwrap(void) +{ + static const struct subtest tests[] = { + SUBTEST(sanitycheck), + SUBTEST(unwrap_array), + SUBTEST(unwrap_chain), + SUBTEST(unwrap_chain_array), + }; + + return subtests(tests, NULL); +} diff --git a/drivers/dma-buf/st-dma-resv.c b/drivers/dma-buf/st-dma-resv.c index cbe999c6e7a6..813779e3c9be 100644 --- a/drivers/dma-buf/st-dma-resv.c +++ b/drivers/dma-buf/st-dma-resv.c @@ -58,8 +58,9 @@ static int sanitycheck(void *arg) return r; } -static int test_signaling(void *arg, bool shared) +static int test_signaling(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv resv; struct dma_fence *f; int r; @@ -75,25 +76,20 @@ static int test_signaling(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - goto err_unlock; - } - - dma_resv_add_shared_fence(&resv, f); - } else { - dma_resv_add_excl_fence(&resv, f); + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + goto err_unlock; } - if (dma_resv_test_signaled(&resv, shared)) { + dma_resv_add_fence(&resv, f, usage); + if (dma_resv_test_signaled(&resv, usage)) { pr_err("Resv unexpectedly signaled\n"); r = -EINVAL; goto err_unlock; } dma_fence_signal(f); - if (!dma_resv_test_signaled(&resv, shared)) { + if (!dma_resv_test_signaled(&resv, usage)) { pr_err("Resv not reporting signaled\n"); r = -EINVAL; goto err_unlock; @@ -106,18 +102,9 @@ static int test_signaling(void *arg, bool shared) return r; } -static int test_excl_signaling(void *arg) -{ - return test_signaling(arg, false); -} - -static int test_shared_signaling(void *arg) -{ - return test_signaling(arg, true); -} - -static int test_for_each(void *arg, bool shared) +static int test_for_each(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -134,20 +121,16 @@ static int test_for_each(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - goto err_unlock; - } - - dma_resv_add_shared_fence(&resv, f); - } else { - dma_resv_add_excl_fence(&resv, f); + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + goto err_unlock; } + dma_resv_add_fence(&resv, f, usage); + r = -ENOENT; - dma_resv_for_each_fence(&cursor, &resv, shared, fence) { + dma_resv_for_each_fence(&cursor, &resv, usage, fence) { if (!r) { pr_err("More than one fence found\n"); r = -EINVAL; @@ -158,7 +141,7 @@ static int test_for_each(void *arg, bool shared) r = -EINVAL; goto err_unlock; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_unlock; @@ -178,18 +161,9 @@ static int test_for_each(void *arg, bool shared) return r; } -static int test_excl_for_each(void *arg) -{ - return test_for_each(arg, false); -} - -static int test_shared_for_each(void *arg) -{ - return test_for_each(arg, true); -} - -static int test_for_each_unlocked(void *arg, bool shared) +static int test_for_each_unlocked(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_resv_iter cursor; struct dma_fence *f, *fence; struct dma_resv resv; @@ -206,22 +180,18 @@ static int test_for_each_unlocked(void *arg, bool shared) goto err_free; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - dma_resv_unlock(&resv); - goto err_free; - } - - dma_resv_add_shared_fence(&resv, f); - } else { - dma_resv_add_excl_fence(&resv, f); + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + dma_resv_unlock(&resv); + goto err_free; } + + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv); r = -ENOENT; - dma_resv_iter_begin(&cursor, &resv, shared); + dma_resv_iter_begin(&cursor, &resv, usage); dma_resv_for_each_fence_unlocked(&cursor, fence) { if (!r) { pr_err("More than one fence found\n"); @@ -237,7 +207,7 @@ static int test_for_each_unlocked(void *arg, bool shared) r = -EINVAL; goto err_iter_end; } - if (dma_resv_iter_is_exclusive(&cursor) != !shared) { + if (dma_resv_iter_usage(&cursor) != usage) { pr_err("Unexpected fence usage\n"); r = -EINVAL; goto err_iter_end; @@ -247,7 +217,7 @@ static int test_for_each_unlocked(void *arg, bool shared) if (r == -ENOENT) { r = -EINVAL; /* That should trigger an restart */ - cursor.seq--; + cursor.fences = (void*)~0; } else if (r == -EINVAL) { r = 0; } @@ -263,18 +233,9 @@ static int test_for_each_unlocked(void *arg, bool shared) return r; } -static int test_excl_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, false); -} - -static int test_shared_for_each_unlocked(void *arg) -{ - return test_for_each_unlocked(arg, true); -} - -static int test_get_fences(void *arg, bool shared) +static int test_get_fences(void *arg) { + enum dma_resv_usage usage = (unsigned long)arg; struct dma_fence *f, **fences = NULL; struct dma_resv resv; int r, i; @@ -290,21 +251,17 @@ static int test_get_fences(void *arg, bool shared) goto err_resv; } - if (shared) { - r = dma_resv_reserve_shared(&resv, 1); - if (r) { - pr_err("Resv shared slot allocation failed\n"); - dma_resv_unlock(&resv); - goto err_resv; - } - - dma_resv_add_shared_fence(&resv, f); - } else { - dma_resv_add_excl_fence(&resv, f); + r = dma_resv_reserve_fences(&resv, 1); + if (r) { + pr_err("Resv shared slot allocation failed\n"); + dma_resv_unlock(&resv); + goto err_resv; } + + dma_resv_add_fence(&resv, f, usage); dma_resv_unlock(&resv); - r = dma_resv_get_fences(&resv, shared, &i, &fences); + r = dma_resv_get_fences(&resv, usage, &i, &fences); if (r) { pr_err("get_fences failed\n"); goto err_free; @@ -326,30 +283,24 @@ static int test_get_fences(void *arg, bool shared) return r; } -static int test_excl_get_fences(void *arg) -{ - return test_get_fences(arg, false); -} - -static int test_shared_get_fences(void *arg) -{ - return test_get_fences(arg, true); -} - int dma_resv(void) { static const struct subtest tests[] = { SUBTEST(sanitycheck), - SUBTEST(test_excl_signaling), - SUBTEST(test_shared_signaling), - SUBTEST(test_excl_for_each), - SUBTEST(test_shared_for_each), - SUBTEST(test_excl_for_each_unlocked), - SUBTEST(test_shared_for_each_unlocked), - SUBTEST(test_excl_get_fences), - SUBTEST(test_shared_get_fences), + SUBTEST(test_signaling), + SUBTEST(test_for_each), + SUBTEST(test_for_each_unlocked), + SUBTEST(test_get_fences), }; + enum dma_resv_usage usage; + int r; spin_lock_init(&fence_lock); - return subtests(tests, NULL); + for (usage = DMA_RESV_USAGE_KERNEL; usage <= DMA_RESV_USAGE_BOOKKEEP; + ++usage) { + r = subtests(tests, (void *)(unsigned long)usage); + if (r) + return r; + } + return 0; } diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index 394e6e1e9686..514d213261df 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 Google, Inc. */ +#include #include #include #include @@ -172,20 +173,6 @@ static int sync_file_set_fence(struct sync_file *sync_file, return 0; } -static struct dma_fence **get_fences(struct sync_file *sync_file, - int *num_fences) -{ - if (dma_fence_is_array(sync_file->fence)) { - struct dma_fence_array *array = to_dma_fence_array(sync_file->fence); - - *num_fences = array->num_fences; - return array->fences; - } - - *num_fences = 1; - return &sync_file->fence; -} - static void add_fence(struct dma_fence **fences, int *i, struct dma_fence *fence) { @@ -210,86 +197,97 @@ static void add_fence(struct dma_fence **fences, static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, struct sync_file *b) { + struct dma_fence *a_fence, *b_fence, **fences; + struct dma_fence_unwrap a_iter, b_iter; + unsigned int index, num_fences; struct sync_file *sync_file; - struct dma_fence **fences = NULL, **nfences, **a_fences, **b_fences; - int i = 0, i_a, i_b, num_fences, a_num_fences, b_num_fences; sync_file = sync_file_alloc(); if (!sync_file) return NULL; - a_fences = get_fences(a, &a_num_fences); - b_fences = get_fences(b, &b_num_fences); - if (a_num_fences > INT_MAX - b_num_fences) - goto err; + num_fences = 0; + dma_fence_unwrap_for_each(a_fence, &a_iter, a->fence) + ++num_fences; + dma_fence_unwrap_for_each(b_fence, &b_iter, b->fence) + ++num_fences; - num_fences = a_num_fences + b_num_fences; + if (num_fences > INT_MAX) + goto err_free_sync_file; fences = kcalloc(num_fences, sizeof(*fences), GFP_KERNEL); if (!fences) - goto err; + goto err_free_sync_file; /* - * Assume sync_file a and b are both ordered and have no - * duplicates with the same context. + * We can't guarantee that fences in both a and b are ordered, but it is + * still quite likely. * - * If a sync_file can only be created with sync_file_merge - * and sync_file_create, this is a reasonable assumption. + * So attempt to order the fences as we pass over them and merge fences + * with the same context. */ - for (i_a = i_b = 0; i_a < a_num_fences && i_b < b_num_fences; ) { - struct dma_fence *pt_a = a_fences[i_a]; - struct dma_fence *pt_b = b_fences[i_b]; - if (pt_a->context < pt_b->context) { - add_fence(fences, &i, pt_a); + index = 0; + for (a_fence = dma_fence_unwrap_first(a->fence, &a_iter), + b_fence = dma_fence_unwrap_first(b->fence, &b_iter); + a_fence || b_fence; ) { - i_a++; - } else if (pt_a->context > pt_b->context) { - add_fence(fences, &i, pt_b); + if (!b_fence) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (!a_fence) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (a_fence->context < b_fence->context) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + + } else if (b_fence->context < a_fence->context) { + add_fence(fences, &index, b_fence); + b_fence = dma_fence_unwrap_next(&b_iter); + + } else if (__dma_fence_is_later(a_fence->seqno, b_fence->seqno, + a_fence->ops)) { + add_fence(fences, &index, a_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); - i_b++; } else { - if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno, - pt_a->ops)) - add_fence(fences, &i, pt_a); - else - add_fence(fences, &i, pt_b); - - i_a++; - i_b++; + add_fence(fences, &index, b_fence); + a_fence = dma_fence_unwrap_next(&a_iter); + b_fence = dma_fence_unwrap_next(&b_iter); } } - for (; i_a < a_num_fences; i_a++) - add_fence(fences, &i, a_fences[i_a]); + if (index == 0) + fences[index++] = dma_fence_get_stub(); - for (; i_b < b_num_fences; i_b++) - add_fence(fences, &i, b_fences[i_b]); + if (num_fences > index) { + struct dma_fence **tmp; - if (i == 0) - fences[i++] = dma_fence_get(a_fences[0]); - - if (num_fences > i) { - nfences = krealloc_array(fences, i, sizeof(*fences), GFP_KERNEL); - if (!nfences) - goto err; - - fences = nfences; + /* Keep going even when reducing the size failed */ + tmp = krealloc_array(fences, index, sizeof(*fences), + GFP_KERNEL); + if (tmp) + fences = tmp; } - if (sync_file_set_fence(sync_file, fences, i) < 0) - goto err; + if (sync_file_set_fence(sync_file, fences, index) < 0) + goto err_put_fences; strlcpy(sync_file->user_name, name, sizeof(sync_file->user_name)); return sync_file; -err: - while (i) - dma_fence_put(fences[--i]); +err_put_fences: + while (index) + dma_fence_put(fences[--index]); kfree(fences); + +err_free_sync_file: fput(sync_file->file); return NULL; - } static int sync_file_release(struct inode *inode, struct file *file) @@ -398,11 +396,13 @@ static int sync_fill_fence_info(struct dma_fence *fence, static long sync_file_ioctl_fence_info(struct sync_file *sync_file, unsigned long arg) { - struct sync_file_info info; struct sync_fence_info *fence_info = NULL; - struct dma_fence **fences; + struct dma_fence_unwrap iter; + struct sync_file_info info; + unsigned int num_fences; + struct dma_fence *fence; + int ret; __u32 size; - int num_fences, ret, i; if (copy_from_user(&info, (void __user *)arg, sizeof(info))) return -EFAULT; @@ -410,7 +410,9 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (info.flags || info.pad) return -EINVAL; - fences = get_fences(sync_file, &num_fences); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) + ++num_fences; /* * Passing num_fences = 0 means that userspace doesn't want to @@ -433,8 +435,11 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file, if (!fence_info) return -ENOMEM; - for (i = 0; i < num_fences; i++) { - int status = sync_fill_fence_info(fences[i], &fence_info[i]); + num_fences = 0; + dma_fence_unwrap_for_each(fence, &iter, sync_file->fence) { + int status; + + status = sync_fill_fence_info(fence, &fence_info[num_fences++]); info.status = info.status <= 0 ? info.status : status; } diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index e5cfb01353d8..d65964996e8d 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -219,12 +219,12 @@ config QCOM_SCM_DOWNLOAD_MODE_DEFAULT config SYSFB bool - default y - depends on X86 || EFI + select BOOT_VESA_SUPPORT config SYSFB_SIMPLEFB bool "Mark VGA/VBE/EFI FB as generic system framebuffer" - depends on SYSFB + depends on X86 || EFI + select SYSFB help Firmwares often provide initial graphics framebuffers so the BIOS, bootloader or kernel can show basic video-output during boot for diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index e59884cc12a7..085348e08986 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1404,6 +1404,16 @@ static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; +#ifdef CONFIG_GPIOLIB_IRQCHIP + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + if (!gc->irq.initialized) + return -EPROBE_DEFER; +#endif + if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; @@ -1593,6 +1603,15 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc, acpi_gpiochip_request_interrupts(gc); + /* + * Using barrier() here to prevent compiler from reordering + * gc->irq.initialized before initialization of above + * GPIO chip irq members. + */ + barrier(); + + gc->irq.initialized = true; + return 0; } diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index f1422bee3dcc..5133c3f028ab 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -280,6 +280,7 @@ config DRM_AMDGPU select HWMON select BACKLIGHT_CLASS_DEVICE select INTERVAL_TREE + select DRM_BUDDY help Choose this option if you have a recent AMD Radeon graphics card. diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f592..a0f0a17e224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index cd89d2e46852..3dc5ab2764ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct dma_resv *resv = bo->tbo.base.resv; - struct dma_resv_list *old, *new; - unsigned int i, j, k; + struct dma_fence *replacement; if (!ef) return -EINVAL; - old = dma_resv_shared_list(resv); - if (!old) - return 0; - - new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL); - if (!new) - return -ENOMEM; - - /* Go through all the shared fences in the resevation object and sort - * the interesting ones to the end of the list. + /* TODO: Instead of block before we should use the fence of the page + * table update and TLB flush here directly. */ - for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(old->shared[i], - dma_resv_held(resv)); - - if (f->context == ef->base.context) - RCU_INIT_POINTER(new->shared[--j], f); - else - RCU_INIT_POINTER(new->shared[k++], f); - } - new->shared_max = old->shared_max; - new->shared_count = k; - - /* Install the new fence list, seqcount provides the barriers */ - write_seqcount_begin(&resv->seq); - RCU_INIT_POINTER(resv->fence, new); - write_seqcount_end(&resv->seq); - - /* Drop the references to the removed fences or move them to ef_list */ - for (i = j; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(new->shared[i], - dma_resv_held(resv)); - dma_fence_put(f); - } - kfree_rcu(old, rcu); - + replacement = dma_fence_get_stub(); + dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, + replacement, DMA_RESV_USAGE_READ); + dma_fence_put(replacement); return 0; } @@ -1268,7 +1233,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(vm->root.bo, @@ -2482,6 +2447,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; struct kfd_mem_attachment *attachment; + struct dma_resv_iter cursor; + struct dma_fence *fence; total_size += amdgpu_bo_size(bo); @@ -2496,10 +2463,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + ret = amdgpu_sync_fence(&sync_obj, fence); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } } list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) @@ -2606,7 +2576,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. */ - ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 044b41f0bfd9..529d52a204cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,7 +34,6 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 970b065e9a6b..8de283997769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and one for the CS job */ - p->uf_entry.tv.num_shared = 2; + /* One for TTM and two for the CS job */ + p->uf_entry.tv.num_shared = 3; drm_gem_object_put(gobj); @@ -574,14 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); e->bo_va = amdgpu_vm_bo_find(vm, bo); - - if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { - e->chain = dma_fence_chain_alloc(); - if (!e->chain) { - r = -ENOMEM; - goto error_validate; - } - } } /* Move fence waiting after getting reservation lock of @@ -642,13 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } out: return r; } @@ -688,17 +675,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) { - struct amdgpu_bo_list_entry *e; - - amdgpu_bo_list_for_each_entry(e, parser->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } - + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -1272,24 +1251,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_resv *resv = e->tv.bo->base.resv; - struct dma_fence_chain *chain = e->chain; - - if (!chain) - continue; - - /* - * Work around dma_resv shortcomings by wrapping up the - * submission in a dma_fence_chain and add it as exclusive - * fence. - */ - dma_fence_chain_init(chain, dma_resv_excl_fence(resv), - dma_fence_get(p->fence), 1); - - rcu_assign_pointer(resv->fence_excl, &chain->base); - e->chain = NULL; - } + /* Make sure all BOs are remembered as writers */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 0; ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3987ecb24ef4..49f734137f15 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5733,7 +5733,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5749,7 +5749,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad..7a6908d71820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -200,8 +200,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - /* TODO: Unify this with other drivers */ - r = dma_resv_get_fences(new_abo->tbo.base.resv, true, + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..782cbca37538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int r; /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); - if (r) - return r; - - if (bo->tbo.moving) { - r = dma_fence_wait(bo->tbo.moving, true); - if (r) { - amdgpu_bo_unpin(bo); - return r; - } - } - return 0; + return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bb1c025d9001..b03663f42cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -680,7 +680,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 57b74d35052f..84a53758e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 8fe939976224..28a736c507bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ca2cfb65f976..a66a0881a934 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -561,9 +561,15 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 0): case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index dd78402e3cb0..8c6b2284cf56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -26,23 +26,12 @@ #include "amdgpu.h" -struct amdgpu_gtt_node { - struct ttm_buffer_object *tbo; - struct ttm_range_mgr_node base; -}; - static inline struct amdgpu_gtt_mgr * to_gtt_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_gtt_mgr, manager); } -static inline struct amdgpu_gtt_node * -to_amdgpu_gtt_node(struct ttm_resource *res) -{ - return container_of(res, struct amdgpu_gtt_node, base.base); -} - /** * DOC: mem_info_gtt_total * @@ -106,9 +95,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = { */ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res) { - struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); - return drm_mm_node_allocated(&node->base.mm_nodes[0]); + return drm_mm_node_allocated(&node->mm_nodes[0]); } /** @@ -128,15 +117,14 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); uint32_t num_pages = PFN_UP(tbo->base.size); - struct amdgpu_gtt_node *node; + struct ttm_range_mgr_node *node; int r; - node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); + node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); if (!node) return -ENOMEM; - node->tbo = tbo; - ttm_resource_init(tbo, place, &node->base.base); + ttm_resource_init(tbo, place, &node->base); if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && ttm_resource_manager_usage(man) > man->size) { r = -ENOSPC; @@ -145,8 +133,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, if (place->lpfn) { spin_lock(&mgr->lock); - r = drm_mm_insert_node_in_range(&mgr->mm, - &node->base.mm_nodes[0], + r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0], num_pages, tbo->page_alignment, 0, place->fpfn, place->lpfn, DRM_MM_INSERT_BEST); @@ -154,18 +141,18 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, if (unlikely(r)) goto err_free; - node->base.base.start = node->base.mm_nodes[0].start; + node->base.start = node->mm_nodes[0].start; } else { - node->base.mm_nodes[0].start = 0; - node->base.mm_nodes[0].size = node->base.base.num_pages; - node->base.base.start = AMDGPU_BO_INVALID_OFFSET; + node->mm_nodes[0].start = 0; + node->mm_nodes[0].size = node->base.num_pages; + node->base.start = AMDGPU_BO_INVALID_OFFSET; } - *res = &node->base.base; + *res = &node->base; return 0; err_free: - ttm_resource_fini(man, &node->base.base); + ttm_resource_fini(man, &node->base); kfree(node); return r; } @@ -181,12 +168,12 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); spin_lock(&mgr->lock); - if (drm_mm_node_allocated(&node->base.mm_nodes[0])) - drm_mm_remove_node(&node->base.mm_nodes[0]); + if (drm_mm_node_allocated(&node->mm_nodes[0])) + drm_mm_remove_node(&node->mm_nodes[0]); spin_unlock(&mgr->lock); ttm_resource_fini(man, res); @@ -202,15 +189,15 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, */ void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) { - struct amdgpu_gtt_node *node; + struct ttm_range_mgr_node *node; struct drm_mm_node *mm_node; struct amdgpu_device *adev; adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); spin_lock(&mgr->lock); drm_mm_for_each_node(mm_node, &mgr->mm) { - node = container_of(mm_node, typeof(*node), base.mm_nodes[0]); - amdgpu_ttm_recover_gart(node->tbo); + node = container_of(mm_node, typeof(*node), mm_nodes[0]); + amdgpu_ttm_recover_gart(node->base.bo); } spin_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 92a70fb57fa3..4ba4b54092f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -107,36 +107,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence, void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid) { - struct dma_fence *fence, **fences; struct amdgpu_pasid_cb *cb; - unsigned count; + struct dma_fence *fence; int r; - r = dma_resv_get_fences(resv, true, &count, &fences); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto fallback; - if (count == 0) { + if (!fence) { amdgpu_pasid_free(pasid); return; } - if (count == 1) { - fence = fences[0]; - kfree(fences); - } else { - uint64_t context = dma_fence_context_alloc(1); - struct dma_fence_array *array; - - array = dma_fence_array_create(count, fences, context, - 1, false); - if (!array) { - kfree(fences); - goto fallback; - } - fence = &array->base; - } - cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) { /* Last resort when we are OOM */ @@ -156,7 +139,8 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..b86c0b8252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 25731719c627..e92ecabfa7bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -612,9 +612,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (unlikely(r)) goto fail_unreserve; - amdgpu_bo_fence(bo, fence, false); - dma_fence_put(bo->tbo.moving); - bo->tbo.moving = dma_fence_get(fence); + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } if (!bp->resv) @@ -761,6 +760,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + kptr = amdgpu_bo_kptr(bo); if (kptr) { if (ptr) @@ -768,11 +772,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; @@ -1284,6 +1283,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, */ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct dma_fence *fence = NULL; struct amdgpu_bo *abo; int r; @@ -1303,7 +1303,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); if (bo->resource->mem_type != TTM_PL_VRAM || - !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || + adev->in_suspend || adev->shutdown) return; if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) @@ -1388,11 +1389,17 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { struct dma_resv *resv = bo->tbo.base.resv; + int r; - if (shared) - dma_resv_add_shared_fence(resv, fence); - else - dma_resv_add_excl_fence(resv, fence); + r = dma_resv_reserve_fences(resv, 1); + if (r) { + /* As last resort on OOM we block for the fence */ + dma_fence_wait(fence, false); + return; + } + + dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index acfa207cf970..6546552e596c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,12 +30,15 @@ #include #include +#include "amdgpu_vram_mgr.h" + /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_t start; uint64_t size; uint64_t remaining; - struct drm_mm_node *node; + void *node; + uint32_t mem_type; }; /** @@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { + struct drm_buddy_block *block; + struct list_head *head, *next; struct drm_mm_node *node; - if (!res || res->mem_type == TTM_PL_SYSTEM) { - cur->start = start; - cur->size = size; - cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; - } + if (!res) + goto fallback; BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; + cur->mem_type = res->mem_type; - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); + switch (cur->mem_type) { + case TTM_PL_VRAM: + head = &to_amdgpu_vram_mgr_resource(res)->blocks; + + block = list_first_entry_or_null(head, + struct drm_buddy_block, + link); + if (!block) + goto fallback; + + while (start >= amdgpu_vram_mgr_block_size(block)) { + start -= amdgpu_vram_mgr_block_size(block); + + next = block->link.next; + if (next != head) + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = amdgpu_vram_mgr_block_start(block) + start; + cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size); + cur->remaining = size; + cur->node = block; + break; + case TTM_PL_TT: + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; + + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); + cur->remaining = size; + cur->node = node; + break; + default: + goto fallback; + } + + return; + +fallback: + cur->start = start; + cur->size = size; cur->remaining = size; - cur->node = node; + cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + return; } /** @@ -85,7 +124,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res, */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_mm_node *node = cur->node; + struct drm_buddy_block *block; + struct drm_mm_node *node; + struct list_head *next; BUG_ON(size > cur->remaining); @@ -99,9 +140,27 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) return; } - cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + + cur->node = block; + cur->start = amdgpu_vram_mgr_block_start(block); + cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); + break; + case TTM_PL_TT: + node = cur->node; + + cur->node = ++node; + cur->start = node->start << PAGE_SHIFT; + cur->size = min(node->size << PAGE_SHIFT, cur->remaining); + break; + default: + return; + } } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5320bb0883d8..317d80209e95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -300,8 +300,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int ring_size, struct amdgpu_irq_src *irq_src, - unsigned int irq_type, unsigned int prio, + unsigned int max_dw, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int hw_prio, atomic_t *sched_score); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 40e06745fae9..11c46b3e4c60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,7 +259,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4b9ee6e27f74..ec26edd4f4d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -1547,7 +1548,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = { .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, .access_memory = &amdgpu_ttm_access_memory, - .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify }; /* @@ -2161,17 +2161,6 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) #if defined(CONFIG_DEBUG_FS) -static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_VRAM); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; @@ -2179,55 +2168,6 @@ static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } -static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_TT); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GDS); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GWS); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_OA); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table); DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); /* @@ -2437,17 +2377,23 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size); debugfs_create_file("amdgpu_iomem", 0444, root, adev, &amdgpu_ttm_iomem_fops); - debugfs_create_file("amdgpu_vram_mm", 0444, root, adev, - &amdgpu_mm_vram_table_fops); - debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev, - &amdgpu_mm_tt_table_fops); - debugfs_create_file("amdgpu_gds_mm", 0444, root, adev, - &amdgpu_mm_gds_table_fops); - debugfs_create_file("amdgpu_gws_mm", 0444, root, adev, - &amdgpu_mm_gws_table_fops); - debugfs_create_file("amdgpu_oa_mm", 0444, root, adev, - &amdgpu_mm_oa_table_fops); debugfs_create_file("ttm_page_pool", 0444, root, adev, &amdgpu_ttm_page_pool_fops); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_VRAM), + root, "amdgpu_vram_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_TT), + root, "amdgpu_gtt_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GDS), + root, "amdgpu_gds_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GWS), + root, "amdgpu_gws_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_OA), + root, "amdgpu_oa_mm"); + #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9120ae80ef52..6a70818039dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,6 +26,7 @@ #include #include +#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -38,15 +39,6 @@ #define AMDGPU_POISON 0xd0bed0be -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_mm mm; - spinlock_t lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t vis_usage; -}; - struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 39c74d9fa7cc..6eac649499d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index e2fde88aaf5e..f06fb7f882e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -159,6 +159,7 @@ #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) #define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) +#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -279,6 +280,11 @@ struct amdgpu_fw_shared_fw_logging { uint32_t size; }; +struct amdgpu_fw_shared_smu_interface_info { + uint8_t smu_interface_type; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -287,6 +293,7 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; struct amdgpu_vcn_fwlog { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fc4563cf2828..5277c10d901d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -377,7 +377,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, dma_resv_assert_held(vm->root.bo->tbo.base.resv); - vm->bulk_moveable = false; + ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move); if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) amdgpu_vm_bo_relocated(base); else @@ -639,36 +639,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, list_add(&entry->tv.head, validated); } -/** - * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag - * - * @bo: BO which was removed from the LRU - * - * Make sure the bulk_moveable flag is updated when a BO is removed from the - * LRU. - */ -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) -{ - struct amdgpu_bo *abo; - struct amdgpu_vm_bo_base *bo_base; - - if (!amdgpu_bo_is_amdgpu_bo(bo)) - return; - - if (bo->pin_count) - return; - - abo = ttm_to_amdgpu_bo(bo); - if (!abo->parent) - return; - for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { - struct amdgpu_vm *vm = bo_base->vm; - - if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv) - vm->bulk_moveable = false; - } - -} /** * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU * @@ -681,35 +651,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_vm_bo_base *bo_base; - - if (vm->bulk_moveable) { - spin_lock(&adev->mman.bdev.lru_lock); - ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&adev->mman.bdev.lru_lock); - return; - } - - memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); - spin_lock(&adev->mman.bdev.lru_lock); - list_for_each_entry(bo_base, &vm->idle, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); - - if (!bo->parent) - continue; - - ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource, - &vm->lru_bulk_move); - if (shadow) - ttm_bo_move_to_lru_tail(&shadow->tbo, - shadow->tbo.resource, - &vm->lru_bulk_move); - } + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); spin_unlock(&adev->mman.bdev.lru_lock); - - vm->bulk_moveable = true; } /** @@ -732,8 +676,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_bo_base *bo_base, *tmp; int r; - vm->bulk_moveable &= list_empty(&vm->evicted); - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); @@ -1057,10 +999,16 @@ static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + shadow = amdgpu_bo_shadowed(entry->bo); + if (shadow) { + ttm_bo_set_bulk_move(&shadow->tbo, NULL); + amdgpu_bo_unref(&shadow); + } + + ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); entry->bo->vm_bo = NULL; list_del(&entry->vm_status); - amdgpu_bo_unref(&shadow); amdgpu_bo_unref(&entry->bo); } @@ -1080,8 +1028,6 @@ static void amdgpu_vm_free_pts(struct amdgpu_device *adev, struct amdgpu_vm_pt_cursor cursor; struct amdgpu_vm_bo_base *entry; - vm->bulk_moveable = false; - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) amdgpu_vm_free_table(entry); @@ -2113,7 +2059,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2665,7 +2611,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (bo) { dma_resv_assert_held(bo->tbo.base.resv); if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) - vm->bulk_moveable = false; + ttm_bo_set_bulk_move(&bo->tbo, NULL); for (base = &bo_va->base.bo->vm_bo; *base; base = &(*base)->next) { @@ -2719,7 +2665,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) return false; /* Try to block ongoing updates */ @@ -2899,7 +2845,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) return timeout; @@ -2980,7 +2927,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) goto error_free_root; - r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) goto error_unreserve; @@ -3423,7 +3370,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a40a6a993bb0..bd7892482bbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -317,8 +317,6 @@ struct amdgpu_vm { /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; - /* mark whether can do the bulk move */ - bool bulk_moveable; /* Flag to indicate if VM is used for compute */ bool is_compute_context; }; @@ -454,7 +452,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, uint64_t *gtt_mem, uint64_t *cpu_mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e3fbf0f10add..31913ae86de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, { unsigned int i; uint64_t value; - int r; + long r; - if (vmbo->bo.tbo.moving) { - r = dma_fence_wait(vmbo->bo.tbo.moving, true); - if (r) - return r; - } + r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL, + true, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index dbb551762805..bdb44cee19d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, struct amdgpu_bo *bo = &vmbo->bo; enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; + struct dma_resv_iter cursor; unsigned int i, ndw, nptes; + struct dma_fence *fence; uint64_t *pte; int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); - if (r) - return r; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + r = amdgpu_sync_fence(&p->job->sync, fence); + if (r) + return r; + } do { ndw = p->num_dw_left; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 0a7611648573..49e4092f447f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,8 +32,10 @@ #include "atom.h" struct amdgpu_vram_reservation { - struct list_head node; - struct drm_mm_node mm_node; + u64 start; + u64 size; + struct list_head allocated; + struct list_head blocks; }; static inline struct amdgpu_vram_mgr * @@ -186,18 +188,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible node size + * amdgpu_vram_mgr_vis_size - Calculate visible block size * * @adev: amdgpu_device pointer - * @node: MM node structure + * @block: DRM BUDDY block structure * - * Calculate how many bytes of the MM node are inside visible VRAM + * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_mm_node *node) + struct drm_buddy_block *block) { - uint64_t start = node->start << PAGE_SHIFT; - uint64_t end = (node->size + node->start) << PAGE_SHIFT; + u64 start = amdgpu_vram_mgr_block_start(block); + u64 end = start + amdgpu_vram_mgr_block_size(block); if (start >= adev->gmc.visible_vram_size) return 0; @@ -218,9 +220,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - unsigned pages = res->num_pages; - struct drm_mm_node *mm; - u64 usage; + struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct drm_buddy_block *block; + u64 usage = 0; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -228,9 +230,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; - for (usage = 0; pages; pages -= mm->size, mm++) - usage += amdgpu_vram_mgr_vis_size(adev, mm); + list_for_each_entry(block, &vres->blocks, link) + usage += amdgpu_vram_mgr_vis_size(adev, block); return usage; } @@ -240,23 +241,30 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_mm *mm = &mgr->mm; + struct drm_buddy *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; + struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { - if (drm_mm_reserve_node(mm, &rsv->mm_node)) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { + if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, + rsv->size, mm->chunk_size, &rsv->allocated, + DRM_BUDDY_RANGE_ALLOCATION)) + continue; + + block = amdgpu_vram_mgr_first_block(&rsv->allocated); + if (!block) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->mm_node.start, rsv->mm_node.size); + rsv->start, rsv->size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); + vis_usage = amdgpu_vram_mgr_vis_size(adev, block); atomic64_add(vis_usage, &mgr->vis_usage); spin_lock(&man->bdev->lru_lock); - man->usage += rsv->mm_node.size << PAGE_SHIFT; + man->usage += rsv->size; spin_unlock(&man->bdev->lru_lock); - list_move(&rsv->node, &mgr->reserved_pages); + list_move(&rsv->blocks, &mgr->reserved_pages); } } @@ -278,14 +286,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->node); - rsv->mm_node.start = start >> PAGE_SHIFT; - rsv->mm_node.size = size >> PAGE_SHIFT; + INIT_LIST_HEAD(&rsv->allocated); + INIT_LIST_HEAD(&rsv->blocks); - spin_lock(&mgr->lock); - list_add_tail(&rsv->node, &mgr->reservations_pending); + rsv->start = start; + rsv->size = size; + + mutex_lock(&mgr->lock); + list_add_tail(&rsv->blocks, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return 0; } @@ -307,19 +317,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - spin_lock(&mgr->lock); + mutex_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = 0; goto out; } @@ -327,32 +337,10 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return ret; } -/** - * amdgpu_vram_mgr_virt_start - update virtual start address - * - * @mem: ttm_resource to update - * @node: just allocated node - * - * Calculate a virtual BO start address to easily check if everything is CPU - * accessible. - */ -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, - struct drm_mm_node *node) -{ - unsigned long start; - - start = node->start + node->size; - if (start > mem->num_pages) - start -= mem->num_pages; - else - start = 0; - mem->start = max(mem->start, start); -} - /** * amdgpu_vram_mgr_new - allocate new ranges * @@ -368,46 +356,44 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; + u64 vis_usage = 0, max_bytes, cur_size, min_block_size; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - uint64_t vis_usage = 0, mem_bytes, max_bytes; - struct ttm_range_mgr_node *node; - struct drm_mm *mm = &mgr->mm; - enum drm_mm_insert_mode mode; - unsigned i; + struct amdgpu_vram_mgr_resource *vres; + u64 size, remaining_size, lpfn, fpfn; + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + unsigned long pages_per_block; int r; - lpfn = place->lpfn; + lpfn = place->lpfn << PAGE_SHIFT; if (!lpfn) - lpfn = man->size >> PAGE_SHIFT; + lpfn = man->size; + + fpfn = place->fpfn << PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; - mem_bytes = tbo->base.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_node = ~0ul; - num_nodes = 1; + pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_node = HPAGE_PMD_NR; + pages_per_block = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = 2UL << (20UL - PAGE_SHIFT); + pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_node = max_t(uint32_t, pages_per_node, - tbo->page_alignment); - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); + pages_per_block = max_t(uint32_t, pages_per_block, + tbo->page_alignment); } - node = kvmalloc(struct_size(node, mm_nodes, num_nodes), - GFP_KERNEL | __GFP_ZERO); - if (!node) + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) return -ENOMEM; - ttm_resource_init(tbo, place, &node->base); + ttm_resource_init(tbo, place, &vres->base); /* bail out quickly if there's likely not enough VRAM for this BO */ if (ttm_resource_manager_usage(man) > max_bytes) { @@ -415,66 +401,130 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - mode = DRM_MM_INSERT_BEST; + INIT_LIST_HEAD(&vres->blocks); + if (place->flags & TTM_PL_FLAG_TOPDOWN) - mode = DRM_MM_INSERT_HIGH; + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - pages_left = node->base.num_pages; + if (fpfn || lpfn != man->size) + /* Allocate blocks in desired range */ + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); + remaining_size = vres->base.num_pages << PAGE_SHIFT; - i = 0; - spin_lock(&mgr->lock); - while (pages_left) { - uint32_t alignment = tbo->page_alignment; + mutex_lock(&mgr->lock); + while (remaining_size) { + if (tbo->page_alignment) + min_block_size = tbo->page_alignment << PAGE_SHIFT; + else + min_block_size = mgr->default_page_size; - if (pages >= pages_per_node) - alignment = pages_per_node; + BUG_ON(min_block_size < mm->chunk_size); - r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, - alignment, 0, place->fpfn, - lpfn, mode); - if (unlikely(r)) { - if (pages > pages_per_node) { - if (is_power_of_2(pages)) - pages = pages / 2; - else - pages = rounddown_pow_of_two(pages); - continue; + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + + if (size >= pages_per_block << PAGE_SHIFT) + min_block_size = pages_per_block << PAGE_SHIFT; + + cur_size = size; + + if (fpfn + size != place->lpfn << PAGE_SHIFT) { + /* + * Except for actual range allocation, modify the size and + * min_block_size conforming to continuous flag enablement + */ + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_block_size = size; + /* + * Modify the size value if size is not + * aligned with min_block_size + */ + } else if (!IS_ALIGNED(size, min_block_size)) { + size = round_up(size, min_block_size); } - goto error_free; } - vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); - amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); - pages_left -= pages; - ++i; + r = drm_buddy_alloc_blocks(mm, fpfn, + lpfn, + size, + min_block_size, + &vres->blocks, + vres->flags); + if (unlikely(r)) + goto error_free_blocks; - if (pages > pages_left) - pages = pages_left; + if (size > remaining_size) + remaining_size = 0; + else + remaining_size -= size; } - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); - if (i == 1) - node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + if (cur_size != size) { + struct drm_buddy_block *block; + struct list_head *trim_list; + u64 original_size; + LIST_HEAD(temp); + + trim_list = &vres->blocks; + original_size = vres->base.num_pages << PAGE_SHIFT; + + /* + * If size value is rounded up to min_block_size, trim the last + * block to the required size + */ + if (!list_is_singular(&vres->blocks)) { + block = list_last_entry(&vres->blocks, typeof(*block), link); + list_move_tail(&block->link, &temp); + trim_list = &temp; + /* + * Compute the original_size value by subtracting the + * last block size with (aligned size - original size) + */ + original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); + } + + mutex_lock(&mgr->lock); + drm_buddy_block_trim(mm, + original_size, + trim_list); + mutex_unlock(&mgr->lock); + + if (!list_empty(&temp)) + list_splice_tail(trim_list, &vres->blocks); + } + + list_for_each_entry(block, &vres->blocks, link) + vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + + block = amdgpu_vram_mgr_first_block(&vres->blocks); + if (!block) { + r = -EINVAL; + goto error_fini; + } + + vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; + + if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; if (adev->gmc.xgmi.connected_to_cpu) - node->base.bus.caching = ttm_cached; + vres->base.bus.caching = ttm_cached; else - node->base.bus.caching = ttm_write_combined; + vres->base.bus.caching = ttm_write_combined; atomic64_add(vis_usage, &mgr->vis_usage); - *res = &node->base; + *res = &vres->base; return 0; -error_free: - while (i--) - drm_mm_remove_node(&node->mm_nodes[i]); - spin_unlock(&mgr->lock); +error_free_blocks: + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); error_fini: - ttm_resource_fini(man, &node->base); - kvfree(node); + ttm_resource_fini(man, &vres->base); + kfree(vres); return r; } @@ -490,27 +540,26 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); + struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; uint64_t vis_usage = 0; - unsigned i, pages; - spin_lock(&mgr->lock); - for (i = 0, pages = res->num_pages; pages; - pages -= node->mm_nodes[i].size, ++i) { - struct drm_mm_node *mm = &node->mm_nodes[i]; + mutex_lock(&mgr->lock); + list_for_each_entry(block, &vres->blocks, link) + vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - drm_mm_remove_node(mm); - vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); - } amdgpu_vram_mgr_do_reserve(man); - spin_unlock(&mgr->lock); + + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); ttm_resource_fini(man, res); - kvfree(node); + kfree(vres); } /** @@ -542,7 +591,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_MM nodes to export */ + /* Determine the number of DRM_BUDDY blocks to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -558,10 +607,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_MM nodes to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_MM node + * Walk down DRM_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_BUDDY block * and the number of bytes from it. Access the following - * DRM_MM node(s) if more buffer needs to exported + * DRM_BUDDY block(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -648,13 +697,22 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); - spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, printer); - spin_unlock(&mgr->lock); + mutex_lock(&mgr->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + mgr->default_page_size >> 10); + + drm_buddy_print(mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &mgr->reserved_pages, link) + drm_buddy_block_print(mm, block, printer); + mutex_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -674,16 +732,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; + int err; ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT); - spin_lock_init(&mgr->lock); + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + + mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); + mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -711,16 +774,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + mutex_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { + drm_buddy_free_list(&mgr->mm, &rsv->blocks); kfree(rsv); } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); + drm_buddy_fini(&mgr->mm); + mutex_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h new file mode 100644 index 000000000000..9a2db87186c7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: MIT + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_VRAM_MGR_H__ +#define __AMDGPU_VRAM_MGR_H__ + +#include + +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_buddy mm; + /* protects access to buffer objects */ + struct mutex lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; + u64 default_page_size; +}; + +struct amdgpu_vram_mgr_resource { + struct ttm_resource base; + struct list_head blocks; + unsigned long flags; +}; + +static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) +{ + return drm_buddy_block_offset(block); +} + +static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) +{ + return PAGE_SIZE << drm_buddy_block_order(block); +} + +static inline struct drm_buddy_block * +amdgpu_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = amdgpu_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, link); + if (start + size != amdgpu_vram_mgr_block_start(block)) + return false; + } + + return true; +} + +static inline struct amdgpu_vram_mgr_resource * +to_amdgpu_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct amdgpu_vram_mgr_resource, base); +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f4c6accd3226..9426e252d8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3293,7 +3293,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3429,7 +3429,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3454,7 +3454,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -7689,6 +7689,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3c1d440824a7..5228421b0f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -814,7 +814,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 344d819b4c1b..979da6f510e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU && - adev->gmc.real_vram_size > adev->gmc.aper_size) { + if ((adev->flags & AMD_IS_APU) && + adev->gmc.real_vram_size > adev->gmc.aper_size && + !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index ca9841d5669f..1932a3e4af7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 431742eb7811..6009fbfdcc19 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1456,7 +1456,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if ((adev->flags & AMD_IS_APU) || + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = @@ -1721,7 +1721,7 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); - amdgpu_bo_unref(&adev->gmc.pdb0_bo); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index dff54190b96c..f0fbcda76f5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -24,6 +24,7 @@ #include #include "amdgpu.h" +#include "amdgpu_cs.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" #include "soc15.h" @@ -1900,6 +1901,75 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_powergating_state = vcn_v1_0_set_powergating_state, }; +/* + * It is a hardware issue that VCN can't handle a GTT TMZ buffer on + * CHIP_RAVEN series ASIC. Move such a GTT TMZ buffer to VRAM domain + * before command submission as a workaround. + */ +static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; + + bo = mapping->bo_va->base.bo; + if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED)) + return 0; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r); + return r; + } + + return r; +} + +static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t msg_lo = 0, msg_hi = 0; + int i, r; + + if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) + return 0; + + for (i = 0; i < ib->length_dw; i += 2) { + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); + + if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + msg_lo = val; + } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + msg_hi = val; + } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) { + r = vcn_v1_0_validate_bo(p, job, + ((u64)msg_hi) << 32 | msg_lo); + if (r) + return r; + } + } + + return 0; +} + static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1910,6 +1980,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c87263ed20ec..cb5f0a12333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -219,6 +219,11 @@ static int vcn_v3_0_sw_init(void *handle) cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) | cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); + fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2)) + fw_shared->smu_interface_info.smu_interface_type = 2; + else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1)) + fw_shared->smu_interface_info.smu_interface_type = 1; if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); @@ -575,8 +580,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( - UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -1480,8 +1485,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 339e12c94cff..62aa6c9d5123 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -483,15 +483,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } /* Verify module parameters regarding mapped process number*/ - if ((hws_max_conc_proc < 0) - || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { - dev_err(kfd_device, - "hws_max_conc_proc %d must be between 0 and %d, use %d instead\n", - hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, - kfd->vm_info.vmid_num_kfd); + if (hws_max_conc_proc >= 0) + kfd->max_proc_per_quantum = min((u32)hws_max_conc_proc, kfd->vm_info.vmid_num_kfd); + else kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; - } else - kfd->max_proc_per_quantum = hws_max_conc_proc; /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * @@ -536,7 +531,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_doorbell_error; } - kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; + if (amdgpu_use_xgmi_p2p) + kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; kfd->noretry = kfd->adev->gmc.noretry; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index deecccebe5b6..64f4a51cc880 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -749,6 +749,8 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) event_waiters = kmalloc_array(num_events, sizeof(struct kfd_event_waiter), GFP_KERNEL); + if (!event_waiters) + return NULL; for (i = 0; (event_waiters) && (i < num_events) ; i++) { init_wait(&event_waiters[i].wait); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index e4beebb1c80a..f2e1d506ba21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -247,15 +247,6 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) return ret; } - ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, - O_RDWR); - if (ret < 0) { - kfifo_free(&client->fifo); - kfree(client); - return ret; - } - *fd = ret; - init_waitqueue_head(&client->wait_queue); spin_lock_init(&client->lock); client->events = 0; @@ -265,5 +256,20 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) list_add_rcu(&client->list, &dev->smi_clients); spin_unlock(&dev->smi_lock); + ret = anon_inode_getfd(kfd_smi_name, &kfd_smi_ev_fops, (void *)client, + O_RDWR); + if (ret < 0) { + spin_lock(&dev->smi_lock); + list_del_rcu(&client->list); + spin_unlock(&dev->smi_lock); + + synchronize_rcu(); + + kfifo_free(&client->fifo); + kfree(client); + return ret; + } + *fd = ret; + return 0; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3b8856b4cece..b3fc3e958227 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -548,7 +548,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, goto reserve_bo_failed; } - r = dma_resv_reserve_shared(bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(bo->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve bo\n", r); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index b30656959fd8..73423b805b54 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2714,7 +2714,8 @@ static int dm_resume(void *handle) * this is the case when traversing through already created * MST connectors, should be skipped */ - if (aconnector->mst_port) + if (aconnector->dc_link && + aconnector->dc_link->type == dc_connection_mst_branch) continue; mutex_lock(&aconnector->hpd_lock); @@ -3972,7 +3973,7 @@ static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *cap max - min); } -static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, +static void amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, int bl_idx, u32 user_brightness) { @@ -4003,7 +4004,8 @@ static int amdgpu_dm_backlight_set_level(struct amdgpu_display_manager *dm, DRM_DEBUG("DM: Failed to update backlight on eDP[%d]\n", bl_idx); } - return rc ? 0 : 1; + if (rc) + dm->actual_brightness[bl_idx] = user_brightness; } static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) @@ -9236,7 +9238,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = dma_resv_wait_timeout(abo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(abo->tbo.base.resv, + DMA_RESV_USAGE_WRITE, false, msecs_to_jiffies(5000)); if (unlikely(r <= 0)) DRM_ERROR("Waiting for fences timed out!"); @@ -9947,7 +9950,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) /* restore the backlight level */ for (i = 0; i < dm->num_of_edps; i++) { if (dm->backlight_dev[i] && - (amdgpu_dm_backlight_get_level(dm, i) != dm->brightness[i])) + (dm->actual_brightness[i] != dm->brightness[i])) amdgpu_dm_backlight_set_level(dm, i, dm->brightness[i]); } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 6a908d736d6a..7e44b0429448 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -540,6 +540,12 @@ struct amdgpu_display_manager { * cached backlight values. */ u32 brightness[AMDGPU_DM_MAX_NUM_EDP]; + /** + * @actual_brightness: + * + * last successfully applied backlight values. + */ + u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP]; }; enum dsc_clock_force_state { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c index edda572dc570..8be4c1970628 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c @@ -436,57 +436,84 @@ static void dcn315_clk_mgr_helper_populate_bw_params( struct integrated_info *bios_info, const DpmClocks_315_t *clock_table) { - int i, j; + int i; struct clk_bw_params *bw_params = clk_mgr->base.bw_params; - uint32_t max_dispclk = 0, max_dppclk = 0; + uint32_t max_dispclk, max_dppclk, max_pstate, max_socclk, max_fclk = 0, min_pstate = 0; + struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1]; - j = -1; + max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); + max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); + max_socclk = find_max_clk_value(clock_table->SocClocks, clock_table->NumSocClkLevelsEnabled); - ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL); - - /* Find lowest DPM, FCLK is filled in reverse order*/ - - for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) { - if (clock_table->DfPstateTable[i].FClk != 0) { - j = i; - break; + /* Find highest fclk pstate */ + for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) { + if (clock_table->DfPstateTable[i].FClk > max_fclk) { + max_fclk = clock_table->DfPstateTable[i].FClk; + max_pstate = i; } } - if (j == -1) { - /* clock table is all 0s, just use our own hardcode */ - ASSERT(0); - return; - } + /* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */ + for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) { + int j; + uint32_t min_fclk = clock_table->DfPstateTable[0].FClk; - bw_params->clk_table.num_entries = j + 1; + for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) { + if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i] + && clock_table->DfPstateTable[j].FClk < min_fclk) { + min_fclk = clock_table->DfPstateTable[j].FClk; + min_pstate = j; + } + } - /* dispclk and dppclk can be max at any voltage, same number of levels for both */ - if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS && - clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) { - max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled); - max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled); - } else { - ASSERT(0); - } - - for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) { - int temp; - - bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk; - bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk; - bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage; - bw_params->clk_table.entries[i].wck_ratio = 1; - temp = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].dcfclk_mhz = temp; - temp = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage); - if (temp) - bw_params->clk_table.entries[i].socclk_mhz = temp; + bw_params->clk_table.entries[i].fclk_mhz = min_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i]; + bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i]; bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; - } + bw_params->clk_table.entries[i].wck_ratio = 1; + }; + /* Make sure to include at least one entry and highest pstate */ + if (max_pstate != min_pstate) { + bw_params->clk_table.entries[i].fclk_mhz = max_fclk; + bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk; + bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage; + bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage( + clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage( + clock_table, clock_table->SocClocks, clock_table->DfPstateTable[max_pstate].Voltage); + bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk; + bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk; + bw_params->clk_table.entries[i].wck_ratio = 1; + i++; + } + bw_params->clk_table.num_entries = i; + + /* Include highest socclk */ + if (bw_params->clk_table.entries[i-1].socclk_mhz < max_socclk) + bw_params->clk_table.entries[i-1].socclk_mhz = max_socclk; + + /* Set any 0 clocks to max default setting. Not an issue for + * power since we aren't doing switching in such case anyway + */ + for (i = 0; i < bw_params->clk_table.num_entries; i++) { + if (!bw_params->clk_table.entries[i].fclk_mhz) { + bw_params->clk_table.entries[i].fclk_mhz = def_max.fclk_mhz; + bw_params->clk_table.entries[i].memclk_mhz = def_max.memclk_mhz; + bw_params->clk_table.entries[i].voltage = def_max.voltage; + } + if (!bw_params->clk_table.entries[i].dcfclk_mhz) + bw_params->clk_table.entries[i].dcfclk_mhz = def_max.dcfclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz) + bw_params->clk_table.entries[i].socclk_mhz = def_max.socclk_mhz; + if (!bw_params->clk_table.entries[i].dispclk_mhz) + bw_params->clk_table.entries[i].dispclk_mhz = def_max.dispclk_mhz; + if (!bw_params->clk_table.entries[i].dppclk_mhz) + bw_params->clk_table.entries[i].dppclk_mhz = def_max.dppclk_mhz; + } bw_params->vram_type = bios_info->memory_type; bw_params->num_channels = bios_info->ma_channel_number; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c index 880ffea2afc6..2600313fea57 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.c @@ -80,8 +80,8 @@ static const struct IP_BASE NBIO_BASE = { { { { 0x00000000, 0x00000014, 0x00000D #define VBIOSSMC_MSG_SetDppclkFreq 0x06 ///< Set DPP clock frequency in MHZ #define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x07 ///< Set DCF clock frequency hard min in MHZ #define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x08 ///< Set DCF clock minimum frequency in deep sleep in MHZ -#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x09 ///< Set display phy clock frequency in MHZ in case VMIN does not support phy frequency -#define VBIOSSMC_MSG_GetFclkFrequency 0x0A ///< Get FCLK frequency, return frequemcy in MHZ +#define VBIOSSMC_MSG_GetDtbclkFreq 0x09 ///< Get display dtb clock frequency in MHZ in case VMIN does not support phy frequency +#define VBIOSSMC_MSG_SetDtbClk 0x0A ///< Set dtb clock frequency, return frequemcy in MHZ #define VBIOSSMC_MSG_SetDisplayCount 0x0B ///< Inform PMFW of number of display connected #define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0x0C ///< To ask PMFW turn off TMDP 48MHz refclk during display off to save power #define VBIOSSMC_MSG_UpdatePmeRestore 0x0D ///< To ask PMFW to write into Azalia for PME wake up event @@ -324,15 +324,26 @@ int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr) return (dprefclk_get_mhz * 1000); } -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr) +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr) { int fclk_get_mhz = -1; if (clk_mgr->smu_present) { fclk_get_mhz = dcn315_smu_send_msg_with_param( clk_mgr, - VBIOSSMC_MSG_GetFclkFrequency, + VBIOSSMC_MSG_GetDtbclkFreq, 0); } return (fclk_get_mhz * 1000); } + +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable) +{ + if (!clk_mgr->smu_present) + return; + + dcn315_smu_send_msg_with_param( + clk_mgr, + VBIOSSMC_MSG_SetDtbClk, + enable); +} diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h index 66fa42f8dd18..5aa3275ac7d8 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_smu.h @@ -37,6 +37,7 @@ #define NUM_SOC_VOLTAGE_LEVELS 4 #define NUM_DF_PSTATE_LEVELS 4 + typedef struct { uint16_t MinClock; // This is either DCFCLK or SOCCLK (in MHz) uint16_t MaxClock; // This is either DCFCLK or SOCCLK (in MHz) @@ -124,5 +125,6 @@ void dcn315_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr); void dcn315_smu_request_voltage_via_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz); void dcn315_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr); int dcn315_smu_get_dpref_clk(struct clk_mgr_internal *clk_mgr); -int dcn315_smu_get_smu_fclk(struct clk_mgr_internal *clk_mgr); +int dcn315_smu_get_dtbclk(struct clk_mgr_internal *clk_mgr); +void dcn315_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable); #endif /* DAL_DC_315_SMU_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index f6e19efea756..c436db416708 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2389,6 +2389,8 @@ static enum surface_update_type check_update_surfaces_for_stream( if (stream_update->mst_bw_update) su_flags->bits.mst_bw = 1; + if (stream_update->crtc_timing_adjust && dc_extended_blank_supported(dc)) + su_flags->bits.crtc_timing_adjust = 1; if (su_flags->raw != 0) overall_type = UPDATE_TYPE_FULL; @@ -2650,6 +2652,9 @@ static void copy_stream_update_to_stream(struct dc *dc, if (update->vrr_infopacket) stream->vrr_infopacket = *update->vrr_infopacket; + if (update->crtc_timing_adjust) + stream->adjust = *update->crtc_timing_adjust; + if (update->dpms_off) stream->dpms_off = *update->dpms_off; @@ -4051,3 +4056,17 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause) pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst); } +/* + * dc_extended_blank_supported: Decide whether extended blank is supported + * + * Extended blank is a freesync optimization feature to be enabled in the future. + * During the extra vblank period gained from freesync, we have the ability to enter z9/z10. + * + * @param [in] dc: Current DC state + * @return: Indicate whether extended blank is supported (true or false) + */ +bool dc_extended_blank_supported(struct dc *dc) +{ + return dc->debug.extended_blank_optimization && !dc->debug.disable_z10 + && dc->caps.zstate_support && dc->caps.is_apu; +} diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cb87dd643180..bbaa5abdf888 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -983,8 +983,7 @@ static bool should_verify_link_capability_destructively(struct dc_link *link, destrictive = false; } } - } else if (dc_is_hdmi_signal(link->local_sink->sink_signal)) - destrictive = true; + } return destrictive; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 351081f574cb..22dabe596dfc 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -5216,6 +5216,62 @@ static void retrieve_cable_id(struct dc_link *link) &link->dpcd_caps.cable_id, &usbc_cable_id); } +/* DPRX may take some time to respond to AUX messages after HPD asserted. + * If AUX read unsuccessful, try to wake unresponsive DPRX by toggling DPCD SET_POWER (0x600). + */ +static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms) +{ + enum dc_status status = DC_ERROR_UNEXPECTED; + uint8_t dpcd_data = 0; + uint64_t start_ts = 0; + uint64_t current_ts = 0; + uint64_t time_taken_ms = 0; + enum dc_connection_type type = dc_connection_none; + + status = core_link_read_dpcd( + link, + DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV, + &dpcd_data, + sizeof(dpcd_data)); + + if (status != DC_OK) { + DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.", + __func__, + timeout_ms); + start_ts = dm_get_timestamp(link->ctx); + + do { + if (!dc_link_detect_sink(link, &type) || type == dc_connection_none) + break; + + dpcd_data = DP_SET_POWER_D3; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + dpcd_data = DP_SET_POWER_D0; + status = core_link_write_dpcd( + link, + DP_SET_POWER, + &dpcd_data, + sizeof(dpcd_data)); + + current_ts = dm_get_timestamp(link->ctx); + time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000); + } while (status != DC_OK && time_taken_ms < timeout_ms); + + DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s", + __func__, + (status == DC_OK) ? "succeeded" : "failed", + time_taken_ms, + (type == dc_connection_none) ? ". Unplugged." : "."); + } + + return status; +} + static bool retrieve_link_cap(struct dc_link *link) { /* DP_ADAPTER_CAP - DP_DPCD_REV + 1 == 16 and also DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT + 1 == 16, @@ -5251,6 +5307,15 @@ static bool retrieve_link_cap(struct dc_link *link) dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD); + /* Try to ensure AUX channel active before proceeding. */ + if (link->dc->debug.aux_wake_wa.bits.enable_wa) { + uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms; + + if (link->dc->debug.aux_wake_wa.bits.use_default_timeout) + timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS; + status = wa_try_to_wake_dprx(link, timeout_ms); + } + is_lttpr_present = dp_retrieve_lttpr_cap(link); /* Read DP tunneling information. */ status = dpcd_get_tunneling_device_data(link); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 7af153434e9e..d251c3f3a714 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1685,8 +1685,8 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; - // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks - if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + /*compare audio info*/ + if (memcmp(&old_stream->audio_info, &stream->audio_info, sizeof(stream->audio_info)) != 0) return false; return true; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 4ffab7bb1098..77ef9d1f9ea8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -188,6 +188,7 @@ struct dc_caps { bool psp_setup_panel_mode; bool extended_aux_timeout_support; bool dmcub_support; + bool zstate_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; unsigned int mall_size_per_mem_channel; @@ -525,6 +526,22 @@ union dpia_debug_options { uint32_t raw; }; +/* AUX wake work around options + * 0: enable/disable work around + * 1: use default timeout LINK_AUX_WAKE_TIMEOUT_MS + * 15-2: reserved + * 31-16: timeout in ms + */ +union aux_wake_wa_options { + struct { + uint32_t enable_wa : 1; + uint32_t use_default_timeout : 1; + uint32_t rsvd: 14; + uint32_t timeout_ms : 16; + } bits; + uint32_t raw; +}; + struct dc_debug_data { uint32_t ltFailCount; uint32_t i2cErrorCount; @@ -703,13 +720,15 @@ struct dc_debug_options { bool enable_driver_sequence_debug; enum det_size crb_alloc_policy; int crb_alloc_policy_min_disp_count; -#if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_z10; +#if defined(CONFIG_DRM_AMD_DC_DCN) bool enable_z9_disable_interface; bool enable_sw_cntl_psr; union dpia_debug_options dpia_debug; #endif bool apply_vendor_specific_lttpr_wa; + bool extended_blank_optimization; + union aux_wake_wa_options aux_wake_wa; bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; @@ -1369,6 +1388,8 @@ struct dc_sink_init_data { bool converter_disable_audio; }; +bool dc_extended_blank_supported(struct dc *dc); + struct dc_sink *dc_sink_create(const struct dc_sink_init_data *init_params); /* Newer interfaces */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 99a750f561f8..c4168c11257c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -131,6 +131,7 @@ union stream_update_flags { uint32_t wb_update:1; uint32_t dsc_changed : 1; uint32_t mst_bw : 1; + uint32_t crtc_timing_adjust : 1; } bits; uint32_t raw; @@ -289,6 +290,7 @@ struct dc_stream_update { struct dc_3dlut *lut3d_func; struct test_pattern *pending_test_pattern; + struct dc_crtc_timing_adjust *crtc_timing_adjust; }; bool dc_is_stream_unchanged( diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index c3e141c19a77..781334b395ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1497,16 +1497,12 @@ void dcn10_init_hw(struct dc *dc) link->link_status.link_active = true; } - /* Power gate DSCs */ - if (!is_optimized_init_done) { - for (i = 0; i < res_pool->res_cap->num_dsc; i++) - if (hws->funcs.dsc_pg_control != NULL) - hws->funcs.dsc_pg_control(hws, res_pool->dscs[i]->inst, false); - } - /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -1559,8 +1555,6 @@ void dcn10_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (dc->clk_mgr->funcs->notify_wm_ranges) dc->clk_mgr->funcs->notify_wm_ranges(dc->clk_mgr); @@ -2056,7 +2050,7 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, { struct dc_context *dc_ctx = dc->ctx; int i, master = -1, embedded = -1; - struct dc_crtc_timing hw_crtc_timing[MAX_PIPES] = {0}; + struct dc_crtc_timing *hw_crtc_timing; uint64_t phase[MAX_PIPES]; uint64_t modulo[MAX_PIPES]; unsigned int pclk; @@ -2067,6 +2061,10 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, uint32_t dp_ref_clk_100hz = dc->res_pool->dp_clock_source->ctx->dc->clk_mgr->dprefclk_khz*10; + hw_crtc_timing = kcalloc(MAX_PIPES, sizeof(*hw_crtc_timing), GFP_KERNEL); + if (!hw_crtc_timing) + return master; + if (dc->config.vblank_alignment_dto_params && dc->res_pool->dp_clock_source->funcs->override_dp_pix_clk) { embedded_h_total = @@ -2130,6 +2128,8 @@ static int dcn10_align_pixel_clocks(struct dc *dc, int group_size, } } + + kfree(hw_crtc_timing); return master; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index ab910deed481..4290eaf11a04 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1857,6 +1857,7 @@ void dcn20_optimize_bandwidth( struct dc_state *context) { struct hubbub *hubbub = dc->res_pool->hubbub; + int i; /* program dchubbub watermarks */ hubbub->funcs->program_watermarks(hubbub, @@ -1873,6 +1874,17 @@ void dcn20_optimize_bandwidth( dc->clk_mgr, context, true); + if (dc_extended_blank_supported(dc) && context->bw_ctx.bw.dcn.clk.zstate_support == DCN_ZSTATE_SUPPORT_ALLOW) { + for (i = 0; i < dc->res_pool->pipe_count; ++i) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (pipe_ctx->stream && pipe_ctx->plane_res.hubp->funcs->program_extended_blank + && pipe_ctx->stream->adjust.v_total_min == pipe_ctx->stream->adjust.v_total_max + && pipe_ctx->stream->adjust.v_total_max > pipe_ctx->stream->timing.v_total) + pipe_ctx->plane_res.hubp->funcs->program_extended_blank(pipe_ctx->plane_res.hubp, + pipe_ctx->dlg_regs.optimized_min_dst_y_next_start); + } + } /* increase compbuf size */ if (hubbub->funcs->program_compbuf_size) hubbub->funcs->program_compbuf_size(hubbub, context->bw_ctx.bw.dcn.compbuf_size_kb, true); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index d473708d5399..7802d603f796 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1976,7 +1976,6 @@ int dcn20_validate_apply_pipe_split_flags( /*If need split for odm but 4 way split already*/ if (split[i] == 2 && ((pipe->prev_odm_pipe && !pipe->prev_odm_pipe->prev_odm_pipe) || !pipe->next_odm_pipe)) { - ASSERT(0); /* NOT expected yet */ merge[i] = true; /* 4 -> 2 ODM */ } else if (split[i] == 0 && pipe->prev_odm_pipe) { ASSERT(0); /* NOT expected yet */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 612732656772..3fe4bfbb98a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -644,7 +644,7 @@ static const struct dc_debug_options debug_defaults_drv = { .clock_trace = true, .disable_pplib_clock_request = true, .min_disp_clk_khz = 100000, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index ed0a0e5fd805..f61ec8763844 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -547,6 +547,9 @@ void dcn30_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -624,8 +627,6 @@ void dcn30_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 3e6d6ebd199e..51c5f3685470 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ -1042,5 +1042,7 @@ void hubbub31_construct(struct dcn20_hubbub *hubbub31, hubbub31->detile_buf_size = det_size_kb * 1024; hubbub31->pixel_chunk_size = pixel_chunk_size_kb * 1024; hubbub31->crb_size_segs = config_return_buffer_size_kb / DCN31_CRB_SEGMENT_SIZE_KB; + + hubbub31->debug_test_index_pstate = 0x6; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c index 53b792b997b7..8ae6117953ca 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c @@ -54,6 +54,13 @@ void hubp31_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_SOFT_RESET, reset); } +void hubp31_program_extended_blank(struct hubp *hubp, unsigned int min_dst_y_next_start_optimized) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_SET(BLANK_OFFSET_1, 0, MIN_DST_Y_NEXT_START, min_dst_y_next_start_optimized); +} + static struct hubp_funcs dcn31_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, .hubp_is_triplebuffer_enabled = hubp2_is_triplebuffer_enabled, @@ -80,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .set_unbounded_requesting = hubp31_set_unbounded_requesting, .hubp_soft_reset = hubp31_soft_reset, .hubp_in_blank = hubp1_in_blank, + .program_extended_blank = hubp31_program_extended_blank, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 4be228680909..631d8ac63aa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -199,6 +199,9 @@ void dcn31_init_hw(struct dc *dc) /* we want to turn off all dp displays before doing detection */ dc_link_blank_all_dp_displays(dc); + if (hws->funcs.enable_power_gating_plane) + hws->funcs.enable_power_gating_plane(dc->hwseq, true); + /* If taking control over from VBIOS, we may want to optimize our first * mode set, so we need to skip powering down pipes until we know which * pipes we want to use. @@ -248,8 +251,6 @@ void dcn31_init_hw(struct dc *dc) REG_UPDATE(DCFCLK_CNTL, DCFCLK_GATE_DIS, 0); } - if (hws->funcs.enable_power_gating_plane) - hws->funcs.enable_power_gating_plane(dc->hwseq, true); if (!dcb->funcs->is_accelerated_mode(dcb) && dc->res_pool->hubbub->funcs->init_watermarks) dc->res_pool->hubbub->funcs->init_watermarks(dc->res_pool->hubbub); @@ -338,20 +339,20 @@ void dcn31_enable_power_gating_plane( bool enable) { bool force_on = true; /* disable power gating */ + uint32_t org_ip_request_cntl = 0; if (enable && !hws->ctx->dc->debug.disable_hubp_power_gate) force_on = false; + REG_GET(DC_IP_REQUEST_CNTL, IP_REQUEST_EN, &org_ip_request_cntl); + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 1); /* DCHUBP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN0_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN2_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN2_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); /* DPP0/1/2/3/4/5 */ REG_UPDATE(DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN1_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN3_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN3_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); force_on = true; /* disable power gating */ if (enable && !hws->ctx->dc->debug.disable_dsc_power_gate) @@ -359,11 +360,11 @@ void dcn31_enable_power_gating_plane( /* DCS0/1/2/3/4/5 */ REG_UPDATE(DOMAIN16_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN16_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN17_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN17_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); REG_UPDATE(DOMAIN18_PG_CONFIG, DOMAIN_POWER_FORCEON, force_on); - REG_WAIT(DOMAIN18_PG_STATUS, DOMAIN_PGFSM_PWR_STATUS, force_on, 1, 1000); + + if (org_ip_request_cntl == 0) + REG_SET(DC_IP_REQUEST_CNTL, 0, IP_REQUEST_EN, 0); } void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index 8afe2130d7c5..e05527a3a8ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -124,7 +124,6 @@ static bool optc31_enable_crtc(struct timing_generator *optc) static bool optc31_disable_crtc(struct timing_generator *optc) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - /* disable otg request until end of the first line * in the vertical blank region */ @@ -138,6 +137,7 @@ static bool optc31_disable_crtc(struct timing_generator *optc) REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000); + optc1_clear_optc_underflow(optc); return true; } @@ -158,6 +158,9 @@ static bool optc31_immediate_disable_crtc(struct timing_generator *optc) OTG_BUSY, 0, 1, 100000); + /* clear the false state */ + optc1_clear_optc_underflow(optc); + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 89b7b6b7254a..63934ecf6be8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2032,7 +2032,9 @@ bool dcn31_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); // Disable fast_validate to set min dcfclk in alculate_wm_and_dlg if (pipe_cnt == 0) @@ -2232,6 +2234,7 @@ static bool dcn31_resource_construct( dc->caps.extended_aux_timeout_support = true; dc->caps.dmcub_support = true; dc->caps.is_apu = true; + dc->caps.zstate_support = true; /* Color pipeline capabilities */ dc->caps.color.dpp.dcn_arch = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index 2f6122153bdb..f93af45aeab4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -722,8 +722,10 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc { int plane_count; int i; + unsigned int optimized_min_dst_y_next_start_us; plane_count = 0; + optimized_min_dst_y_next_start_us = 0; for (i = 0; i < dc->res_pool->pipe_count; i++) { if (context->res_ctx.pipe_ctx[i].plane_state) plane_count++; @@ -744,11 +746,22 @@ static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struc struct dc_link *link = context->streams[0]->sink->link; struct dc_stream_status *stream_status = &context->stream_status[0]; + if (dc_extended_blank_supported(dc)) { + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].stream == context->streams[0] + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min == context->res_ctx.pipe_ctx[i].stream->adjust.v_total_max + && context->res_ctx.pipe_ctx[i].stream->adjust.v_total_min > context->res_ctx.pipe_ctx[i].stream->timing.v_total) { + optimized_min_dst_y_next_start_us = + context->res_ctx.pipe_ctx[i].dlg_regs.optimized_min_dst_y_next_start_us; + break; + } + } + } /* zstate only supported on PWRSEQ0 and when there's <2 planes*/ if (link->link_index != 0 || stream_status->plane_count > 1) return DCN_ZSTATE_SUPPORT_DISALLOW; - if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000) return DCN_ZSTATE_SUPPORT_ALLOW; else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr) return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY; @@ -786,8 +799,6 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); - context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); if (context->bw_ctx.bw.dcn.clk.dispclk_khz < dc->debug.min_disp_clk_khz) @@ -843,6 +854,7 @@ void dcn20_calculate_dlg_params( &pipes[pipe_idx].pipe); pipe_idx++; } + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); } static void swizzle_to_dml_params( diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c index e0fecf127bd5..53d760e169e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_rq_dlg_calc_31.c @@ -1055,6 +1055,7 @@ static void dml_rq_dlg_get_dlg_params( float vba__refcyc_per_req_delivery_pre_l = get_refcyc_per_req_delivery_pre_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA float vba__refcyc_per_req_delivery_l = get_refcyc_per_req_delivery_l_in_us(mode_lib, e2e_pipe_param, num_pipes, pipe_idx) * refclk_freq_in_mhz; // From VBA + int blank_lines; memset(disp_dlg_regs, 0, sizeof(*disp_dlg_regs)); memset(disp_ttu_regs, 0, sizeof(*disp_ttu_regs)); @@ -1080,6 +1081,18 @@ static void dml_rq_dlg_get_dlg_params( dlg_vblank_start = interlaced ? (vblank_start / 2) : vblank_start; disp_dlg_regs->min_dst_y_next_start = (unsigned int) (((double) dlg_vblank_start) * dml_pow(2, 2)); + blank_lines = (dst->vblank_end + dst->vtotal_min - dst->vblank_start - dst->vstartup_start - 1); + if (blank_lines < 0) + blank_lines = 0; + if (blank_lines != 0) { + disp_dlg_regs->optimized_min_dst_y_next_start_us = + ((unsigned int) blank_lines * dst->hactive) / (unsigned int) dst->pixel_rate_mhz; + disp_dlg_regs->optimized_min_dst_y_next_start = + (unsigned int)(((double) (dlg_vblank_start + blank_lines)) * dml_pow(2, 2)); + } else { + // use unoptimized value + disp_dlg_regs->optimized_min_dst_y_next_start = disp_dlg_regs->min_dst_y_next_start; + } ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)dml_pow(2, 18)); dml_print("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, min_ttu_vblank); diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 59f0a61c33cf..2df660cd8801 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -446,6 +446,8 @@ struct _vcs_dpi_display_dlg_regs_st { unsigned int refcyc_h_blank_end; unsigned int dlg_vblank_end; unsigned int min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start; + unsigned int optimized_min_dst_y_next_start_us; unsigned int refcyc_per_htotal; unsigned int refcyc_x_after_scaler; unsigned int dst_y_after_scaler; diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index efc2339f1fa0..4385d19bc489 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -864,11 +864,11 @@ static bool setup_dsc_config( min_slices_h = inc_num_slices(dsc_common_caps.slice_caps, min_slices_h); } + is_dsc_possible = (min_slices_h <= max_slices_h); + if (pic_width % min_slices_h != 0) min_slices_h = 0; // DSC TODO: Maybe try increasing the number of slices first? - is_dsc_possible = (min_slices_h <= max_slices_h); - if (min_slices_h == 0 && max_slices_h == 0) is_dsc_possible = false; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h index ab9939db8cea..44f167d2584f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h @@ -33,6 +33,7 @@ #define MAX_MTP_SLOT_COUNT 64 #define DP_REPEATER_CONFIGURATION_AND_STATUS_SIZE 0x50 #define TRAINING_AUX_RD_INTERVAL 100 //us +#define LINK_AUX_WAKE_TIMEOUT_MS 1500 // Timeout when trying to wake unresponsive DPRX. struct dc_link; struct dc_stream_state; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index e45b7993c5c5..ad69d78c4ac3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -195,6 +195,9 @@ struct hubp_funcs { void (*hubp_set_flip_int)(struct hubp *hubp); + void (*program_extended_blank)(struct hubp *hubp, + unsigned int min_dst_y_next_start_optimized); + void (*hubp_wait_pipe_read_start)(struct hubp *hubp); }; diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c index b691aa45e84f..79bc207415bc 100644 --- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c +++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c @@ -100,7 +100,8 @@ enum vsc_packet_revision { //PB7 = MD0 #define MASK_VTEM_MD0__VRR_EN 0x01 #define MASK_VTEM_MD0__M_CONST 0x02 -#define MASK_VTEM_MD0__RESERVED2 0x0C +#define MASK_VTEM_MD0__QMS_EN 0x04 +#define MASK_VTEM_MD0__RESERVED2 0x08 #define MASK_VTEM_MD0__FVA_FACTOR_M1 0xF0 //MD1 @@ -109,7 +110,7 @@ enum vsc_packet_revision { //MD2 #define MASK_VTEM_MD2__BASE_REFRESH_RATE_98 0x03 #define MASK_VTEM_MD2__RB 0x04 -#define MASK_VTEM_MD2__RESERVED3 0xF8 +#define MASK_VTEM_MD2__NEXT_TFR 0xF8 //MD3 #define MASK_VTEM_MD3__BASE_REFRESH_RATE_07 0xFF diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 89fbee568be4..5504d81c77b7 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -173,6 +173,17 @@ bool amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) if (!pp_funcs || !pp_funcs->get_asic_baco_capability) return false; + /* Don't use baco for reset in S3. + * This is a workaround for some platforms + * where entering BACO during suspend + * seems to cause reboots or hangs. + * This might be related to the fact that BACO controls + * power to the whole GPU including devices like audio and USB. + * Powering down/up everything may adversely affect these other + * devices. Needs more investigation. + */ + if (adev->in_s3) + return false; mutex_lock(&adev->pm.mutex); @@ -500,6 +511,9 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size) struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_pages_num(smu, size); mutex_unlock(&adev->pm.mutex); @@ -512,6 +526,9 @@ int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t si struct smu_context *smu = adev->powerplay.pp_handle; int ret = 0; + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + mutex_lock(&adev->pm.mutex); ret = smu_send_hbm_bad_channel_flag(smu, size); mutex_unlock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index 9ddd8491ff00..ede71de2343d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -773,13 +773,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : + (data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk / 100) : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -792,11 +792,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk / 100, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c index 7bfac029e513..b81711c4ff33 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_5_ppt.c @@ -991,7 +991,7 @@ static int smu_v13_0_5_set_performance_level(struct smu_context *smu, return -EINVAL; } - if (sclk_min && sclk_max) { + if (sclk_min && sclk_max && smu_v13_0_5_clk_dpm_is_enabled(smu, SMU_SCLK)) { ret = smu_v13_0_5_set_soft_freq_limited_range(smu, SMU_SCLK, sclk_min, diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c index d63d83800a8a..e0b9f7063b20 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_plane.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_plane.c @@ -135,7 +135,6 @@ static void komeda_plane_destroy(struct drm_plane *plane) static void komeda_plane_reset(struct drm_plane *plane) { struct komeda_plane_state *state; - struct komeda_plane *kplane = to_kplane(plane); if (plane->state) __drm_atomic_helper_plane_destroy_state(plane->state); @@ -144,16 +143,8 @@ static void komeda_plane_reset(struct drm_plane *plane) plane->state = NULL; state = kzalloc(sizeof(*state), GFP_KERNEL); - if (state) { - state->base.rotation = DRM_MODE_ROTATE_0; - state->base.pixel_blend_mode = DRM_MODE_BLEND_PREMULTI; - state->base.alpha = DRM_BLEND_ALPHA_OPAQUE; - state->base.zpos = kplane->layer->base.id; - state->base.color_encoding = DRM_COLOR_YCBCR_BT601; - state->base.color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; - plane->state = &state->base; - plane->state->plane = plane; - } + if (state) + __drm_atomic_helper_plane_reset(plane, &state->base); } static struct drm_plane_state * @@ -265,6 +256,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, layer->layer_type, &n_formats); + if (!formats) { + kfree(kplane); + return -ENOMEM; + } err = drm_universal_plane_init(&kms->base, plane, get_possible_crtcs(kms, c->pipeline), @@ -275,8 +270,10 @@ static int komeda_plane_add(struct komeda_kms_dev *kms, komeda_put_fourcc_list(formats); - if (err) - goto cleanup; + if (err) { + kfree(kplane); + return err; + } drm_plane_helper_add(plane, &komeda_plane_helper_funcs); diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c index 494075ddbef6..b5928b52e279 100644 --- a/drivers/gpu/drm/arm/malidp_crtc.c +++ b/drivers/gpu/drm/arm/malidp_crtc.c @@ -487,7 +487,10 @@ static void malidp_crtc_reset(struct drm_crtc *crtc) if (crtc->state) malidp_crtc_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &state->base); + if (state) + __drm_atomic_helper_crtc_reset(crtc, &state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static int malidp_crtc_enable_vblank(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 0562bdaac00c..81d9f5004025 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -310,17 +310,13 @@ static int malidp_se_check_scaling(struct malidp_plane *mp, static u32 malidp_get_pgsize_bitmap(struct malidp_plane *mp) { - u32 pgsize_bitmap = 0; + struct iommu_domain *mmu_dom; - if (iommu_present(&platform_bus_type)) { - struct iommu_domain *mmu_dom = - iommu_get_domain_for_dev(mp->base.dev->dev); + mmu_dom = iommu_get_domain_for_dev(mp->base.dev->dev); + if (mmu_dom) + return mmu_dom->pgsize_bitmap; - if (mmu_dom) - pgsize_bitmap = mmu_dom->pgsize_bitmap; - } - - return pgsize_bitmap; + return 0; } /* diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 007e5a282f67..c08ccb4b332b 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -32,6 +32,7 @@ config DRM_CHIPONE_ICN6211 select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL_BRIDGE + select REGMAP_I2C help ICN6211 is MIPI-DSI/RGB Converter bridge from chipone. @@ -77,6 +78,7 @@ config DRM_DISPLAY_CONNECTOR config DRM_ITE_IT6505 tristate "ITE IT6505 DisplayPort bridge" depends on OF + select DRM_DP_HELPER select DRM_KMS_HELPER select EXTCON help @@ -98,6 +100,19 @@ config DRM_LONTIUM_LT8912B Say M here if you want to support this hardware as a module. The module will be named "lontium-lt8912b". +config DRM_LONTIUM_LT9211 + tristate "Lontium LT9211 DSI/LVDS/DPI bridge" + depends on OF + select DRM_PANEL_BRIDGE + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select REGMAP_I2C + help + Driver for Lontium LT9211 Single/Dual-Link DSI/LVDS or Single DPI + input to Single-link/Dual-Link DSI/LVDS or Single DPI output bridge + chip. + Please say Y if you have such hardware. + config DRM_LONTIUM_LT9611 tristate "Lontium LT9611 DSI/HDMI bridge" select SND_SOC_HDMI_CODEC if SND_SOC @@ -265,6 +280,7 @@ config DRM_TOSHIBA_TC358767 select DRM_DP_HELPER select DRM_KMS_HELPER select REGMAP_I2C + select DRM_MIPI_DSI select DRM_PANEL help Toshiba TC358767 eDP bridge chip driver. diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 425844c30495..b0edf2022fa0 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_DRM_CROS_EC_ANX7688) += cros-ec-anx7688.o obj-$(CONFIG_DRM_DISPLAY_CONNECTOR) += display-connector.o obj-$(CONFIG_DRM_ITE_IT6505) += ite-it6505.o obj-$(CONFIG_DRM_LONTIUM_LT8912B) += lontium-lt8912b.o +obj-$(CONFIG_DRM_LONTIUM_LT9211) += lontium-lt9211.o obj-$(CONFIG_DRM_LONTIUM_LT9611) += lontium-lt9611.o obj-$(CONFIG_DRM_LONTIUM_LT9611UXC) += lontium-lt9611uxc.o obj-$(CONFIG_DRM_LVDS_CODEC) += lvds-codec.o diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 005bf18682ff..b3f10c54e064 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1292,8 +1292,10 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) goto err_unregister_cec; adv7511->bridge.funcs = &adv7511_bridge_funcs; - adv7511->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID - | DRM_BRIDGE_OP_HPD; + adv7511->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID; + if (adv7511->i2c_main->irq) + adv7511->bridge.ops |= DRM_BRIDGE_OP_HPD; + adv7511->bridge.of_node = dev->of_node; adv7511->bridge.type = DRM_MODE_CONNECTOR_HDMIA; @@ -1313,6 +1315,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) adv7511_audio_exit(adv7511); drm_bridge_remove(&adv7511->bridge); err_unregister_cec: + cec_unregister_adapter(adv7511->cec_adap); i2c_unregister_device(adv7511->i2c_cec); clk_disable_unprepare(adv7511->cec_clk); err_i2c_unregister_packet: diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index eb590fb8e8d0..c5afccd46440 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -1119,9 +1119,7 @@ static int analogix_dp_get_modes(struct drm_connector *connector) return 0; } - pm_runtime_get_sync(dp->dev); edid = drm_get_edid(connector, &dp->aux.ddc); - pm_runtime_put(dp->dev); if (edid) { drm_connector_update_edid_property(&dp->connector, edid); @@ -1632,8 +1630,20 @@ static ssize_t analogix_dpaux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) { struct analogix_dp_device *dp = to_dp(aux); + int ret; - return analogix_dp_transfer(dp, msg); + pm_runtime_get_sync(dp->dev); + + ret = analogix_dp_detect_hpd(dp); + if (ret) + goto out; + + ret = analogix_dp_transfer(dp, msg); +out: + pm_runtime_mark_last_busy(dp->dev); + pm_runtime_put_autosuspend(dp->dev); + + return ret; } struct analogix_dp_device * @@ -1764,6 +1774,8 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) if (ret) return ret; + pm_runtime_use_autosuspend(dp->dev); + pm_runtime_set_autosuspend_delay(dp->dev, 100); pm_runtime_enable(dp->dev); ret = analogix_dp_create_bridge(drm_dev, dp); @@ -1775,6 +1787,7 @@ int analogix_dp_bind(struct analogix_dp_device *dp, struct drm_device *drm_dev) return 0; err_disable_pm_runtime: + pm_runtime_dont_use_autosuspend(dp->dev); pm_runtime_disable(dp->dev); drm_dp_aux_unregister(&dp->aux); @@ -1793,6 +1806,7 @@ void analogix_dp_unbind(struct analogix_dp_device *dp) } drm_dp_aux_unregister(&dp->aux); + pm_runtime_dont_use_autosuspend(dp->dev); pm_runtime_disable(dp->dev); } EXPORT_SYMBOL_GPL(analogix_dp_unbind); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 31ecf5626f1d..376da01243a3 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -874,7 +874,10 @@ static int anx7625_hdcp_enable(struct anx7625_data *ctx) } /* Read downstream capability */ - anx7625_aux_trans(ctx, DP_AUX_NATIVE_READ, 0x68028, 1, &bcap); + ret = anx7625_aux_trans(ctx, DP_AUX_NATIVE_READ, 0x68028, 1, &bcap); + if (ret < 0) + return ret; + if (!(bcap & 0x01)) { pr_warn("downstream not support HDCP 1.4, cap(%x).\n", bcap); return 0; @@ -921,12 +924,20 @@ static void anx7625_dp_start(struct anx7625_data *ctx) { int ret; struct device *dev = &ctx->client->dev; + u8 data; if (!ctx->display_timing_valid) { DRM_DEV_ERROR(dev, "mipi not set display timing yet.\n"); return; } + dev_dbg(dev, "set downstream sink into normal\n"); + /* Downstream sink enter into normal mode */ + data = 1; + ret = anx7625_aux_trans(ctx, DP_AUX_NATIVE_WRITE, 0x000600, 1, &data); + if (ret < 0) + dev_err(dev, "IO error : set sink into normal mode fail\n"); + /* Disable HDCP */ anx7625_write_and(ctx, ctx->i2c.rx_p1_client, 0xee, 0x9f); @@ -1475,12 +1486,12 @@ static void anx7625_dp_adjust_swing(struct anx7625_data *ctx) for (i = 0; i < ctx->pdata.dp_lane0_swing_reg_cnt; i++) anx7625_reg_write(ctx, ctx->i2c.tx_p1_client, DP_TX_LANE0_SWING_REG0 + i, - ctx->pdata.lane0_reg_data[i] & 0xFF); + ctx->pdata.lane0_reg_data[i]); for (i = 0; i < ctx->pdata.dp_lane1_swing_reg_cnt; i++) anx7625_reg_write(ctx, ctx->i2c.tx_p1_client, DP_TX_LANE1_SWING_REG0 + i, - ctx->pdata.lane1_reg_data[i] & 0xFF); + ctx->pdata.lane1_reg_data[i]); } static void dp_hpd_change_handler(struct anx7625_data *ctx, bool on) @@ -1587,8 +1598,8 @@ static int anx7625_get_swing_setting(struct device *dev, num_regs = DP_TX_SWING_REG_CNT; pdata->dp_lane0_swing_reg_cnt = num_regs; - of_property_read_u32_array(dev->of_node, "analogix,lane0-swing", - pdata->lane0_reg_data, num_regs); + of_property_read_u8_array(dev->of_node, "analogix,lane0-swing", + pdata->lane0_reg_data, num_regs); } if (of_get_property(dev->of_node, @@ -1597,8 +1608,8 @@ static int anx7625_get_swing_setting(struct device *dev, num_regs = DP_TX_SWING_REG_CNT; pdata->dp_lane1_swing_reg_cnt = num_regs; - of_property_read_u32_array(dev->of_node, "analogix,lane1-swing", - pdata->lane1_reg_data, num_regs); + of_property_read_u8_array(dev->of_node, "analogix,lane1-swing", + pdata->lane1_reg_data, num_regs); } return 0; @@ -1608,8 +1619,6 @@ static int anx7625_parse_dt(struct device *dev, struct anx7625_platform_data *pdata) { struct device_node *np = dev->of_node, *ep0; - struct drm_panel *panel; - int ret; int bus_type, mipi_lanes; anx7625_get_swing_setting(dev, pdata); @@ -1646,18 +1655,14 @@ static int anx7625_parse_dt(struct device *dev, if (of_property_read_bool(np, "analogix,audio-enable")) pdata->audio_en = 1; - ret = drm_of_find_panel_or_bridge(np, 1, 0, &panel, NULL); - if (ret < 0) { - if (ret == -ENODEV) + pdata->panel_bridge = devm_drm_of_get_bridge(dev, np, 1, 0); + if (IS_ERR(pdata->panel_bridge)) { + if (PTR_ERR(pdata->panel_bridge) == -ENODEV) return 0; - return ret; - } - if (!panel) - return -ENODEV; - pdata->panel_bridge = devm_drm_panel_bridge_add(dev, panel); - if (IS_ERR(pdata->panel_bridge)) return PTR_ERR(pdata->panel_bridge); + } + DRM_DEV_DEBUG_DRIVER(dev, "get panel node.\n"); return 0; @@ -1927,14 +1932,14 @@ static int anx7625_audio_get_eld(struct device *dev, void *data, struct anx7625_data *ctx = dev_get_drvdata(dev); if (!ctx->connector) { - dev_err(dev, "connector not initial\n"); - return -EINVAL; + /* Pass en empty ELD if connector not available */ + memset(buf, 0, len); + } else { + dev_dbg(dev, "audio copy eld\n"); + memcpy(buf, ctx->connector->eld, + min(sizeof(ctx->connector->eld), len)); } - dev_dbg(dev, "audio copy eld\n"); - memcpy(buf, ctx->connector->eld, - min(sizeof(ctx->connector->eld), len)); - return 0; } @@ -2011,7 +2016,8 @@ static int anx7625_attach_dsi(struct anx7625_data *ctx) dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE | - MIPI_DSI_MODE_VIDEO_HSE; + MIPI_DSI_MODE_VIDEO_HSE | + MIPI_DSI_HS_PKT_END_ALIGNED; ret = devm_mipi_dsi_attach(dev, dsi); if (ret) { @@ -2654,7 +2660,7 @@ static int anx7625_i2c_probe(struct i2c_client *client, if (ret) { if (ret != -EPROBE_DEFER) DRM_DEV_ERROR(dev, "fail to parse DT : %d\n", ret); - return ret; + goto free_wq; } if (anx7625_register_i2c_dummy_clients(platform, client) != 0) { @@ -2669,7 +2675,7 @@ static int anx7625_i2c_probe(struct i2c_client *client, pm_suspend_ignore_children(dev, true); ret = devm_add_action_or_reset(dev, anx7625_runtime_disable, dev); if (ret) - return ret; + goto free_wq; if (!platform->pdata.low_power_mode) { anx7625_disable_pd_protocol(platform); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.h b/drivers/gpu/drm/bridge/analogix/anx7625.h index edbbfe410a56..e257a84db962 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.h +++ b/drivers/gpu/drm/bridge/analogix/anx7625.h @@ -426,9 +426,9 @@ struct anx7625_platform_data { int mipi_lanes; int audio_en; int dp_lane0_swing_reg_cnt; - int lane0_reg_data[DP_TX_SWING_REG_CNT]; + u8 lane0_reg_data[DP_TX_SWING_REG_CNT]; int dp_lane1_swing_reg_cnt; - int lane1_reg_data[DP_TX_SWING_REG_CNT]; + u8 lane1_reg_data[DP_TX_SWING_REG_CNT]; u32 low_power_mode; struct device_node *mipi_host_node; }; diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index d9b7f48b99fb..47dea657a752 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -11,12 +11,25 @@ #include #include +#include #include #include +#include #include -#include