Merge 9867cb1fd5 ("Merge tag 'jfs-5.11' of git://github.com/kleikamp/linux-shaggy") into android-mainline

Steps on the way to 5.11-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I58050cac7c91095375b3ea3b6dbbd35e6bcfe887
This commit is contained in:
Greg Kroah-Hartman 2020-12-18 16:33:24 +01:00
commit 15c84c847a
123 changed files with 7229 additions and 4837 deletions

View File

@ -19,6 +19,7 @@ properties:
- st,stm32mp151-pwr-mcu
- st,stm32-syscfg
- st,stm32-power-config
- st,stm32-tamp
- const: syscon
reg:

View File

@ -113,8 +113,8 @@ should be referenced as follows:
For the compatible strings below the following supplies are required:
"qcom,q6v5-pil"
"qcom,msm8916-mss-pil",
- cx-supply:
- mx-supply:
- cx-supply: (deprecated, use power domain instead)
- mx-supply: (deprecated, use power domain instead)
- pll-supply:
Usage: required
Value type: <phandle>
@ -123,9 +123,9 @@ For the compatible strings below the following supplies are required:
For the compatible string below the following supplies are required:
"qcom,msm8974-mss-pil"
- cx-supply:
- cx-supply: (deprecated, use power domain instead)
- mss-supply:
- mx-supply:
- mx-supply: (deprecated, use power domain instead)
- pll-supply:
Usage: required
Value type: <phandle>
@ -149,11 +149,11 @@ For the compatible string below the following supplies are required:
Usage: required
Value type: <stringlist>
Definition: The power-domains needed depend on the compatible string:
qcom,q6v5-pil:
qcom,ipq8074-wcss-pil:
no power-domain names required
qcom,q6v5-pil:
qcom,msm8916-mss-pil:
qcom,msm8974-mss-pil:
no power-domain names required
qcom,msm8996-mss-pil:
qcom,msm8998-mss-pil:
must be "cx", "mx"

View File

@ -34,14 +34,25 @@ on the Qualcomm WCNSS core.
Definition: should be "wdog", "fatal", optionally followed by "ready",
"handover", "stop-ack"
- vddmx-supply:
- vddcx-supply:
- vddmx-supply: (deprecated for qcom,pronto-v1/2-pil)
- vddcx-supply: (deprecated for qcom,pronto-v1/2-pil)
- vddpx-supply:
Usage: required
Value type: <phandle>
Definition: reference to the regulators to be held on behalf of the
booting of the WCNSS core
- power-domains:
Usage: required (for qcom,pronto-v1/2-pil)
Value type: <phandle>
Definition: reference to the power domains to be held on behalf of the
booting of the WCNSS core
- power-domain-names:
Usage: required (for qcom,pronto-v1/2-pil)
Value type: <stringlist>
Definition: must be "cx", "mx"
- qcom,smem-states:
Usage: optional
Value type: <prop-encoded-array>
@ -111,8 +122,9 @@ pronto@fb204000 {
<&wcnss_smp2p_slave 3 0>;
interrupt-names = "wdog", "fatal", "ready", "handover", "stop-ack";
vddmx-supply = <&pm8841_s1>;
vddcx-supply = <&pm8841_s2>;
power-domains = <&rpmpd MSM8974_VDDCX>, <&rpmpd MSM8974_VDDMX>;
power-domain-names = "cx", "mx";
vddpx-supply = <&pm8941_s3>;
qcom,smem-states = <&wcnss_smp2p_out 0>;

View File

@ -38,9 +38,6 @@ properties:
st,syscfg-tz:
description:
Reference to the system configuration which holds the RCC trust zone mode
- Phandle of syscon block.
- The offset of the RCC trust zone mode register.
- The field mask of the RCC trust zone mode.
$ref: "/schemas/types.yaml#/definitions/phandle-array"
maxItems: 1
@ -91,9 +88,19 @@ properties:
$ref: "/schemas/types.yaml#/definitions/phandle-array"
description: |
Reference to the system configuration which holds the remote
1st cell: phandle to syscon block
2nd cell: register offset containing the deep sleep setting
3rd cell: register bitmask for the deep sleep bit
maxItems: 1
st,syscfg-m4-state:
$ref: "/schemas/types.yaml#/definitions/phandle-array"
description: |
Reference to the tamp register which exposes the Cortex-M4 state.
maxItems: 1
st,syscfg-rsc-tbl:
$ref: "/schemas/types.yaml#/definitions/phandle-array"
description: |
Reference to the tamp register which references the Cortex-M4
resource table address.
maxItems: 1
st,auto-boot:
@ -122,6 +129,8 @@ examples:
resets = <&rcc MCU_R>;
st,syscfg-holdboot = <&rcc 0x10C 0x1>;
st,syscfg-tz = <&rcc 0x000 0x1>;
st,syscfg-rsc-tbl = <&tamp 0x144 0xFFFFFFFF>;
st,syscfg-m4-state = <&tamp 0x148 0xFFFFFFFF>;
};
...

View File

@ -32,6 +32,7 @@ properties:
enum:
- ti,am654-r5fss
- ti,j721e-r5fss
- ti,j7200-r5fss
power-domains:
description: |
@ -95,6 +96,7 @@ patternProperties:
enum:
- ti,am654-r5f
- ti,j721e-r5f
- ti,j7200-r5f
reg:
items:

View File

@ -0,0 +1,214 @@
# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/remoteproc/ti,pru-rproc.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: TI Programmable Realtime Unit (PRU) cores
maintainers:
- Suman Anna <s-anna@ti.com>
description: |
Each Programmable Real-Time Unit and Industrial Communication Subsystem
(PRU-ICSS or PRUSS) has two 32-bit load/store RISC CPU cores called
Programmable Real-Time Units (PRUs), each represented by a node. Each PRU
core has a dedicated Instruction RAM, Control and Debug register sets, and
uses the Data RAMs present within the PRU-ICSS for code execution.
The K3 SoCs containing ICSSG v1.0 (eg: AM65x SR1.0) also have two Auxiliary
PRU cores called RTUs with slightly different IP integration. The K3 SoCs
containing the revised ICSSG v1.1 (eg: J721E, AM65x SR2.0) have an extra two
auxiliary Transmit PRU cores called Tx_PRUs that augment the PRUs. Each RTU
or Tx_PRU core can also be used independently like a PRU, or alongside a
corresponding PRU core to provide/implement auxiliary functionality/support.
Each PRU, RTU or Tx_PRU core node should be defined as a child node of the
corresponding PRU-ICSS node. Each node can optionally be rendered inactive by
using the standard DT string property, "status".
Please see the overall PRU-ICSS bindings document for additional details
including a complete example,
Documentation/devicetree/bindings/soc/ti/ti,pruss.yaml
properties:
compatible:
enum:
- ti,am3356-pru # for AM335x SoC family (AM3356+ SoCs only)
- ti,am4376-pru # for AM437x SoC family (AM4376+ SoCs only)
- ti,am5728-pru # for AM57xx SoC family
- ti,k2g-pru # for 66AK2G SoC family
- ti,am654-pru # for PRUs in K3 AM65x SoC family
- ti,am654-rtu # for RTUs in K3 AM65x SoC family
- ti,am654-tx-pru # for Tx_PRUs in K3 AM65x SR2.0 SoCs
- ti,j721e-pru # for PRUs in K3 J721E SoC family
- ti,j721e-rtu # for RTUs in K3 J721E SoC family
- ti,j721e-tx-pru # for Tx_PRUs in K3 J721E SoC family
reg:
items:
- description: Address and Size of the PRU Instruction RAM
- description: Address and Size of the PRU CTRL sub-module registers
- description: Address and Size of the PRU Debug sub-module registers
reg-names:
items:
- const: iram
- const: control
- const: debug
firmware-name:
description: |
Should contain the name of the default firmware image
file located on the firmware search path.
# Constrain the node name according to the core type selected by "compatible":
# RTU cores use "rtu@...", Tx_PRU cores use "txpru@...", everything else
# uses "pru@...". All three patterns are anchored at both ends.
if:
  properties:
    compatible:
      enum:
        - ti,am654-rtu
        - ti,j721e-rtu
then:
  properties:
    $nodename:
      pattern: "^rtu@[0-9a-f]+$"
else:
  if:
    properties:
      compatible:
        enum:
          - ti,am654-tx-pru
          - ti,j721e-tx-pru
  then:
    properties:
      $nodename:
        # end anchor added so trailing garbage after the unit address is rejected
        pattern: "^txpru@[0-9a-f]+$"
  else:
    properties:
      $nodename:
        pattern: "^pru@[0-9a-f]+$"
required:
- compatible
- reg
- reg-names
- firmware-name
additionalProperties: false
examples:
- |
/* AM33xx PRU-ICSS */
pruss_tm: target-module@300000 { /* 0x4a300000, ap 9 04.0 */
compatible = "ti,sysc-pruss", "ti,sysc";
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0x300000 0x80000>;
pruss: pruss@0 {
compatible = "ti,am3356-pruss";
reg = <0x0 0x80000>;
#address-cells = <1>;
#size-cells = <1>;
ranges;
pruss_mem: memories@0 {
reg = <0x0 0x2000>,
<0x2000 0x2000>,
<0x10000 0x3000>;
reg-names = "dram0", "dram1", "shrdram2";
};
pru0: pru@34000 {
compatible = "ti,am3356-pru";
reg = <0x34000 0x2000>,
<0x22000 0x400>,
<0x22400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am335x-pru0-fw";
};
pru1: pru@38000 {
compatible = "ti,am3356-pru";
reg = <0x38000 0x2000>,
<0x24000 0x400>,
<0x24400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am335x-pru1-fw";
};
};
};
- |
/* AM65x SR2.0 ICSSG */
#include <dt-bindings/soc/ti,sci_pm_domain.h>
icssg0: icssg@b000000 {
compatible = "ti,am654-icssg";
reg = <0xb000000 0x80000>;
power-domains = <&k3_pds 62 TI_SCI_PD_EXCLUSIVE>;
#address-cells = <1>;
#size-cells = <1>;
ranges = <0x0 0xb000000 0x80000>;
icssg0_mem: memories@0 {
reg = <0x0 0x2000>,
<0x2000 0x2000>,
<0x10000 0x10000>;
reg-names = "dram0", "dram1", "shrdram2";
};
pru0_0: pru@34000 {
compatible = "ti,am654-pru";
reg = <0x34000 0x4000>,
<0x22000 0x100>,
<0x22400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-pru0_0-fw";
};
rtu0_0: rtu@4000 {
compatible = "ti,am654-rtu";
reg = <0x4000 0x2000>,
<0x23000 0x100>,
<0x23400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-rtu0_0-fw";
};
tx_pru0_0: txpru@a000 {
compatible = "ti,am654-tx-pru";
reg = <0xa000 0x1800>,
<0x25000 0x100>,
<0x25400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-txpru0_0-fw";
};
pru0_1: pru@38000 {
compatible = "ti,am654-pru";
reg = <0x38000 0x4000>,
<0x24000 0x100>,
<0x24400 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-pru0_1-fw";
};
rtu0_1: rtu@6000 {
compatible = "ti,am654-rtu";
reg = <0x6000 0x2000>,
<0x23800 0x100>,
<0x23c00 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-rtu0_1-fw";
};
tx_pru0_1: txpru@c000 {
compatible = "ti,am654-tx-pru";
reg = <0xc000 0x1800>,
<0x25800 0x100>,
<0x25c00 0x100>;
reg-names = "iram", "control", "debug";
firmware-name = "am65x-txpru0_1-fw";
};
};

View File

@ -94,7 +94,7 @@ static struct platform_driver sirf_hwspinlock_driver = {
.probe = sirf_hwspinlock_probe,
.driver = {
.name = "atlas7_hwspinlock",
.of_match_table = of_match_ptr(sirf_hwpinlock_ids),
.of_match_table = sirf_hwpinlock_ids,
},
};

View File

@ -4,7 +4,6 @@
* Copyright (C) 2017 Spreadtrum - http://www.spreadtrum.com
*/
#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/device.h>
@ -15,7 +14,6 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include "hwspinlock_internal.h"
@ -148,21 +146,10 @@ static struct platform_driver sprd_hwspinlock_driver = {
.probe = sprd_hwspinlock_probe,
.driver = {
.name = "sprd_hwspinlock",
.of_match_table = of_match_ptr(sprd_hwspinlock_of_match),
.of_match_table = sprd_hwspinlock_of_match,
},
};
static int __init sprd_hwspinlock_init(void)
{
return platform_driver_register(&sprd_hwspinlock_driver);
}
postcore_initcall(sprd_hwspinlock_init);
static void __exit sprd_hwspinlock_exit(void)
{
platform_driver_unregister(&sprd_hwspinlock_driver);
}
module_exit(sprd_hwspinlock_exit);
module_platform_driver(sprd_hwspinlock_driver);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Hardware spinlock driver for Spreadtrum");

View File

@ -125,6 +125,18 @@ config KEYSTONE_REMOTEPROC
It's safe to say N here if you're not interested in the Keystone
DSPs or just want to use a bare minimum kernel.
config PRU_REMOTEPROC
tristate "TI PRU remoteproc support"
depends on TI_PRUSS
default TI_PRUSS
help
Support for TI PRU remote processors present within a PRU-ICSS
subsystem via the remote processor framework.
Say Y or M here to support the Programmable Realtime Unit (PRU)
processors on various TI SoCs. It's safe to say N here if you're
not interested in the PRU or if you are unsure.
config QCOM_PIL_INFO
tristate
@ -183,7 +195,7 @@ config QCOM_Q6V5_PAS
select QCOM_RPROC_COMMON
select QCOM_SCM
help
Say y here to support the TrustZone based Peripherial Image Loader
Say y here to support the TrustZone based Peripheral Image Loader
for the Qualcomm Hexagon v5 based remote processors. This is commonly
used to control subsystems such as ADSP, Compute and Sensor.

View File

@ -18,6 +18,7 @@ obj-$(CONFIG_OMAP_REMOTEPROC) += omap_remoteproc.o
obj-$(CONFIG_WKUP_M3_RPROC) += wkup_m3_rproc.o
obj-$(CONFIG_DA8XX_REMOTEPROC) += da8xx_remoteproc.o
obj-$(CONFIG_KEYSTONE_REMOTEPROC) += keystone_remoteproc.o
obj-$(CONFIG_PRU_REMOTEPROC) += pru_rproc.o
obj-$(CONFIG_QCOM_PIL_INFO) += qcom_pil_info.o
obj-$(CONFIG_QCOM_RPROC_COMMON) += qcom_common.o
obj-$(CONFIG_QCOM_Q6V5_COMMON) += qcom_q6v5.o

View File

@ -135,7 +135,7 @@ static void *ingenic_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
return (__force void *)va;
}
static struct rproc_ops ingenic_rproc_ops = {
static const struct rproc_ops ingenic_rproc_ops = {
.prepare = ingenic_rproc_prepare,
.unprepare = ingenic_rproc_unprepare,
.start = ingenic_rproc_start,

View File

@ -32,22 +32,22 @@
#define MT8183_SCP_CACHESIZE_8KB BIT(8)
#define MT8183_SCP_CACHE_CON_WAYEN BIT(10)
#define MT8192_L2TCM_SRAM_PD_0 0x210C0
#define MT8192_L2TCM_SRAM_PD_1 0x210C4
#define MT8192_L2TCM_SRAM_PD_2 0x210C8
#define MT8192_L1TCM_SRAM_PDN 0x2102C
#define MT8192_CPU0_SRAM_PD 0x21080
#define MT8192_L2TCM_SRAM_PD_0 0x10C0
#define MT8192_L2TCM_SRAM_PD_1 0x10C4
#define MT8192_L2TCM_SRAM_PD_2 0x10C8
#define MT8192_L1TCM_SRAM_PDN 0x102C
#define MT8192_CPU0_SRAM_PD 0x1080
#define MT8192_SCP2APMCU_IPC_SET 0x24080
#define MT8192_SCP2APMCU_IPC_CLR 0x24084
#define MT8192_SCP2APMCU_IPC_SET 0x4080
#define MT8192_SCP2APMCU_IPC_CLR 0x4084
#define MT8192_SCP_IPC_INT_BIT BIT(0)
#define MT8192_SCP2SPM_IPC_CLR 0x24094
#define MT8192_GIPC_IN_SET 0x24098
#define MT8192_SCP2SPM_IPC_CLR 0x4094
#define MT8192_GIPC_IN_SET 0x4098
#define MT8192_HOST_IPC_INT_BIT BIT(0)
#define MT8192_CORE0_SW_RSTN_CLR 0x30000
#define MT8192_CORE0_SW_RSTN_SET 0x30004
#define MT8192_CORE0_WDT_CFG 0x30034
#define MT8192_CORE0_SW_RSTN_CLR 0x10000
#define MT8192_CORE0_SW_RSTN_SET 0x10004
#define MT8192_CORE0_WDT_CFG 0x10034
#define SCP_FW_VER_LEN 32
#define SCP_SHARE_BUFFER_SIZE 288
@ -78,6 +78,8 @@ struct mtk_scp_of_data {
u32 host_to_scp_reg;
u32 host_to_scp_int_bit;
size_t ipi_buf_offset;
};
struct mtk_scp {
@ -99,7 +101,7 @@ struct mtk_scp {
bool ipi_id_ack[SCP_IPI_MAX];
wait_queue_head_t ack_wq;
void __iomem *cpu_addr;
void *cpu_addr;
dma_addr_t dma_addr;
size_t dram_size;

View File

@ -21,7 +21,7 @@
#include "remoteproc_internal.h"
#define MAX_CODE_SIZE 0x500000
#define SCP_FW_END 0x7C000
#define SECTION_NAME_IPI_BUFFER ".ipi_buffer"
/**
* scp_get() - get a reference to SCP.
@ -119,16 +119,29 @@ static void scp_ipi_handler(struct mtk_scp *scp)
wake_up(&scp->ack_wq);
}
static int scp_ipi_init(struct mtk_scp *scp)
{
size_t send_offset = SCP_FW_END - sizeof(struct mtk_share_obj);
size_t recv_offset = send_offset - sizeof(struct mtk_share_obj);
static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp,
const struct firmware *fw,
size_t *offset);
/* shared buffer initialization */
scp->recv_buf =
(struct mtk_share_obj __iomem *)(scp->sram_base + recv_offset);
scp->send_buf =
(struct mtk_share_obj __iomem *)(scp->sram_base + send_offset);
static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw)
{
int ret;
size_t offset;
/* read the ipi buf addr from FW itself first */
ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset);
if (ret) {
/* use default ipi buf addr if the FW doesn't have it */
offset = scp->data->ipi_buf_offset;
if (!offset)
return ret;
}
dev_info(scp->dev, "IPI buf addr %#010zx\n", offset);
scp->recv_buf = (struct mtk_share_obj __iomem *)
(scp->sram_base + offset);
scp->send_buf = (struct mtk_share_obj __iomem *)
(scp->sram_base + offset + sizeof(*scp->recv_buf));
memset_io(scp->recv_buf, 0, sizeof(*scp->recv_buf));
memset_io(scp->send_buf, 0, sizeof(*scp->send_buf));
@ -234,12 +247,14 @@ static int scp_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
u32 offset = phdr->p_offset;
void __iomem *ptr;
if (phdr->p_type != PT_LOAD)
continue;
dev_dbg(dev, "phdr: type %d da 0x%x memsz 0x%x filesz 0x%x\n",
phdr->p_type, da, memsz, filesz);
if (phdr->p_type != PT_LOAD)
continue;
if (!filesz)
continue;
if (filesz > memsz) {
dev_err(dev, "bad phdr filesz 0x%x memsz 0x%x\n",
filesz, memsz);
@ -263,14 +278,38 @@ static int scp_elf_load_segments(struct rproc *rproc, const struct firmware *fw)
}
/* put the segment where the remote processor expects it */
if (phdr->p_filesz)
scp_memcpy_aligned(ptr, elf_data + phdr->p_offset,
filesz);
scp_memcpy_aligned(ptr, elf_data + phdr->p_offset, filesz);
}
return ret;
}
/*
 * Find the device address of the IPI buffer section inside the firmware ELF.
 *
 * Walks the ELF32 section header table of @fw looking for a section named
 * SECTION_NAME_IPI_BUFFER and, when found, stores its sh_addr in @offset.
 *
 * Every offset taken from the image (section header table, section name
 * string table, per-section name offset) is validated against fw->size
 * before it is dereferenced, so a truncated or malformed image cannot make
 * this function read outside the firmware buffer.
 *
 * @scp: SCP instance (not used by the lookup itself)
 * @fw: firmware image to inspect
 * @offset: output, set only when 0 is returned
 *
 * Return: 0 when the section was found, -ENOENT when the image has no such
 * section, -EINVAL when the ELF headers are inconsistent with the image size.
 */
static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp,
				     const struct firmware *fw,
				     size_t *offset)
{
	const u8 *elf_data = fw->data;
	const struct elf32_hdr *ehdr;
	const struct elf32_shdr *shdr, *shdr_strtab;
	const char *strtab;
	size_t fw_size = fw->size;
	size_t strtab_size;
	int i;

	if (fw_size < sizeof(*ehdr))
		return -EINVAL;

	ehdr = (const struct elf32_hdr *)elf_data;

	/* no section headers at all: nothing to find */
	if (!ehdr->e_shnum)
		return -ENOENT;

	/* the whole section header table must lie inside the image */
	if (ehdr->e_shoff > fw_size ||
	    ehdr->e_shnum > (fw_size - ehdr->e_shoff) / sizeof(*shdr) ||
	    ehdr->e_shstrndx >= ehdr->e_shnum)
		return -EINVAL;

	shdr = (const struct elf32_shdr *)(elf_data + ehdr->e_shoff);
	shdr_strtab = shdr + ehdr->e_shstrndx;

	/* the section name string table must lie inside the image as well */
	if (shdr_strtab->sh_offset > fw_size ||
	    shdr_strtab->sh_size > fw_size - shdr_strtab->sh_offset)
		return -EINVAL;

	strtab = (const char *)(elf_data + shdr_strtab->sh_offset);
	strtab_size = shdr_strtab->sh_size;

	for (i = 0; i < ehdr->e_shnum; i++, shdr++) {
		/* skip names that cannot hold the wanted string plus its NUL */
		if (shdr->sh_name >= strtab_size ||
		    strtab_size - shdr->sh_name <
		    sizeof(SECTION_NAME_IPI_BUFFER))
			continue;

		/* comparing including the terminator is an exact name match */
		if (memcmp(strtab + shdr->sh_name, SECTION_NAME_IPI_BUFFER,
			   sizeof(SECTION_NAME_IPI_BUFFER)) == 0) {
			*offset = shdr->sh_addr;
			return 0;
		}
	}

	return -ENOENT;
}
static int mt8183_scp_before_load(struct mtk_scp *scp)
{
/* Clear SCP to host interrupt */
@ -298,7 +337,7 @@ static int mt8183_scp_before_load(struct mtk_scp *scp)
return 0;
}
static void mt8192_power_on_sram(void *addr)
static void mt8192_power_on_sram(void __iomem *addr)
{
int i;
@ -307,7 +346,7 @@ static void mt8192_power_on_sram(void *addr)
writel(0, addr);
}
static void mt8192_power_off_sram(void *addr)
static void mt8192_power_off_sram(void __iomem *addr)
{
int i;
@ -350,14 +389,32 @@ static int scp_load(struct rproc *rproc, const struct firmware *fw)
ret = scp->data->scp_before_load(scp);
if (ret < 0)
return ret;
goto leave;
ret = scp_elf_load_segments(rproc, fw);
leave:
clk_disable_unprepare(scp->clk);
return ret;
}
/*
 * parse_fw callback: set up the IPI shared buffers from the firmware image.
 *
 * The SCP clock is enabled only for the duration of scp_ipi_init() and is
 * released again regardless of its result.
 */
static int scp_parse_fw(struct rproc *rproc, const struct firmware *fw)
{
	struct mtk_scp *scp = rproc->priv;
	int err;

	err = clk_prepare_enable(scp->clk);
	if (err) {
		dev_err(scp->dev, "failed to enable clocks\n");
		return err;
	}

	err = scp_ipi_init(scp, fw);

	clk_disable_unprepare(scp->clk);

	return err;
}
static int scp_start(struct rproc *rproc)
{
struct mtk_scp *scp = (struct mtk_scp *)rproc->priv;
@ -408,12 +465,12 @@ static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len)
if (da < scp->sram_size) {
offset = da;
if (offset >= 0 && (offset + len) < scp->sram_size)
if (offset >= 0 && (offset + len) <= scp->sram_size)
return (void __force *)scp->sram_base + offset;
} else if (scp->dram_size) {
offset = da - scp->dma_addr;
if (offset >= 0 && (offset + len) < scp->dram_size)
return (void __force *)scp->cpu_addr + offset;
if (offset >= 0 && (offset + len) <= scp->dram_size)
return scp->cpu_addr + offset;
}
return NULL;
@ -461,6 +518,7 @@ static const struct rproc_ops scp_ops = {
.stop = scp_stop,
.load = scp_load,
.da_to_va = scp_da_to_va,
.parse_fw = scp_parse_fw,
};
/**
@ -680,19 +738,6 @@ static int scp_probe(struct platform_device *pdev)
goto release_dev_mem;
}
ret = clk_prepare_enable(scp->clk);
if (ret) {
dev_err(dev, "failed to enable clocks\n");
goto release_dev_mem;
}
ret = scp_ipi_init(scp);
clk_disable_unprepare(scp->clk);
if (ret) {
dev_err(dev, "Failed to init ipi\n");
goto release_dev_mem;
}
/* register SCP initialization IPI */
ret = scp_ipi_register(scp, SCP_IPI_INIT, scp_init_ipi_handler, scp);
if (ret) {
@ -760,6 +805,7 @@ static const struct mtk_scp_of_data mt8183_of_data = {
.scp_stop = mt8183_scp_stop,
.host_to_scp_reg = MT8183_HOST_TO_SCP,
.host_to_scp_int_bit = MT8183_HOST_IPC_INT_BIT,
.ipi_buf_offset = 0x7bdb0,
};
static const struct mtk_scp_of_data mt8192_of_data = {
@ -784,7 +830,7 @@ static struct platform_driver mtk_scp_driver = {
.remove = scp_remove,
.driver = {
.name = "mtk-scp",
.of_match_table = of_match_ptr(mtk_scp_of_match),
.of_match_table = mtk_scp_of_match,
},
};

View File

@ -0,0 +1,875 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* PRU-ICSS remoteproc driver for various TI SoCs
*
* Copyright (C) 2014-2020 Texas Instruments Incorporated - https://www.ti.com/
*
* Author(s):
* Suman Anna <s-anna@ti.com>
* Andrew F. Davis <afd@ti.com>
* Grzegorz Jaszczyk <grzegorz.jaszczyk@linaro.org> for Texas Instruments
*/
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/pruss_driver.h>
#include <linux/remoteproc.h>
#include "remoteproc_internal.h"
#include "remoteproc_elf_helpers.h"
#include "pru_rproc.h"
/* PRU_ICSS_PRU_CTRL registers */
#define PRU_CTRL_CTRL 0x0000
#define PRU_CTRL_STS 0x0004
#define PRU_CTRL_WAKEUP_EN 0x0008
#define PRU_CTRL_CYCLE 0x000C
#define PRU_CTRL_STALL 0x0010
#define PRU_CTRL_CTBIR0 0x0020
#define PRU_CTRL_CTBIR1 0x0024
#define PRU_CTRL_CTPPR0 0x0028
#define PRU_CTRL_CTPPR1 0x002C
/* CTRL register bit-fields */
#define CTRL_CTRL_SOFT_RST_N BIT(0)
#define CTRL_CTRL_EN BIT(1)
#define CTRL_CTRL_SLEEPING BIT(2)
#define CTRL_CTRL_CTR_EN BIT(3)
#define CTRL_CTRL_SINGLE_STEP BIT(8)
#define CTRL_CTRL_RUNSTATE BIT(15)
/* PRU_ICSS_PRU_DEBUG registers */
#define PRU_DEBUG_GPREG(x) (0x0000 + (x) * 4)
#define PRU_DEBUG_CT_REG(x) (0x0080 + (x) * 4)
/* PRU/RTU/Tx_PRU Core IRAM address masks */
#define PRU_IRAM_ADDR_MASK 0x3ffff
#define PRU0_IRAM_ADDR_MASK 0x34000
#define PRU1_IRAM_ADDR_MASK 0x38000
#define RTU0_IRAM_ADDR_MASK 0x4000
#define RTU1_IRAM_ADDR_MASK 0x6000
#define TX_PRU0_IRAM_ADDR_MASK 0xa000
#define TX_PRU1_IRAM_ADDR_MASK 0xc000
/* PRU device addresses for various types of PRU RAMs */
#define PRU_IRAM_DA 0 /* Instruction RAM */
#define PRU_PDRAM_DA 0 /* Primary Data RAM */
#define PRU_SDRAM_DA 0x2000 /* Secondary Data RAM */
#define PRU_SHRDRAM_DA 0x10000 /* Shared Data RAM */
#define MAX_PRU_SYS_EVENTS 160
/**
 * enum pru_iomem - identifiers of the per-core PRU memory/register ranges
 *
 * @PRU_IOMEM_IRAM: Instruction RAM range of the PRU core
 * @PRU_IOMEM_CTRL: Control register range of the PRU core
 * @PRU_IOMEM_DEBUG: Debug register range of the PRU core
 * @PRU_IOMEM_MAX: number of ranges; must stay the last enumerator
 */
enum pru_iomem {
	PRU_IOMEM_IRAM	= 0,
	PRU_IOMEM_CTRL	= 1,
	PRU_IOMEM_DEBUG	= 2,
	PRU_IOMEM_MAX,
};
/**
 * enum pru_type - identifiers of the supported PRU core flavours
 *
 * @PRU_TYPE_PRU: Programmable Real-time Unit
 * @PRU_TYPE_RTU: Auxiliary Programmable Real-Time Unit
 * @PRU_TYPE_TX_PRU: Transmit Programmable Real-Time Unit
 * @PRU_TYPE_MAX: number of core types; must stay the last enumerator
 */
enum pru_type {
	PRU_TYPE_PRU	= 0,
	PRU_TYPE_RTU	= 1,
	PRU_TYPE_TX_PRU	= 2,
	PRU_TYPE_MAX,
};
/**
 * struct pru_private_data - device data for a PRU core
 * @type: type of the PRU core (PRU, RTU, Tx_PRU), a value of enum pru_type
 * @is_k3: one-bit flag used to identify the need for special load handling
 */
struct pru_private_data {
enum pru_type type;
unsigned int is_k3 : 1;
};
/**
 * struct pru_rproc - PRU remoteproc structure
 * @id: id of the PRU core within the PRUSS
 * @dev: PRU core device pointer
 * @pruss: back-reference to parent PRUSS structure
 * @rproc: remoteproc pointer for this PRU core
 * @data: PRU core specific data
 * @mem_regions: data for each of the PRU memory regions, indexed by
 *               enum pru_iomem
 * @fw_name: name of firmware image used during loading
 * @mapped_irq: array of virtual interrupt numbers created for the firmware
 *              specific mapping; holds @evt_count entries
 * @pru_interrupt_map: pointer to interrupt mapping description (firmware);
 *                     reset to NULL by pru_rproc_start() once consumed
 * @pru_interrupt_map_sz: pru_interrupt_map size in bytes
 * @dbg_single_step: debug state variable to set PRU into single step mode
 * @dbg_continuous: debug state variable to restore PRU execution mode; holds
 *                  the CTRL register value saved when single-step was enabled
 * @evt_count: number of mapped events (bounded by MAX_PRU_SYS_EVENTS)
 */
struct pru_rproc {
int id;
struct device *dev;
struct pruss *pruss;
struct rproc *rproc;
const struct pru_private_data *data;
struct pruss_mem_region mem_regions[PRU_IOMEM_MAX];
const char *fw_name;
unsigned int *mapped_irq;
struct pru_irq_rsc *pru_interrupt_map;
size_t pru_interrupt_map_sz;
u32 dbg_single_step;
u32 dbg_continuous;
u8 evt_count;
};
static inline u32 pru_control_read_reg(struct pru_rproc *pru, unsigned int reg)
{
return readl_relaxed(pru->mem_regions[PRU_IOMEM_CTRL].va + reg);
}
/* Write @val to the 32-bit register at byte offset @reg of the PRU Control range. */
static inline
void pru_control_write_reg(struct pru_rproc *pru, unsigned int reg, u32 val)
{
	void __iomem *ctrl_base = pru->mem_regions[PRU_IOMEM_CTRL].va;

	writel_relaxed(val, ctrl_base + reg);
}
static inline u32 pru_debug_read_reg(struct pru_rproc *pru, unsigned int reg)
{
return readl_relaxed(pru->mem_regions[PRU_IOMEM_DEBUG].va + reg);
}
/*
 * debugfs "regs" show handler.
 *
 * Dumps the PRU Control registers unconditionally. The Debug registers
 * (32 GPREG/CT_REG pairs) are dumped only when the RUNSTATE bit of the CTRL
 * register is clear; otherwise a notice is printed instead.
 */
static int regs_show(struct seq_file *s, void *data)
{
	struct rproc *rproc = s->private;
	struct pru_rproc *pru = rproc->priv;
	const int num_regs = 32;
	u32 sts;
	u32 ctrl;
	int idx;

	seq_puts(s, "============== Control Registers ==============\n");
	seq_printf(s, "CTRL := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CTRL));

	/* STS holds the program counter in words; also show it in bytes */
	sts = pru_control_read_reg(pru, PRU_CTRL_STS);
	seq_printf(s, "STS (PC) := 0x%08x (0x%08x)\n", sts, sts << 2);

	seq_printf(s, "WAKEUP_EN := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_WAKEUP_EN));
	seq_printf(s, "CYCLE := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CYCLE));
	seq_printf(s, "STALL := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_STALL));
	seq_printf(s, "CTBIR0 := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CTBIR0));
	seq_printf(s, "CTBIR1 := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CTBIR1));
	seq_printf(s, "CTPPR0 := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CTPPR0));
	seq_printf(s, "CTPPR1 := 0x%08x\n",
		   pru_control_read_reg(pru, PRU_CTRL_CTPPR1));

	seq_puts(s, "=============== Debug Registers ===============\n");

	/* re-read CTRL: the run state decides whether debug regs are dumped */
	ctrl = pru_control_read_reg(pru, PRU_CTRL_CTRL);
	if (ctrl & CTRL_CTRL_RUNSTATE) {
		seq_puts(s, "PRU is executing, cannot print/access debug registers.\n");
		return 0;
	}

	for (idx = 0; idx < num_regs; idx++) {
		seq_printf(s, "GPREG%-2d := 0x%08x\tCT_REG%-2d := 0x%08x\n",
			   idx, pru_debug_read_reg(pru, PRU_DEBUG_GPREG(idx)),
			   idx, pru_debug_read_reg(pru, PRU_DEBUG_CT_REG(idx)));
	}

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(regs);
/*
 * Control PRU single-step mode
 *
 * Debug helper that switches the PRU in and out of single-step mode.
 *
 * A non-zero @val puts the PRU into single-step mode whatever its previous
 * state was. The CTRL register value is saved only on the transition into
 * single-step mode, so repeated non-zero writes do not overwrite it. A zero
 * @val writes the saved CTRL value back; it is a no-op when single-step mode
 * is not active.
 */
static int pru_rproc_debug_ss_set(void *data, u64 val)
{
	struct rproc *rproc = data;
	struct pru_rproc *pru = rproc->priv;
	bool enable = !!val;
	u32 ctrl;

	/* nothing to restore when single-step mode was never entered */
	if (!enable && !pru->dbg_single_step)
		return 0;

	ctrl = pru_control_read_reg(pru, PRU_CTRL_CTRL);

	if (enable) {
		/* remember the original mode on the first activation only */
		if (!pru->dbg_single_step)
			pru->dbg_continuous = ctrl;
		ctrl |= CTRL_CTRL_SINGLE_STEP | CTRL_CTRL_EN;
	} else {
		ctrl = pru->dbg_continuous;
	}

	pru->dbg_single_step = enable;
	pru_control_write_reg(pru, PRU_CTRL_CTRL, ctrl);

	return 0;
}
/* Report the cached single-step state (0 or 1) through @val. */
static int pru_rproc_debug_ss_get(void *data, u64 *val)
{
	const struct rproc *rproc = data;
	const struct pru_rproc *pru = rproc->priv;

	*val = pru->dbg_single_step;

	return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(pru_rproc_debug_ss_fops, pru_rproc_debug_ss_get,
			pru_rproc_debug_ss_set, "%llu\n");
/*
 * Create PRU-specific debugfs entries
 *
 * Adds the read-only "regs" file and the read-write "single_step" file to
 * the remoteproc debugfs directory of @rproc. Nothing is created when that
 * directory does not exist. Removal is left to the remoteproc core.
 */
static void pru_rproc_create_debug_entries(struct rproc *rproc)
{
	struct dentry *dir = rproc->dbg_dir;

	if (dir) {
		debugfs_create_file("regs", 0400, dir, rproc, &regs_fops);
		debugfs_create_file("single_step", 0600, dir, rproc,
				    &pru_rproc_debug_ss_fops);
	}
}
static void pru_dispose_irq_mapping(struct pru_rproc *pru)
{
while (pru->evt_count--) {
if (pru->mapped_irq[pru->evt_count] > 0)
irq_dispose_mapping(pru->mapped_irq[pru->evt_count]);
}
kfree(pru->mapped_irq);
}
/*
* Parse the custom PRU interrupt map resource and configure the INTC
* appropriately.
*/
static int pru_handle_intrmap(struct rproc *rproc)
{
struct device *dev = rproc->dev.parent;
struct pru_rproc *pru = rproc->priv;
struct pru_irq_rsc *rsc = pru->pru_interrupt_map;
struct irq_fwspec fwspec;
struct device_node *irq_parent;
int i, ret = 0;
/* not having pru_interrupt_map is not an error */
if (!rsc)
return 0;
/* currently supporting only type 0 */
if (rsc->type != 0) {
dev_err(dev, "unsupported rsc type: %d\n", rsc->type);
return -EINVAL;
}
if (rsc->num_evts > MAX_PRU_SYS_EVENTS)
return -EINVAL;
if (sizeof(*rsc) + rsc->num_evts * sizeof(struct pruss_int_map) !=
pru->pru_interrupt_map_sz)
return -EINVAL;
pru->evt_count = rsc->num_evts;
pru->mapped_irq = kcalloc(pru->evt_count, sizeof(unsigned int),
GFP_KERNEL);
if (!pru->mapped_irq)
return -ENOMEM;
/*
* parse and fill in system event to interrupt channel and
* channel-to-host mapping
*/
irq_parent = of_irq_find_parent(pru->dev->of_node);
if (!irq_parent) {
kfree(pru->mapped_irq);
return -ENODEV;
}
fwspec.fwnode = of_node_to_fwnode(irq_parent);
fwspec.param_count = 3;
for (i = 0; i < pru->evt_count; i++) {
fwspec.param[0] = rsc->pru_intc_map[i].event;
fwspec.param[1] = rsc->pru_intc_map[i].chnl;
fwspec.param[2] = rsc->pru_intc_map[i].host;
dev_dbg(dev, "mapping%d: event %d, chnl %d, host %d\n",
i, fwspec.param[0], fwspec.param[1], fwspec.param[2]);
pru->mapped_irq[i] = irq_create_fwspec_mapping(&fwspec);
if (!pru->mapped_irq[i]) {
dev_err(dev, "failed to get virq\n");
ret = pru->mapped_irq[i];
goto map_fail;
}
}
return ret;
map_fail:
pru_dispose_irq_mapping(pru);
return ret;
}
static int pru_rproc_start(struct rproc *rproc)
{
struct device *dev = &rproc->dev;
struct pru_rproc *pru = rproc->priv;
const char *names[PRU_TYPE_MAX] = { "PRU", "RTU", "Tx_PRU" };
u32 val;
int ret;
dev_dbg(dev, "starting %s%d: entry-point = 0x%llx\n",
names[pru->data->type], pru->id, (rproc->bootaddr >> 2));
ret = pru_handle_intrmap(rproc);
/*
* reset references to pru interrupt map - they will stop being valid
* after rproc_start returns
*/
pru->pru_interrupt_map = NULL;
pru->pru_interrupt_map_sz = 0;
if (ret)
return ret;
val = CTRL_CTRL_EN | ((rproc->bootaddr >> 2) << 16);
pru_control_write_reg(pru, PRU_CTRL_CTRL, val);
return 0;
}
/*
 * Stop the PRU core by clearing the enable bit of the CTRL register, then
 * dispose any interrupt mapping created for the firmware.
 */
static int pru_rproc_stop(struct rproc *rproc)
{
	static const char * const core_names[PRU_TYPE_MAX] = {
		"PRU", "RTU", "Tx_PRU"
	};
	struct pru_rproc *pru = rproc->priv;
	u32 ctrl;

	dev_dbg(&rproc->dev, "stopping %s%d\n",
		core_names[pru->data->type], pru->id);

	ctrl = pru_control_read_reg(pru, PRU_CTRL_CTRL) & ~CTRL_CTRL_EN;
	pru_control_write_reg(pru, PRU_CTRL_CTRL, ctrl);

	/* dispose irq mapping - new firmware can provide new mapping */
	if (pru->mapped_irq)
		pru_dispose_irq_mapping(pru);

	return 0;
}
/*
* Convert PRU device address (data spaces only) to kernel virtual address.
*
* Each PRU has access to all data memories within the PRUSS, accessible at
* different ranges. So, look through both its primary and secondary Data
* RAMs as well as any shared Data RAM to convert a PRU device address to
* kernel virtual address. Data RAM0 is primary Data RAM for PRU0 and Data
* RAM1 is primary Data RAM for PRU1.
*/
static void *pru_d_da_to_va(struct pru_rproc *pru, u32 da, size_t len)
{
struct pruss_mem_region dram0, dram1, shrd_ram;
struct pruss *pruss = pru->pruss;
u32 offset;
void *va = NULL;
if (len == 0)
return NULL;
dram0 = pruss->mem_regions[PRUSS_MEM_DRAM0];
dram1 = pruss->mem_regions[PRUSS_MEM_DRAM1];
/* PRU1 has its local RAM addresses reversed */
if (pru->id == 1)
swap(dram0, dram1);
shrd_ram = pruss->mem_regions[PRUSS_MEM_SHRD_RAM2];
if (da >= PRU_PDRAM_DA && da + len <= PRU_PDRAM_DA + dram0.size) {
offset = da - PRU_PDRAM_DA;
va = (__force void *)(dram0.va + offset);
} else if (da >= PRU_SDRAM_DA &&
da + len <= PRU_SDRAM_DA + dram1.size) {
offset = da - PRU_SDRAM_DA;
va = (__force void *)(dram1.va + offset);
} else if (da >= PRU_SHRDRAM_DA &&
da + len <= PRU_SHRDRAM_DA + shrd_ram.size) {
offset = da - PRU_SHRDRAM_DA;
va = (__force void *)(shrd_ram.va + offset);
}
return va;
}
/*
 * Convert PRU device address (instruction space) to kernel virtual address.
 *
 * A PRU does not have a unified address space. Each PRU has its very own
 * private Instruction RAM, and its device address is identical to that of
 * its primary Data RAM device address.
 *
 * Returns NULL when @len is zero or when [da, da + len) does not fit into
 * the Instruction RAM. The range check never computes da + len, so it cannot
 * be defeated by arithmetic wrap-around.
 */
static void *pru_i_da_to_va(struct pru_rproc *pru, u32 da, size_t len)
{
	size_t iram_size = pru->mem_regions[PRU_IOMEM_IRAM].size;
	u32 offset;

	if (len == 0 || len > iram_size)
		return NULL;

	if (da < PRU_IRAM_DA)
		return NULL;

	offset = da - PRU_IRAM_DA;
	if (offset > iram_size - len)
		return NULL;

	return (__force void *)(pru->mem_regions[PRU_IOMEM_IRAM].va + offset);
}
/*
 * remoteproc .da_to_va callback, exposed to PRU client drivers through the
 * remoteproc core. Only the PRU Data RAMs are translated here; Instruction
 * RAM translation is kept private to the PRU loader code.
 */
static void *pru_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
{
	return pru_d_da_to_va(rproc->priv, da, len);
}
/*
 * Address translator used by the PRU loader: @is_iram selects the
 * instruction-space lookup, otherwise the data-space lookup is used.
 */
static void *pru_da_to_va(struct rproc *rproc, u64 da, size_t len, bool is_iram)
{
	struct pru_rproc *core = rproc->priv;

	if (!is_iram)
		return pru_d_da_to_va(core, da, len);

	return pru_i_da_to_va(core, da, len);
}
/*
* Base remoteproc operations shared by all PRU cores. pru_rproc_probe()
* additionally installs the PRU-specific .load and .parse_fw callbacks on the
* ops of the rproc it allocates.
*/
static struct rproc_ops pru_rproc_ops = {
.start = pru_rproc_start,
.stop = pru_rproc_stop,
.da_to_va = pru_rproc_da_to_va,
};
/*
 * Word-by-word copy routine for the ICSSG PRU/RTU/Tx_PRU Instruction RAMs.
 *
 * On ICSSG the IRAM ports only latch data correctly for 4-byte writes: a
 * narrower or unaligned write zeroes the remaining bytes of that word, and
 * 8-byte accesses (as issued by the ARM64 memcpy) raise an exception. The
 * DRAM ports are not affected. Hence the copy below is done strictly one
 * 32-bit word at a time.
 *
 * @dest and @count must both be multiples of 4 (-EINVAL otherwise). @src may
 * be unaligned, in which case it is first duplicated into an aligned bounce
 * buffer (-ENOMEM if that allocation fails). Returns 0 on success.
 */
static int pru_rproc_memcpy(void *dest, const void *src, size_t count)
{
	u32 *bounce = NULL;
	const u32 *from = src;
	u32 *to = dest;
	size_t nwords = count / 4;
	size_t i;

	/*
	 * TODO: relax limitation of 4-byte aligned dest addresses and copy
	 * sizes
	 */
	if ((long)dest % 4 != 0 || count % 4 != 0)
		return -EINVAL;

	/* segment offsets inside the ELF image are not guaranteed aligned */
	if ((long)src % 4 != 0) {
		bounce = kmemdup(src, count, GFP_KERNEL);
		if (!bounce)
			return -ENOMEM;
		from = bounce;
	}

	for (i = 0; i < nwords; i++)
		to[i] = from[i];

	kfree(bounce);

	return 0;
}
/*
 * Custom .load callback: copy every non-empty PT_LOAD segment of the ELF32
 * firmware image into the PRU memory it targets.
 *
 * Executable segments (PF_X) are translated through the instruction-space
 * lookup, all others through the data-space lookup. On K3 SoCs IRAM segments
 * are written with pru_rproc_memcpy(); everything else uses plain memcpy().
 *
 * Returns 0 on success, -EINVAL for a malformed/truncated segment or an
 * untranslatable device address, or the error from pru_rproc_memcpy().
 */
static int
pru_rproc_load_elf_segments(struct rproc *rproc, const struct firmware *fw)
{
	struct pru_rproc *pru = rproc->priv;
	struct device *dev = &rproc->dev;
	struct elf32_hdr *ehdr;
	struct elf32_phdr *phdr;
	int i, ret = 0;
	const u8 *elf_data = fw->data;

	ehdr = (struct elf32_hdr *)elf_data;
	phdr = (struct elf32_phdr *)(elf_data + ehdr->e_phoff);

	/* go through the available ELF segments */
	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
		u32 da = phdr->p_paddr;
		u32 memsz = phdr->p_memsz;
		u32 filesz = phdr->p_filesz;
		u32 offset = phdr->p_offset;
		bool is_iram;
		void *ptr;

		if (phdr->p_type != PT_LOAD || !filesz)
			continue;

		dev_dbg(dev, "phdr: type %d da 0x%x memsz 0x%x filesz 0x%x\n",
			phdr->p_type, da, memsz, filesz);

		if (filesz > memsz) {
			dev_err(dev, "bad phdr filesz 0x%x memsz 0x%x\n",
				filesz, memsz);
			ret = -EINVAL;
			break;
		}

		/*
		 * offset and filesz are both 32-bit, so their sum can wrap;
		 * compare against the remaining bytes instead of adding.
		 */
		if (offset > fw->size || filesz > fw->size - offset) {
			dev_err(dev, "truncated fw: need 0x%llx avail 0x%zx\n",
				(unsigned long long)offset + filesz, fw->size);
			ret = -EINVAL;
			break;
		}

		/* grab the kernel address for this device address */
		is_iram = phdr->p_flags & PF_X;
		ptr = pru_da_to_va(rproc, da, memsz, is_iram);
		if (!ptr) {
			dev_err(dev, "bad phdr da 0x%x mem 0x%x\n", da, memsz);
			ret = -EINVAL;
			break;
		}

		if (pru->data->is_k3 && is_iram) {
			/* K3 IRAMs only accept 4-byte wide writes */
			ret = pru_rproc_memcpy(ptr, elf_data + offset, filesz);
			if (ret) {
				dev_err(dev, "PRU memory copy failed for da 0x%x memsz 0x%x\n",
					da, memsz);
				break;
			}
		} else {
			memcpy(ptr, elf_data + offset, filesz);
		}

		/* skip the memzero logic performed by remoteproc ELF loader */
	}

	return ret;
}
/*
 * Look up the section header of the ".pru_irq_map" section in an ELF32
 * firmware image.
 *
 * Returns the matching section header, NULL when the image has no such
 * section, or ERR_PTR(-EINVAL) when the section runs past the end of the
 * image or is too small to hold a struct pru_irq_rsc header.
 */
static const void *
pru_rproc_find_interrupt_map(struct device *dev, const struct firmware *fw)
{
	const u8 *elf_data = fw->data;
	struct elf32_hdr *ehdr = (struct elf32_hdr *)elf_data;
	struct elf32_shdr *sections;
	const char *strtab;
	u16 count = ehdr->e_shnum;
	int idx;

	/* section header array, and the string table holding section names */
	sections = (struct elf32_shdr *)(elf_data + ehdr->e_shoff);
	strtab = elf_data + sections[ehdr->e_shstrndx].sh_offset;

	for (idx = 0; idx < count; idx++) {
		struct elf32_shdr *sec = &sections[idx];
		u32 bytes = sec->sh_size;
		u32 end = sec->sh_offset + bytes;

		if (strcmp(strtab + sec->sh_name, ".pru_irq_map") != 0)
			continue;

		/* the whole map must lie inside the image (end may have wrapped) */
		if (end > fw->size || end < bytes) {
			dev_err(dev, ".pru_irq_map section truncated\n");
			return ERR_PTR(-EINVAL);
		}

		/* the map must at least contain its header */
		if (bytes < sizeof(struct pru_irq_rsc)) {
			dev_err(dev, "header-less .pru_irq_map section\n");
			return ERR_PTR(-EINVAL);
		}

		return sec;
	}

	dev_dbg(dev, "no .pru_irq_map section found for this fw\n");

	return NULL;
}
/*
 * Custom parse_fw callback handling the PRU firmware specific sections.
 *
 * A PRU firmware blob may carry two optional ELF sections: .resource_table
 * and .pru_irq_map. The latter describes the PRUSS interrupt mapping, which
 * has to be set up before the PRU core is powered on. To avoid wasting PRU
 * RAM the firmware linker does not place that section in any ELF segment, so
 * it is never loaded to PRU memory; a pointer into the firmware image and the
 * section size are recorded in the pru_rproc instead.
 *
 * Returns 0 on success (including when either optional section is absent),
 * or a negative error code.
 */
static int pru_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
{
	struct pru_rproc *pru = rproc->priv;
	u8 elf_class = fw_elf_get_class(fw);
	const void *irq_map_shdr;
	u64 map_offset;
	int err;

	/* the resource table is optional: -EINVAL only means "not present" */
	err = rproc_elf_load_rsc_table(rproc, fw);
	if (err && err != -EINVAL)
		return err;
	if (err == -EINVAL)
		dev_dbg(&rproc->dev, "no resource table found for this fw\n");

	/* the .pru_irq_map section is optional as well */
	irq_map_shdr = pru_rproc_find_interrupt_map(&rproc->dev, fw);
	if (IS_ERR(irq_map_shdr))
		return PTR_ERR(irq_map_shdr);

	if (irq_map_shdr) {
		/* remember where the interrupt map lives and how big it is */
		map_offset = elf_shdr_get_sh_offset(elf_class, irq_map_shdr);
		pru->pru_interrupt_map =
			(struct pru_irq_rsc *)(fw->data + map_offset);
		pru->pru_interrupt_map_sz =
			elf_shdr_get_sh_size(elf_class, irq_map_shdr);
	}

	return 0;
}
/*
 * Derive the PRU id (0 or 1) from the physical address of its IRAM, which
 * always sits at a fixed offset within the PRUSS address space. Returns 0 on
 * success or -EINVAL when the masked address matches no known IRAM offset.
 */
static int pru_rproc_set_id(struct pru_rproc *pru)
{
	switch (pru->mem_regions[PRU_IOMEM_IRAM].pa & PRU_IRAM_ADDR_MASK) {
	case TX_PRU0_IRAM_ADDR_MASK:
	case RTU0_IRAM_ADDR_MASK:
	case PRU0_IRAM_ADDR_MASK:
		pru->id = 0;
		return 0;
	case TX_PRU1_IRAM_ADDR_MASK:
	case RTU1_IRAM_ADDR_MASK:
	case PRU1_IRAM_ADDR_MASK:
		pru->id = 1;
		return 0;
	default:
		return -EINVAL;
	}
}
/*
* Platform driver probe: allocate and register a remoteproc for one PRU core.
*
* Reads the per-compatible match data and the "firmware-name" DT property,
* allocates a device-managed rproc, installs the PRU-specific load/parse_fw
* callbacks, maps the "iram", "control" and "debug" memory resources,
* derives the PRU id from the IRAM address and finally adds the rproc.
*
* Returns 0 on success or a negative error code.
*/
static int pru_rproc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct device_node *np = dev->of_node;
struct platform_device *ppdev = to_platform_device(dev->parent);
struct pru_rproc *pru;
const char *fw_name;
struct rproc *rproc = NULL;
struct resource *res;
int i, ret;
const struct pru_private_data *data;
/* resource names, indexed in the same order as pru->mem_regions[] */
const char *mem_names[PRU_IOMEM_MAX] = { "iram", "control", "debug" };
data = of_device_get_match_data(&pdev->dev);
if (!data)
return -ENODEV;
ret = of_property_read_string(np, "firmware-name", &fw_name);
if (ret) {
dev_err(dev, "unable to retrieve firmware-name %d\n", ret);
return ret;
}
/* device-managed allocation: no explicit free on the error paths below */
rproc = devm_rproc_alloc(dev, pdev->name, &pru_rproc_ops, fw_name,
sizeof(*pru));
if (!rproc) {
dev_err(dev, "rproc_alloc failed\n");
return -ENOMEM;
}
/* use a custom load function to deal with PRU-specific quirks */
rproc->ops->load = pru_rproc_load_elf_segments;
/* use a custom parse function to deal with PRU-specific resources */
rproc->ops->parse_fw = pru_rproc_parse_fw;
/* error recovery is not supported for PRUs */
rproc->recovery_disabled = true;
/*
* rproc_add will auto-boot the processor normally, but this is not
* desired with PRU client driven boot-flow methodology. A PRU
* application/client driver will boot the corresponding PRU
* remote-processor as part of its state machine either through the
* remoteproc sysfs interface or through the equivalent kernel API.
*/
rproc->auto_boot = false;
pru = rproc->priv;
pru->dev = dev;
pru->data = data;
/* the PRUSS handle is the driver data of the parent platform device */
pru->pruss = platform_get_drvdata(ppdev);
pru->rproc = rproc;
pru->fw_name = fw_name;
/* map each named memory resource and record its pa/size/va */
for (i = 0; i < ARRAY_SIZE(mem_names); i++) {
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
mem_names[i]);
pru->mem_regions[i].va = devm_ioremap_resource(dev, res);
if (IS_ERR(pru->mem_regions[i].va)) {
dev_err(dev, "failed to parse and map memory resource %d %s\n",
i, mem_names[i]);
ret = PTR_ERR(pru->mem_regions[i].va);
return ret;
}
/* res is only dereferenced after the mapping above succeeded */
pru->mem_regions[i].pa = res->start;
pru->mem_regions[i].size = resource_size(res);
dev_dbg(dev, "memory %8s: pa %pa size 0x%zx va %pK\n",
mem_names[i], &pru->mem_regions[i].pa,
pru->mem_regions[i].size, pru->mem_regions[i].va);
}
/* needs the IRAM physical address recorded by the loop above */
ret = pru_rproc_set_id(pru);
if (ret < 0)
return ret;
platform_set_drvdata(pdev, rproc);
ret = devm_rproc_add(dev, pru->rproc);
if (ret) {
dev_err(dev, "rproc_add failed: %d\n", ret);
return ret;
}
pru_rproc_create_debug_entries(rproc);
dev_dbg(dev, "PRU rproc node %pOF probed successfully\n", np);
return 0;
}
/*
 * Platform driver remove: only emits a debug message. The rproc was
 * allocated and added with the devm_* helpers in probe, so nothing is
 * released explicitly here.
 */
static int pru_rproc_remove(struct platform_device *pdev)
{
	struct rproc *rproc = platform_get_drvdata(pdev);

	dev_dbg(&pdev->dev, "%s: removing rproc %s\n", __func__, rproc->name);

	return 0;
}
/*
* Per-compatible private data, referenced from pru_rproc_match[]. @type
* selects the core kind and @is_k3 enables the K3-only IRAM copy routine
* in pru_rproc_load_elf_segments().
*/
/* PRU core on non-K3 SoCs */
static const struct pru_private_data pru_data = {
.type = PRU_TYPE_PRU,
};
/* PRU core on K3 SoCs */
static const struct pru_private_data k3_pru_data = {
.type = PRU_TYPE_PRU,
.is_k3 = 1,
};
/* RTU core on K3 SoCs */
static const struct pru_private_data k3_rtu_data = {
.type = PRU_TYPE_RTU,
.is_k3 = 1,
};
/* Tx_PRU core on K3 SoCs */
static const struct pru_private_data k3_tx_pru_data = {
.type = PRU_TYPE_TX_PRU,
.is_k3 = 1,
};
/*
* Device-tree match table: maps each supported compatible string to the
* pru_private_data instance retrieved by pru_rproc_probe().
*/
static const struct of_device_id pru_rproc_match[] = {
{ .compatible = "ti,am3356-pru", .data = &pru_data },
{ .compatible = "ti,am4376-pru", .data = &pru_data },
{ .compatible = "ti,am5728-pru", .data = &pru_data },
{ .compatible = "ti,k2g-pru", .data = &pru_data },
{ .compatible = "ti,am654-pru", .data = &k3_pru_data },
{ .compatible = "ti,am654-rtu", .data = &k3_rtu_data },
{ .compatible = "ti,am654-tx-pru", .data = &k3_tx_pru_data },
{ .compatible = "ti,j721e-pru", .data = &k3_pru_data },
{ .compatible = "ti,j721e-rtu", .data = &k3_rtu_data },
{ .compatible = "ti,j721e-tx-pru", .data = &k3_tx_pru_data },
{},
};
MODULE_DEVICE_TABLE(of, pru_rproc_match);
/* Platform driver glue binding the match table to probe/remove. */
static struct platform_driver pru_rproc_driver = {
.driver = {
.name = "pru-rproc",
.of_match_table = pru_rproc_match,
.suppress_bind_attrs = true,
},
.probe = pru_rproc_probe,
.remove = pru_rproc_remove,
};
module_platform_driver(pru_rproc_driver);
/* Module metadata */
MODULE_AUTHOR("Suman Anna <s-anna@ti.com>");
MODULE_AUTHOR("Andrew F. Davis <afd@ti.com>");
MODULE_AUTHOR("Grzegorz Jaszczyk <grzegorz.jaszczyk@linaro.org>");
MODULE_DESCRIPTION("PRU-ICSS Remote Processor Driver");
MODULE_LICENSE("GPL v2");

View File

@ -0,0 +1,46 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
/*
* PRUSS Remote Processor specific types
*
* Copyright (C) 2014-2020 Texas Instruments Incorporated - https://www.ti.com/
* Suman Anna <s-anna@ti.com>
*/
#ifndef _PRU_RPROC_H_
#define _PRU_RPROC_H_
/**
* struct pruss_int_map - PRU system events _to_ channel and host mapping
* @event: number of the system event
* @chnl: channel number assigned to a given @event
* @host: host number assigned to a given @chnl
*
* PRU system events are mapped to channels, and these channels are mapped
* to host interrupts. Events can be mapped to channels in a one-to-one or
* many-to-one ratio (multiple events per channel), and channels can be
* mapped to host interrupts in a one-to-one or many-to-one ratio (multiple
* channels per interrupt).
*
* This is the element type of the pru_intc_map[] array in
* struct pru_irq_rsc.
*/
struct pruss_int_map {
u8 event;
u8 chnl;
u8 host;
};
/**
* struct pru_irq_rsc - PRU firmware section header for IRQ data
* @type: resource type
* @num_evts: number of described events
* @pru_intc_map: PRU interrupt routing description, a flexible array of
*                struct pruss_int_map entries (presumably @num_evts of them;
*                confirm against the firmware format)
*
* The PRU firmware blob can contain optional .pru_irq_map ELF section, which
* provides the PRUSS interrupt mapping description. The pru_irq_rsc struct
* describes resource entry format.
*
* The structure is declared __packed.
*/
struct pru_irq_rsc {
u8 type;
u8 num_evts;
struct pruss_int_map pru_intc_map[];
} __packed;
#endif /* _PRU_RPROC_H_ */

View File

@ -17,6 +17,7 @@
#include <linux/rpmsg/qcom_smd.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <linux/soc/qcom/smem.h>
#include "remoteproc_internal.h"
#include "qcom_common.h"
@ -25,6 +26,61 @@
#define to_smd_subdev(d) container_of(d, struct qcom_rproc_subdev, subdev)
#define to_ssr_subdev(d) container_of(d, struct qcom_rproc_ssr, subdev)
/* Number of entries in minidump_global_toc.subsystems[] */
#define MAX_NUM_OF_SS 10
/* Size in bytes of minidump_region.name */
#define MAX_REGION_NAME_LENGTH 16
/* SMEM item id passed to qcom_smem_get() to fetch the global minidump ToC */
#define SBL_MINIDUMP_SMEM_ID 602
/*
* Status tags: four ASCII characters packed into one 32-bit value, first
* character in the most significant byte.
*/
#define MD_REGION_VALID ('V' << 24 | 'A' << 16 | 'L' << 8 | 'I' << 0)
#define MD_SS_ENCR_DONE ('D' << 24 | 'O' << 16 | 'N' << 8 | 'E' << 0)
#define MD_SS_ENABLED ('E' << 24 | 'N' << 16 | 'B' << 8 | 'L' << 0)
/**
* struct minidump_region - Minidump region
* @name: Name of the region to be dumped (MAX_REGION_NAME_LENGTH bytes)
* @seq_num: Use to differentiate regions with same name.
* @valid: This entry is dumped only if set to MD_REGION_VALID
* @address: Physical address of region to be dumped
* @size: Size of the region
*
* All multi-byte fields are little-endian.
*/
struct minidump_region {
char name[MAX_REGION_NAME_LENGTH];
__le32 seq_num;
__le32 valid;
__le64 address;
__le64 size;
};
/**
* struct minidump_subsystem - Subsystem's SMEM Table of content
* @status: Subsystem toc init status (qcom_minidump() requires 1)
* @enabled: regions are collected only if set to MD_SS_ENABLED
* @encryption_status: Encryption status for this subsystem
*                     (qcom_minidump() requires MD_SS_ENCR_DONE)
* @encryption_required: Decides to encrypt the subsystem regions or not
* @region_count: Number of regions added in this subsystem toc
* @regions_baseptr: regions base pointer of the subsystem
*
* All fields are little-endian.
*/
struct minidump_subsystem {
__le32 status;
__le32 enabled;
__le32 encryption_status;
__le32 encryption_required;
__le32 region_count;
__le64 regions_baseptr;
};
/**
* struct minidump_global_toc - Global Table of Content
* @status: Global Minidump init status
* @md_revision: Minidump revision
* @enabled: Minidump enable status
* @subsystems: Array of subsystems toc (MAX_NUM_OF_SS entries, indexed by
*              the minidump id passed to qcom_minidump())
*/
struct minidump_global_toc {
__le32 status;
__le32 md_revision;
__le32 enabled;
struct minidump_subsystem subsystems[MAX_NUM_OF_SS];
};
struct qcom_ssr_subsystem {
const char *name;
struct srcu_notifier_head notifier_list;
@ -34,6 +90,96 @@ struct qcom_ssr_subsystem {
static LIST_HEAD(qcom_ssr_subsystem_list);
static DEFINE_MUTEX(qcom_ssr_subsys_lock);
static void qcom_minidump_cleanup(struct rproc *rproc)
{
struct rproc_dump_segment *entry, *tmp;
list_for_each_entry_safe(entry, tmp, &rproc->dump_segments, node) {
list_del(&entry->node);
kfree(entry->priv);
kfree(entry);
}
}
/*
 * Populate @rproc's dump segment list from a subsystem's minidump region
 * table.
 *
 * The region table is mapped, each entry is copied out, and every entry
 * tagged MD_REGION_VALID is registered as a custom coredump segment whose
 * private data is a kmalloc'ed copy of the region name (released later by
 * qcom_minidump_cleanup()).
 *
 * Returns 0 on success, -EUCLEAN if the segment list is not empty on entry,
 * -EFAULT if the region table cannot be mapped, -ENOMEM on allocation
 * failure, or the error returned while registering a segment. On failure
 * the segments added so far stay on the list for the caller to clean up.
 */
static int qcom_add_minidump_segments(struct rproc *rproc, struct minidump_subsystem *subsystem)
{
	struct minidump_region __iomem *ptr;
	struct minidump_region region;
	int seg_cnt, i;
	int ret = 0;
	dma_addr_t da;
	size_t size;
	char *name;

	if (WARN_ON(!list_empty(&rproc->dump_segments))) {
		dev_err(&rproc->dev, "dump segment list already populated\n");
		return -EUCLEAN;
	}

	seg_cnt = le32_to_cpu(subsystem->region_count);
	ptr = ioremap((unsigned long)le64_to_cpu(subsystem->regions_baseptr),
		      seg_cnt * sizeof(struct minidump_region));
	if (!ptr)
		return -EFAULT;

	for (i = 0; i < seg_cnt; i++) {
		memcpy_fromio(&region, ptr + i, sizeof(region));

		/* the tag is stored little-endian, convert before comparing */
		if (le32_to_cpu(region.valid) != MD_REGION_VALID)
			continue;

		/* name[] is fixed-size and may lack a terminating NUL */
		name = kstrndup(region.name, sizeof(region.name), GFP_KERNEL);
		if (!name) {
			ret = -ENOMEM;
			break;
		}

		da = le64_to_cpu(region.address);
		/* size is a 64-bit little-endian field */
		size = le64_to_cpu(region.size);

		ret = rproc_coredump_add_custom_segment(rproc, da, size, NULL, name);
		if (ret) {
			/* the segment was not added, so name has no owner */
			kfree(name);
			break;
		}
	}

	iounmap(ptr);

	return ret;
}
void qcom_minidump(struct rproc *rproc, unsigned int minidump_id)
{
int ret;
struct minidump_subsystem *subsystem;
struct minidump_global_toc *toc;
/* Get Global minidump ToC*/
toc = qcom_smem_get(QCOM_SMEM_HOST_ANY, SBL_MINIDUMP_SMEM_ID, NULL);
/* check if global table pointer exists and init is set */
if (IS_ERR(toc) || !toc->status) {
dev_err(&rproc->dev, "Minidump TOC not found in SMEM\n");
return;
}
/* Get subsystem table of contents using the minidump id */
subsystem = &toc->subsystems[minidump_id];
/**
* Collect minidump if SS ToC is valid and segment table
* is initialized in memory and encryption status is set.
*/
if (subsystem->regions_baseptr == 0 ||
le32_to_cpu(subsystem->status) != 1 ||
le32_to_cpu(subsystem->enabled) != MD_SS_ENABLED ||
le32_to_cpu(subsystem->encryption_status) != MD_SS_ENCR_DONE) {
dev_err(&rproc->dev, "Minidump not ready, skipping\n");
return;
}
ret = qcom_add_minidump_segments(rproc, subsystem);
if (ret) {
dev_err(&rproc->dev, "Failed with error: %d while adding minidump entries\n", ret);
goto clean_minidump;
}
rproc_coredump_using_sections(rproc);
clean_minidump:
qcom_minidump_cleanup(rproc);
}
EXPORT_SYMBOL_GPL(qcom_minidump);
static int glink_subdev_start(struct rproc_subdev *subdev)
{
struct qcom_rproc_glink *glink = to_glink_subdev(subdev);

View File

@ -33,6 +33,8 @@ struct qcom_rproc_ssr {
struct qcom_ssr_subsystem *info;
};
void qcom_minidump(struct rproc *rproc, unsigned int minidump_id);
void qcom_add_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink,
const char *ssr_name);
void qcom_remove_glink_subdev(struct rproc *rproc, struct qcom_rproc_glink *glink);
@ -51,6 +53,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
const char *name,
int ssctl_instance);
void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon);
bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon);
#else
static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
const char *name,
@ -62,6 +65,11 @@ static inline struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
static inline void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
{
}
/*
* Stub for the #else branch, where the real sysmon implementation is not
* available: always reports that no shutdown was acknowledged.
*/
static inline bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon)
{
return false;
}
#endif
#endif

View File

@ -13,6 +13,7 @@
#include <linux/soc/qcom/smem.h>
#include <linux/soc/qcom/smem_state.h>
#include <linux/remoteproc.h>
#include "qcom_common.h"
#include "qcom_q6v5.h"
#define Q6V5_PANIC_DELAY_MS 200
@ -146,15 +147,20 @@ static irqreturn_t q6v5_stop_interrupt(int irq, void *data)
/**
* qcom_q6v5_request_stop() - request the remote processor to stop
* @q6v5: reference to qcom_q6v5 context
* @sysmon: reference to the remote's sysmon instance, or NULL
*
* Return: 0 on success, negative errno on failure
*/
int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5)
int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5, struct qcom_sysmon *sysmon)
{
int ret;
q6v5->running = false;
/* Don't perform SMP2P dance if sysmon already shut down the remote */
if (qcom_sysmon_shutdown_acked(sysmon))
return 0;
qcom_smem_state_update_bits(q6v5->state,
BIT(q6v5->stop_bit), BIT(q6v5->stop_bit));

View File

@ -8,6 +8,7 @@
struct rproc;
struct qcom_smem_state;
struct qcom_sysmon;
struct qcom_q6v5 {
struct device *dev;
@ -40,7 +41,7 @@ int qcom_q6v5_init(struct qcom_q6v5 *q6v5, struct platform_device *pdev,
int qcom_q6v5_prepare(struct qcom_q6v5 *q6v5);
int qcom_q6v5_unprepare(struct qcom_q6v5 *q6v5);
int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5);
int qcom_q6v5_request_stop(struct qcom_q6v5 *q6v5, struct qcom_sysmon *sysmon);
int qcom_q6v5_wait_for_start(struct qcom_q6v5 *q6v5, int timeout);
unsigned long qcom_q6v5_panic(struct qcom_q6v5 *q6v5);

View File

@ -193,8 +193,10 @@ static int adsp_start(struct rproc *rproc)
dev_pm_genpd_set_performance_state(adsp->dev, INT_MAX);
ret = pm_runtime_get_sync(adsp->dev);
if (ret)
if (ret) {
pm_runtime_put_noidle(adsp->dev);
goto disable_xo_clk;
}
ret = clk_bulk_prepare_enable(adsp->num_clks, adsp->clks);
if (ret) {
@ -264,7 +266,7 @@ static int adsp_stop(struct rproc *rproc)
int handover;
int ret;
ret = qcom_q6v5_request_stop(&adsp->q6v5);
ret = qcom_q6v5_request_stop(&adsp->q6v5, adsp->sysmon);
if (ret == -ETIMEDOUT)
dev_err(adsp->dev, "timed out on wait\n");
@ -362,15 +364,12 @@ static int adsp_init_mmio(struct qcom_adsp *adsp,
struct platform_device *pdev)
{
struct device_node *syscon;
struct resource *res;
int ret;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
adsp->qdsp6ss_base = devm_ioremap(&pdev->dev, res->start,
resource_size(res));
if (!adsp->qdsp6ss_base) {
adsp->qdsp6ss_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(adsp->qdsp6ss_base)) {
dev_err(adsp->dev, "failed to map QDSP6SS registers\n");
return -ENOMEM;
return PTR_ERR(adsp->qdsp6ss_base);
}
syscon = of_parse_phandle(pdev->dev.of_node, "qcom,halt-regs", 0);

View File

@ -132,6 +132,7 @@ struct qcom_mss_reg_res {
struct rproc_hexagon_res {
const char *hexagon_mba_image;
struct qcom_mss_reg_res *proxy_supply;
struct qcom_mss_reg_res *fallback_proxy_supply;
struct qcom_mss_reg_res *active_supply;
char **proxy_clk_names;
char **reset_clk_names;
@ -177,16 +178,17 @@ struct q6v5 {
int proxy_pd_count;
struct reg_info active_regs[1];
struct reg_info proxy_regs[3];
struct reg_info proxy_regs[1];
struct reg_info fallback_proxy_regs[2];
int active_reg_count;
int proxy_reg_count;
int fallback_proxy_reg_count;
bool dump_mba_loaded;
size_t current_dump_size;
size_t total_dump_size;
phys_addr_t mba_phys;
void *mba_region;
size_t mba_size;
size_t dp_size;
@ -349,8 +351,11 @@ static int q6v5_pds_enable(struct q6v5 *qproc, struct device **pds,
for (i = 0; i < pd_count; i++) {
dev_pm_genpd_set_performance_state(pds[i], INT_MAX);
ret = pm_runtime_get_sync(pds[i]);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_noidle(pds[i]);
dev_pm_genpd_set_performance_state(pds[i], 0);
goto unroll_pd_votes;
}
}
return 0;
@ -405,7 +410,7 @@ static int q6v5_xfer_mem_ownership(struct q6v5 *qproc, int *current_perm,
current_perm, next, perms);
}
static void q6v5_debug_policy_load(struct q6v5 *qproc)
static void q6v5_debug_policy_load(struct q6v5 *qproc, void *mba_region)
{
const struct firmware *dp_fw;
@ -413,7 +418,7 @@ static void q6v5_debug_policy_load(struct q6v5 *qproc)
return;
if (SZ_1M + dp_fw->size <= qproc->mba_size) {
memcpy(qproc->mba_region + SZ_1M, dp_fw->data, dp_fw->size);
memcpy(mba_region + SZ_1M, dp_fw->data, dp_fw->size);
qproc->dp_size = dp_fw->size;
}
@ -423,6 +428,7 @@ static void q6v5_debug_policy_load(struct q6v5 *qproc)
static int q6v5_load(struct rproc *rproc, const struct firmware *fw)
{
struct q6v5 *qproc = rproc->priv;
void *mba_region;
/* MBA is restricted to a maximum size of 1M */
if (fw->size > qproc->mba_size || fw->size > SZ_1M) {
@ -430,8 +436,16 @@ static int q6v5_load(struct rproc *rproc, const struct firmware *fw)
return -EINVAL;
}
memcpy(qproc->mba_region, fw->data, fw->size);
q6v5_debug_policy_load(qproc);
mba_region = memremap(qproc->mba_phys, qproc->mba_size, MEMREMAP_WC);
if (!mba_region) {
dev_err(qproc->dev, "unable to map memory region: %pa+%zx\n",
&qproc->mba_phys, qproc->mba_size);
return -EBUSY;
}
memcpy(mba_region, fw->data, fw->size);
q6v5_debug_policy_load(qproc, mba_region);
memunmap(mba_region);
return 0;
}
@ -538,6 +552,7 @@ static void q6v5_dump_mba_logs(struct q6v5 *qproc)
{
struct rproc *rproc = qproc->rproc;
void *data;
void *mba_region;
if (!qproc->has_mba_logs)
return;
@ -546,12 +561,16 @@ static void q6v5_dump_mba_logs(struct q6v5 *qproc)
qproc->mba_size))
return;
data = vmalloc(MBA_LOG_SIZE);
if (!data)
mba_region = memremap(qproc->mba_phys, qproc->mba_size, MEMREMAP_WC);
if (!mba_region)
return;
memcpy(data, qproc->mba_region, MBA_LOG_SIZE);
dev_coredumpv(&rproc->dev, data, MBA_LOG_SIZE, GFP_KERNEL);
data = vmalloc(MBA_LOG_SIZE);
if (data) {
memcpy(data, mba_region, MBA_LOG_SIZE);
dev_coredumpv(&rproc->dev, data, MBA_LOG_SIZE, GFP_KERNEL);
}
memunmap(mba_region);
}
static int q6v5proc_reset(struct q6v5 *qproc)
@ -890,11 +909,18 @@ static int q6v5_mba_load(struct q6v5 *qproc)
goto disable_active_pds;
}
ret = q6v5_regulator_enable(qproc, qproc->fallback_proxy_regs,
qproc->fallback_proxy_reg_count);
if (ret) {
dev_err(qproc->dev, "failed to enable fallback proxy supplies\n");
goto disable_proxy_pds;
}
ret = q6v5_regulator_enable(qproc, qproc->proxy_regs,
qproc->proxy_reg_count);
if (ret) {
dev_err(qproc->dev, "failed to enable proxy supplies\n");
goto disable_proxy_pds;
goto disable_fallback_proxy_reg;
}
ret = q6v5_clk_enable(qproc->dev, qproc->proxy_clks,
@ -1008,6 +1034,9 @@ static int q6v5_mba_load(struct q6v5 *qproc)
disable_proxy_reg:
q6v5_regulator_disable(qproc, qproc->proxy_regs,
qproc->proxy_reg_count);
disable_fallback_proxy_reg:
q6v5_regulator_disable(qproc, qproc->fallback_proxy_regs,
qproc->fallback_proxy_reg_count);
disable_proxy_pds:
q6v5_pds_disable(qproc, qproc->proxy_pds, qproc->proxy_pd_count);
disable_active_pds:
@ -1063,6 +1092,8 @@ static void q6v5_mba_reclaim(struct q6v5 *qproc)
qproc->proxy_pd_count);
q6v5_clk_disable(qproc->dev, qproc->proxy_clks,
qproc->proxy_clk_count);
q6v5_regulator_disable(qproc, qproc->fallback_proxy_regs,
qproc->fallback_proxy_reg_count);
q6v5_regulator_disable(qproc, qproc->proxy_regs,
qproc->proxy_reg_count);
}
@ -1179,7 +1210,7 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
goto release_firmware;
}
ptr = ioremap_wc(qproc->mpss_phys + offset, phdr->p_memsz);
ptr = memremap(qproc->mpss_phys + offset, phdr->p_memsz, MEMREMAP_WC);
if (!ptr) {
dev_err(qproc->dev,
"unable to map memory region: %pa+%zx-%x\n",
@ -1194,7 +1225,7 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
"failed to load segment %d from truncated file %s\n",
i, fw_name);
ret = -EINVAL;
iounmap(ptr);
memunmap(ptr);
goto release_firmware;
}
@ -1206,7 +1237,7 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
ptr, phdr->p_filesz);
if (ret) {
dev_err(qproc->dev, "failed to load %s\n", fw_name);
iounmap(ptr);
memunmap(ptr);
goto release_firmware;
}
@ -1217,7 +1248,7 @@ static int q6v5_mpss_load(struct q6v5 *qproc)
memset(ptr + phdr->p_filesz, 0,
phdr->p_memsz - phdr->p_filesz);
}
iounmap(ptr);
memunmap(ptr);
size += phdr->p_memsz;
code_length = readl(qproc->rmb_base + RMB_PMI_CODE_LENGTH_REG);
@ -1284,11 +1315,11 @@ static void qcom_q6v5_dump_segment(struct rproc *rproc,
}
if (!ret)
ptr = ioremap_wc(qproc->mpss_phys + offset + cp_offset, size);
ptr = memremap(qproc->mpss_phys + offset + cp_offset, size, MEMREMAP_WC);
if (ptr) {
memcpy(dest, ptr, size);
iounmap(ptr);
memunmap(ptr);
} else {
memset(dest, 0xff, size);
}
@ -1355,7 +1386,7 @@ static int q6v5_stop(struct rproc *rproc)
struct q6v5 *qproc = (struct q6v5 *)rproc->priv;
int ret;
ret = qcom_q6v5_request_stop(&qproc->q6v5);
ret = qcom_q6v5_request_stop(&qproc->q6v5, qproc->sysmon);
if (ret == -ETIMEDOUT)
dev_err(qproc->dev, "timed out on wait\n");
@ -1423,6 +1454,8 @@ static void qcom_msa_handover(struct qcom_q6v5 *q6v5)
qproc->proxy_clk_count);
q6v5_regulator_disable(qproc, qproc->proxy_regs,
qproc->proxy_reg_count);
q6v5_regulator_disable(qproc, qproc->fallback_proxy_regs,
qproc->fallback_proxy_reg_count);
q6v5_pds_disable(qproc, qproc->proxy_pds, qproc->proxy_pd_count);
}
@ -1588,12 +1621,6 @@ static int q6v5_alloc_memory_region(struct q6v5 *qproc)
qproc->mba_phys = r.start;
qproc->mba_size = resource_size(&r);
qproc->mba_region = devm_ioremap_wc(qproc->dev, qproc->mba_phys, qproc->mba_size);
if (!qproc->mba_region) {
dev_err(qproc->dev, "unable to map memory region: %pa+%zx\n",
&r.start, qproc->mba_size);
return -EBUSY;
}
if (!child) {
node = of_parse_phandle(qproc->dev->of_node,
@ -1717,11 +1744,22 @@ static int q6v5_probe(struct platform_device *pdev)
ret = q6v5_pds_attach(&pdev->dev, qproc->proxy_pds,
desc->proxy_pd_names);
if (ret < 0) {
/* Fallback to regulators for old device trees */
if (ret == -ENODATA && desc->fallback_proxy_supply) {
ret = q6v5_regulator_init(&pdev->dev,
qproc->fallback_proxy_regs,
desc->fallback_proxy_supply);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to get fallback proxy regulators.\n");
goto detach_active_pds;
}
qproc->fallback_proxy_reg_count = ret;
} else if (ret < 0) {
dev_err(&pdev->dev, "Failed to init power domains\n");
goto detach_active_pds;
} else {
qproc->proxy_pd_count = ret;
}
qproc->proxy_pd_count = ret;
qproc->has_alt_reset = desc->has_alt_reset;
ret = q6v5_init_reset(qproc);
@ -1922,6 +1960,13 @@ static const struct rproc_hexagon_res msm8996_mss = {
static const struct rproc_hexagon_res msm8916_mss = {
.hexagon_mba_image = "mba.mbn",
.proxy_supply = (struct qcom_mss_reg_res[]) {
{
.supply = "pll",
.uA = 100000,
},
{}
},
.fallback_proxy_supply = (struct qcom_mss_reg_res[]) {
{
.supply = "mx",
.uV = 1050000,
@ -1930,10 +1975,6 @@ static const struct rproc_hexagon_res msm8916_mss = {
.supply = "cx",
.uA = 100000,
},
{
.supply = "pll",
.uA = 100000,
},
{}
},
.proxy_clk_names = (char*[]){
@ -1946,6 +1987,11 @@ static const struct rproc_hexagon_res msm8916_mss = {
"mem",
NULL
},
.proxy_pd_names = (char*[]){
"mx",
"cx",
NULL
},
.need_mem_protection = false,
.has_alt_reset = false,
.has_mba_logs = false,
@ -1956,6 +2002,13 @@ static const struct rproc_hexagon_res msm8916_mss = {
static const struct rproc_hexagon_res msm8974_mss = {
.hexagon_mba_image = "mba.b00",
.proxy_supply = (struct qcom_mss_reg_res[]) {
{
.supply = "pll",
.uA = 100000,
},
{}
},
.fallback_proxy_supply = (struct qcom_mss_reg_res[]) {
{
.supply = "mx",
.uV = 1050000,
@ -1964,10 +2017,6 @@ static const struct rproc_hexagon_res msm8974_mss = {
.supply = "cx",
.uA = 100000,
},
{
.supply = "pll",
.uA = 100000,
},
{}
},
.active_supply = (struct qcom_mss_reg_res[]) {
@ -1988,6 +2037,11 @@ static const struct rproc_hexagon_res msm8974_mss = {
"mem",
NULL
},
.proxy_pd_names = (char*[]){
"mx",
"cx",
NULL
},
.need_mem_protection = false,
.has_alt_reset = false,
.has_mba_logs = false,

View File

@ -33,6 +33,7 @@ struct adsp_data {
int crash_reason_smem;
const char *firmware_name;
int pas_id;
unsigned int minidump_id;
bool has_aggre2_clk;
bool auto_boot;
@ -63,6 +64,7 @@ struct qcom_adsp {
int proxy_pd_count;
int pas_id;
unsigned int minidump_id;
int crash_reason_smem;
bool has_aggre2_clk;
const char *info_name;
@ -81,6 +83,13 @@ struct qcom_adsp {
struct qcom_sysmon *sysmon;
};
/*
 * Coredump callback used by adsp_minidump_ops: collect a minidump for the
 * subsystem identified by this instance's minidump id.
 */
static void adsp_minidump(struct rproc *rproc)
{
	struct qcom_adsp *ctx = rproc->priv;
	unsigned int id = ctx->minidump_id;

	qcom_minidump(rproc, id);
}
static int adsp_pds_enable(struct qcom_adsp *adsp, struct device **pds,
size_t pd_count)
{
@ -90,8 +99,11 @@ static int adsp_pds_enable(struct qcom_adsp *adsp, struct device **pds,
for (i = 0; i < pd_count; i++) {
dev_pm_genpd_set_performance_state(pds[i], INT_MAX);
ret = pm_runtime_get_sync(pds[i]);
if (ret < 0)
if (ret < 0) {
pm_runtime_put_noidle(pds[i]);
dev_pm_genpd_set_performance_state(pds[i], 0);
goto unroll_pd_votes;
}
}
return 0;
@ -214,7 +226,7 @@ static int adsp_stop(struct rproc *rproc)
int handover;
int ret;
ret = qcom_q6v5_request_stop(&adsp->q6v5);
ret = qcom_q6v5_request_stop(&adsp->q6v5, adsp->sysmon);
if (ret == -ETIMEDOUT)
dev_err(adsp->dev, "timed out on wait\n");
@ -258,6 +270,15 @@ static const struct rproc_ops adsp_ops = {
.panic = adsp_panic,
};
/*
* remoteproc operations selected by adsp_probe() when the match data carries
* a non-zero minidump_id; .coredump is routed to adsp_minidump().
*/
static const struct rproc_ops adsp_minidump_ops = {
.start = adsp_start,
.stop = adsp_stop,
.da_to_va = adsp_da_to_va,
.load = adsp_load,
.panic = adsp_panic,
.coredump = adsp_minidump,
};
static int adsp_init_clock(struct qcom_adsp *adsp)
{
int ret;
@ -383,6 +404,7 @@ static int adsp_probe(struct platform_device *pdev)
struct qcom_adsp *adsp;
struct rproc *rproc;
const char *fw_name;
const struct rproc_ops *ops = &adsp_ops;
int ret;
desc = of_device_get_match_data(&pdev->dev);
@ -398,8 +420,11 @@ static int adsp_probe(struct platform_device *pdev)
if (ret < 0 && ret != -EINVAL)
return ret;
rproc = rproc_alloc(&pdev->dev, pdev->name, &adsp_ops,
fw_name, sizeof(*adsp));
if (desc->minidump_id)
ops = &adsp_minidump_ops;
rproc = rproc_alloc(&pdev->dev, pdev->name, ops, fw_name, sizeof(*adsp));
if (!rproc) {
dev_err(&pdev->dev, "unable to allocate remoteproc\n");
return -ENOMEM;
@ -411,6 +436,7 @@ static int adsp_probe(struct platform_device *pdev)
adsp = (struct qcom_adsp *)rproc->priv;
adsp->dev = &pdev->dev;
adsp->rproc = rproc;
adsp->minidump_id = desc->minidump_id;
adsp->pas_id = desc->pas_id;
adsp->has_aggre2_clk = desc->has_aggre2_clk;
adsp->info_name = desc->sysmon_name;
@ -607,6 +633,7 @@ static const struct adsp_data mpss_resource_init = {
.crash_reason_smem = 421,
.firmware_name = "modem.mdt",
.pas_id = 4,
.minidump_id = 3,
.has_aggre2_clk = false,
.auto_boot = false,
.active_pd_names = (char*[]){

View File

@ -390,7 +390,7 @@ static int q6v5_wcss_stop(struct rproc *rproc)
int ret;
/* WCSS powerdown */
ret = qcom_q6v5_request_stop(&wcss->q6v5);
ret = qcom_q6v5_request_stop(&wcss->q6v5, NULL);
if (ret == -ETIMEDOUT) {
dev_err(wcss->dev, "timed out on wait\n");
return ret;

View File

@ -22,6 +22,9 @@ struct qcom_sysmon {
struct rproc_subdev subdev;
struct rproc *rproc;
int state;
struct mutex state_lock;
struct list_head node;
const char *name;
@ -41,6 +44,7 @@ struct qcom_sysmon {
struct mutex lock;
bool ssr_ack;
bool shutdown_acked;
struct qmi_handle qmi;
struct sockaddr_qrtr ssctl;
@ -112,10 +116,13 @@ static void sysmon_send_event(struct qcom_sysmon *sysmon,
/**
* sysmon_request_shutdown() - request graceful shutdown of remote
* @sysmon: sysmon context
*
* Return: boolean indicator of the remote processor acking the request
*/
static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
static bool sysmon_request_shutdown(struct qcom_sysmon *sysmon)
{
char *req = "ssr:shutdown";
bool acked = false;
int ret;
mutex_lock(&sysmon->lock);
@ -138,9 +145,13 @@ static void sysmon_request_shutdown(struct qcom_sysmon *sysmon)
if (!sysmon->ssr_ack)
dev_err(sysmon->dev,
"unexpected response to sysmon shutdown request\n");
else
acked = true;
out_unlock:
mutex_unlock(&sysmon->lock);
return acked;
}
static int sysmon_callback(struct rpmsg_device *rpdev, void *data, int count,
@ -283,7 +294,7 @@ static void sysmon_ind_cb(struct qmi_handle *qmi, struct sockaddr_qrtr *sq,
complete(&sysmon->ind_comp);
}
static struct qmi_msg_handler qmi_indication_handler[] = {
static const struct qmi_msg_handler qmi_indication_handler[] = {
{
.type = QMI_INDICATION,
.msg_id = SSCTL_SHUTDOWN_READY_IND,
@ -294,14 +305,33 @@ static struct qmi_msg_handler qmi_indication_handler[] = {
{}
};
/*
 * Wait for the remote to acknowledge a shutdown request.
 *
 * Blocks for up to 10 * HZ jiffies on shutdown_comp. If that times out, a
 * non-blocking check of ind_comp is made as a last resort before giving up.
 *
 * Return: true if either completion fired, false (after logging) otherwise.
 */
static bool ssctl_request_shutdown_wait(struct qcom_sysmon *sysmon)
{
	if (wait_for_completion_timeout(&sysmon->shutdown_comp, 10 * HZ) ||
	    try_wait_for_completion(&sysmon->ind_comp))
		return true;

	dev_err(sysmon->dev, "timeout waiting for shutdown ack\n");

	return false;
}
/**
* ssctl_request_shutdown() - request shutdown via SSCTL QMI service
* @sysmon: sysmon context
*
* Return: boolean indicator of the remote processor acking the request
*/
static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
static bool ssctl_request_shutdown(struct qcom_sysmon *sysmon)
{
struct ssctl_shutdown_resp resp;
struct qmi_txn txn;
bool acked = false;
int ret;
reinit_completion(&sysmon->ind_comp);
@ -309,7 +339,7 @@ static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
ret = qmi_txn_init(&sysmon->qmi, &txn, ssctl_shutdown_resp_ei, &resp);
if (ret < 0) {
dev_err(sysmon->dev, "failed to allocate QMI txn\n");
return;
return false;
}
ret = qmi_send_request(&sysmon->qmi, &sysmon->ssctl, &txn,
@ -317,27 +347,23 @@ static void ssctl_request_shutdown(struct qcom_sysmon *sysmon)
if (ret < 0) {
dev_err(sysmon->dev, "failed to send shutdown request\n");
qmi_txn_cancel(&txn);
return;
return false;
}
ret = qmi_txn_wait(&txn, 5 * HZ);
if (ret < 0)
dev_err(sysmon->dev, "failed receiving QMI response\n");
else if (resp.resp.result)
dev_err(sysmon->dev, "shutdown request failed\n");
else
if (ret < 0) {
dev_err(sysmon->dev, "timeout waiting for shutdown response\n");
} else if (resp.resp.result) {
dev_err(sysmon->dev, "shutdown request rejected\n");
} else {
dev_dbg(sysmon->dev, "shutdown request completed\n");
if (sysmon->shutdown_irq > 0) {
ret = wait_for_completion_timeout(&sysmon->shutdown_comp,
10 * HZ);
if (!ret) {
ret = try_wait_for_completion(&sysmon->ind_comp);
if (!ret)
dev_err(sysmon->dev,
"timeout waiting for shutdown ack\n");
}
acked = true;
}
if (sysmon->shutdown_irq > 0)
return ssctl_request_shutdown_wait(sysmon);
return acked;
}
/**
@ -371,18 +397,18 @@ static void ssctl_send_event(struct qcom_sysmon *sysmon,
SSCTL_SUBSYS_EVENT_REQ, 40,
ssctl_subsys_event_req_ei, &req);
if (ret < 0) {
dev_err(sysmon->dev, "failed to send shutdown request\n");
dev_err(sysmon->dev, "failed to send subsystem event\n");
qmi_txn_cancel(&txn);
return;
}
ret = qmi_txn_wait(&txn, 5 * HZ);
if (ret < 0)
dev_err(sysmon->dev, "failed receiving QMI response\n");
dev_err(sysmon->dev, "timeout waiting for subsystem event response\n");
else if (resp.resp.result)
dev_err(sysmon->dev, "ssr event send failed\n");
dev_err(sysmon->dev, "subsystem event rejected\n");
else
dev_dbg(sysmon->dev, "ssr event send completed\n");
dev_dbg(sysmon->dev, "subsystem event accepted\n");
}
/**
@ -448,7 +474,10 @@ static int sysmon_prepare(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_BEFORE_POWERUP
};
mutex_lock(&sysmon->state_lock);
sysmon->state = SSCTL_SSR_EVENT_BEFORE_POWERUP;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
mutex_unlock(&sysmon->state_lock);
return 0;
}
@ -472,20 +501,25 @@ static int sysmon_start(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP
};
mutex_lock(&sysmon->state_lock);
sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
mutex_unlock(&sysmon->state_lock);
mutex_lock(&sysmon_lock);
list_for_each_entry(target, &sysmon_list, node) {
if (target == sysmon ||
target->rproc->state != RPROC_RUNNING)
if (target == sysmon)
continue;
mutex_lock(&target->state_lock);
event.subsys_name = target->name;
event.ssr_event = target->state;
if (sysmon->ssctl_version == 2)
ssctl_send_event(sysmon, &event);
else if (sysmon->ept)
sysmon_send_event(sysmon, &event);
mutex_unlock(&target->state_lock);
}
mutex_unlock(&sysmon_lock);
@ -500,16 +534,21 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed)
.ssr_event = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN
};
sysmon->shutdown_acked = false;
mutex_lock(&sysmon->state_lock);
sysmon->state = SSCTL_SSR_EVENT_BEFORE_SHUTDOWN;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
mutex_unlock(&sysmon->state_lock);
/* Don't request graceful shutdown if we've crashed */
if (crashed)
return;
if (sysmon->ssctl_version)
ssctl_request_shutdown(sysmon);
sysmon->shutdown_acked = ssctl_request_shutdown(sysmon);
else if (sysmon->ept)
sysmon_request_shutdown(sysmon);
sysmon->shutdown_acked = sysmon_request_shutdown(sysmon);
}
static void sysmon_unprepare(struct rproc_subdev *subdev)
@ -521,7 +560,10 @@ static void sysmon_unprepare(struct rproc_subdev *subdev)
.ssr_event = SSCTL_SSR_EVENT_AFTER_SHUTDOWN
};
mutex_lock(&sysmon->state_lock);
sysmon->state = SSCTL_SSR_EVENT_AFTER_SHUTDOWN;
blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event);
mutex_unlock(&sysmon->state_lock);
}
/**
@ -534,11 +576,10 @@ static int sysmon_notify(struct notifier_block *nb, unsigned long event,
void *data)
{
struct qcom_sysmon *sysmon = container_of(nb, struct qcom_sysmon, nb);
struct rproc *rproc = sysmon->rproc;
struct sysmon_event *sysmon_event = data;
/* Skip non-running rprocs and the originating instance */
if (rproc->state != RPROC_RUNNING ||
if (sysmon->state != SSCTL_SSR_EVENT_AFTER_POWERUP ||
!strcmp(sysmon_event->subsys_name, sysmon->name)) {
dev_dbg(sysmon->dev, "not notifying %s\n", sysmon->name);
return NOTIFY_DONE;
@ -591,6 +632,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc,
init_completion(&sysmon->ind_comp);
init_completion(&sysmon->shutdown_comp);
mutex_init(&sysmon->lock);
mutex_init(&sysmon->state_lock);
sysmon->shutdown_irq = of_irq_get_byname(sysmon->dev->of_node,
"shutdown-ack");
@ -664,6 +706,22 @@ void qcom_remove_sysmon_subdev(struct qcom_sysmon *sysmon)
}
EXPORT_SYMBOL_GPL(qcom_remove_sysmon_subdev);
/**
* qcom_sysmon_shutdown_acked() - query the success of the last shutdown
* @sysmon: sysmon context
*
* When sysmon is used to request a graceful shutdown of the remote processor
* this can be used by the remoteproc driver to query the success, in order to
* know if it should fall back to other means of requesting a shutdown.
*
* Return: boolean indicator of the success of the last shutdown request
*/
bool qcom_sysmon_shutdown_acked(struct qcom_sysmon *sysmon)
{
return sysmon && sysmon->shutdown_acked;
}
EXPORT_SYMBOL_GPL(qcom_sysmon_shutdown_acked);
/**
* sysmon_probe() - probe sys_mon channel
* @rpdev: rpmsg device handle

View File

@ -17,6 +17,8 @@
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm_domain.h>
#include <linux/pm_runtime.h>
#include <linux/qcom_scm.h>
#include <linux/regulator/consumer.h>
#include <linux/remoteproc.h>
@ -51,12 +53,15 @@
#define WCNSS_PMU_XO_MODE_19p2 0
#define WCNSS_PMU_XO_MODE_48 3
#define WCNSS_MAX_PDS 2
struct wcnss_data {
size_t pmu_offset;
size_t spare_offset;
const char *pd_names[WCNSS_MAX_PDS];
const struct wcnss_vreg_info *vregs;
size_t num_vregs;
size_t num_vregs, num_pd_vregs;
};
struct qcom_wcnss {
@ -80,6 +85,8 @@ struct qcom_wcnss {
struct mutex iris_lock;
struct qcom_iris *iris;
struct device *pds[WCNSS_MAX_PDS];
size_t num_pds;
struct regulator_bulk_data *vregs;
size_t num_vregs;
@ -111,24 +118,28 @@ static const struct wcnss_data pronto_v1_data = {
.pmu_offset = 0x1004,
.spare_offset = 0x1088,
.pd_names = { "mx", "cx" },
.vregs = (struct wcnss_vreg_info[]) {
{ "vddmx", 950000, 1150000, 0 },
{ "vddcx", .super_turbo = true},
{ "vddpx", 1800000, 1800000, 0 },
},
.num_vregs = 3,
.num_pd_vregs = 2,
.num_vregs = 1,
};
static const struct wcnss_data pronto_v2_data = {
.pmu_offset = 0x1004,
.spare_offset = 0x1088,
.pd_names = { "mx", "cx" },
.vregs = (struct wcnss_vreg_info[]) {
{ "vddmx", 1287500, 1287500, 0 },
{ "vddcx", .super_turbo = true },
{ "vddpx", 1800000, 1800000, 0 },
},
.num_vregs = 3,
.num_pd_vregs = 2,
.num_vregs = 1,
};
void qcom_wcnss_assign_iris(struct qcom_wcnss *wcnss,
@ -219,7 +230,7 @@ static void wcnss_configure_iris(struct qcom_wcnss *wcnss)
static int wcnss_start(struct rproc *rproc)
{
struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
int ret;
int ret, i;
mutex_lock(&wcnss->iris_lock);
if (!wcnss->iris) {
@ -228,9 +239,18 @@ static int wcnss_start(struct rproc *rproc)
goto release_iris_lock;
}
for (i = 0; i < wcnss->num_pds; i++) {
dev_pm_genpd_set_performance_state(wcnss->pds[i], INT_MAX);
ret = pm_runtime_get_sync(wcnss->pds[i]);
if (ret < 0) {
pm_runtime_put_noidle(wcnss->pds[i]);
goto disable_pds;
}
}
ret = regulator_bulk_enable(wcnss->num_vregs, wcnss->vregs);
if (ret)
goto release_iris_lock;
goto disable_pds;
ret = qcom_iris_enable(wcnss->iris);
if (ret)
@ -262,6 +282,11 @@ static int wcnss_start(struct rproc *rproc)
qcom_iris_disable(wcnss->iris);
disable_regulators:
regulator_bulk_disable(wcnss->num_vregs, wcnss->vregs);
disable_pds:
for (i--; i >= 0; i--) {
pm_runtime_put(wcnss->pds[i]);
dev_pm_genpd_set_performance_state(wcnss->pds[i], 0);
}
release_iris_lock:
mutex_unlock(&wcnss->iris_lock);
@ -371,14 +396,54 @@ static irqreturn_t wcnss_stop_ack_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
/*
 * Attach the named power domains listed in @pd_names (a NULL entry ends the
 * list early) and record how many were attached in wcnss->num_pds.
 *
 * On failure every domain attached so far is detached again, in reverse
 * order, and wcnss->num_pds is left untouched.
 *
 * Return: 0 on success, the error encoded in the failed attach result, or
 * -ENODATA when the attach returned NULL.
 */
static int wcnss_init_pds(struct qcom_wcnss *wcnss,
			  const char * const pd_names[WCNSS_MAX_PDS])
{
	struct device *pd;
	int count = 0;
	int err;

	while (count < WCNSS_MAX_PDS && pd_names[count]) {
		pd = dev_pm_domain_attach_by_name(wcnss->dev, pd_names[count]);
		wcnss->pds[count] = pd;
		if (IS_ERR_OR_NULL(pd))
			goto unwind;

		count++;
	}

	wcnss->num_pds = count;

	return 0;

unwind:
	/* A NULL result carries no errno of its own; report it as -ENODATA */
	err = pd ? PTR_ERR(pd) : -ENODATA;

	while (--count >= 0)
		dev_pm_domain_detach(wcnss->pds[count], false);

	return err;
}
static void wcnss_release_pds(struct qcom_wcnss *wcnss)
{
int i;
for (i = 0; i < wcnss->num_pds; i++)
dev_pm_domain_detach(wcnss->pds[i], false);
}
static int wcnss_init_regulators(struct qcom_wcnss *wcnss,
const struct wcnss_vreg_info *info,
int num_vregs)
int num_vregs, int num_pd_vregs)
{
struct regulator_bulk_data *bulk;
int ret;
int i;
/*
* If attaching the power domains succeeded we can skip requesting
* the regulators for the power domains. For old device trees we need to
* reserve extra space to manage them through the regulator interface.
*/
if (wcnss->num_pds)
info += num_pd_vregs;
else
num_vregs += num_pd_vregs;
bulk = devm_kcalloc(wcnss->dev,
num_vregs, sizeof(struct regulator_bulk_data),
GFP_KERNEL);
@ -514,33 +579,42 @@ static int wcnss_probe(struct platform_device *pdev)
wcnss->pmu_cfg = mmio + data->pmu_offset;
wcnss->spare_out = mmio + data->spare_offset;
ret = wcnss_init_regulators(wcnss, data->vregs, data->num_vregs);
if (ret)
/*
* We might need to fallback to regulators instead of power domains
* for old device trees. Don't report an error in that case.
*/
ret = wcnss_init_pds(wcnss, data->pd_names);
if (ret && (ret != -ENODATA || !data->num_pd_vregs))
goto free_rproc;
ret = wcnss_init_regulators(wcnss, data->vregs, data->num_vregs,
data->num_pd_vregs);
if (ret)
goto detach_pds;
ret = wcnss_request_irq(wcnss, pdev, "wdog", false, wcnss_wdog_interrupt);
if (ret < 0)
goto free_rproc;
goto detach_pds;
wcnss->wdog_irq = ret;
ret = wcnss_request_irq(wcnss, pdev, "fatal", false, wcnss_fatal_interrupt);
if (ret < 0)
goto free_rproc;
goto detach_pds;
wcnss->fatal_irq = ret;
ret = wcnss_request_irq(wcnss, pdev, "ready", true, wcnss_ready_interrupt);
if (ret < 0)
goto free_rproc;
goto detach_pds;
wcnss->ready_irq = ret;
ret = wcnss_request_irq(wcnss, pdev, "handover", true, wcnss_handover_interrupt);
if (ret < 0)
goto free_rproc;
goto detach_pds;
wcnss->handover_irq = ret;
ret = wcnss_request_irq(wcnss, pdev, "stop-ack", true, wcnss_stop_ack_interrupt);
if (ret < 0)
goto free_rproc;
goto detach_pds;
wcnss->stop_ack_irq = ret;
if (wcnss->stop_ack_irq) {
@ -548,7 +622,7 @@ static int wcnss_probe(struct platform_device *pdev)
&wcnss->stop_bit);
if (IS_ERR(wcnss->state)) {
ret = PTR_ERR(wcnss->state);
goto free_rproc;
goto detach_pds;
}
}
@ -556,15 +630,17 @@ static int wcnss_probe(struct platform_device *pdev)
wcnss->sysmon = qcom_add_sysmon_subdev(rproc, "wcnss", WCNSS_SSCTL_ID);
if (IS_ERR(wcnss->sysmon)) {
ret = PTR_ERR(wcnss->sysmon);
goto free_rproc;
goto detach_pds;
}
ret = rproc_add(rproc);
if (ret)
goto free_rproc;
goto detach_pds;
return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
detach_pds:
wcnss_release_pds(wcnss);
free_rproc:
rproc_free(rproc);
@ -582,6 +658,7 @@ static int wcnss_remove(struct platform_device *pdev)
qcom_remove_sysmon_subdev(wcnss->sysmon);
qcom_remove_smd_subdev(wcnss->rproc, &wcnss->smd_subdev);
wcnss_release_pds(wcnss);
rproc_free(wcnss->rproc);
return 0;

View File

@ -1704,7 +1704,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
goto unlock_mutex;
/* generate coredump */
rproc_coredump(rproc);
rproc->ops->coredump(rproc);
/* load firmware */
ret = request_firmware(&firmware_p, rproc->firmware, dev);
@ -1934,6 +1934,69 @@ struct rproc *rproc_get_by_phandle(phandle phandle)
#endif
EXPORT_SYMBOL(rproc_get_by_phandle);
/**
 * rproc_set_firmware() - assign a new firmware
 * @rproc: rproc handle to which the new firmware is being assigned
 * @fw_name: new firmware name to be assigned
 *
 * Replaces the firmware name that will be used for the next boot of @rproc.
 * Nothing is booted here; only the stored name changes. The name is taken
 * from @fw_name up to, but not including, the first newline.
 *
 * The remote processor must be offline. The function is usable both from the
 * sysfs firmware attribute and from kernel-level remoteproc or remoteproc
 * client drivers that control the boot and shutdown of the remote processor
 * themselves.
 *
 * Return: 0 on success; -EINVAL for a NULL argument, an empty name, or when
 * the rproc lock could not be taken; -EBUSY when the remote processor is not
 * offline; -ENOMEM when the name could not be duplicated
 */
int rproc_set_firmware(struct rproc *rproc, const char *fw_name)
{
	struct device *parent;
	char *new_fw;
	int name_len;
	int rc;

	if (!rproc || !fw_name)
		return -EINVAL;

	parent = rproc->dev.parent;

	rc = mutex_lock_interruptible(&rproc->lock);
	if (rc) {
		dev_err(parent, "can't lock rproc %s: %d\n", rproc->name, rc);
		return -EINVAL;
	}

	/* From here on rc is 0 and only changes on an error path */
	if (rproc->state != RPROC_OFFLINE) {
		dev_err(parent, "can't change firmware while running\n");
		rc = -EBUSY;
		goto unlock;
	}

	/* Ignore a trailing newline, as written by e.g. "echo" into sysfs */
	name_len = strcspn(fw_name, "\n");
	if (!name_len) {
		dev_err(parent, "can't provide empty string for firmware name\n");
		rc = -EINVAL;
		goto unlock;
	}

	new_fw = kstrndup(fw_name, name_len, GFP_KERNEL);
	if (new_fw) {
		/* Swap in the copy only once the allocation has succeeded */
		kfree(rproc->firmware);
		rproc->firmware = new_fw;
	} else {
		rc = -ENOMEM;
	}

unlock:
	mutex_unlock(&rproc->lock);

	return rc;
}
EXPORT_SYMBOL(rproc_set_firmware);
static int rproc_validate(struct rproc *rproc)
{
switch (rproc->state) {
@ -2126,6 +2189,10 @@ static int rproc_alloc_ops(struct rproc *rproc, const struct rproc_ops *ops)
if (!rproc->ops)
return -ENOMEM;
/* Default to rproc_coredump if no coredump function is specified */
if (!rproc->ops->coredump)
rproc->ops->coredump = rproc_coredump;
if (rproc->ops->load)
return 0;

View File

@ -323,3 +323,143 @@ void rproc_coredump(struct rproc *rproc)
*/
wait_for_completion(&dump_state.dump_done);
}
/**
* rproc_coredump_using_sections() - perform coredump using section headers
* @rproc: rproc handle
*
* This function will generate an ELF header for the registered sections of
* segments and create a devcoredump device associated with rproc. Based on
* the coredump configuration this function will directly copy the segments
* from device memory to userspace or copy segments from device memory to
* a separate buffer, which can then be read by userspace.
* The first approach avoids using extra vmalloc memory. But it will stall
* recovery flow until dump is read by userspace.
*
* Layout of the buffer built here, in order: ELF header, section header
* table (null entry, string table entry, one entry per segment), string
* table, and - only for RPROC_COREDUMP_ENABLED - the segment contents.
*
* The function returns without producing a dump when no segments are
* registered, when coredumps are disabled, when the ELF class is unset or
* when the buffer allocation fails.
*/
void rproc_coredump_using_sections(struct rproc *rproc)
{
struct rproc_dump_segment *segment;
void *shdr;
void *ehdr;
size_t data_size;
size_t strtbl_size = 0;
/* index 0 of the string table is left as the empty name */
size_t strtbl_index = 1;
size_t offset;
void *data;
u8 class = rproc->elf_class;
int shnum;
struct rproc_coredump_state dump_state;
unsigned int dump_conf = rproc->dump_conf;
char *str_tbl = "STR_TBL";
if (list_empty(&rproc->dump_segments) ||
dump_conf == RPROC_COREDUMP_DISABLED)
return;
if (class == ELFCLASSNONE) {
dev_err(&rproc->dev, "Elf class is not set\n");
return;
}
/*
* We allocate two extra section headers. The first one is null.
* Second section header is for the string table. Also space is
* allocated for string table.
*/
data_size = elf_size_of_hdr(class) + 2 * elf_size_of_shdr(class);
shnum = 2;
/* the extra byte is for the null character at index 0 */
strtbl_size += strlen(str_tbl) + 2;
/*
* Sizing pass: one section header and one name per segment.
* NOTE(review): segment->priv is treated as a NUL-terminated section
* name here - confirm every driver using this dump path sets it.
*/
list_for_each_entry(segment, &rproc->dump_segments, node) {
data_size += elf_size_of_shdr(class);
strtbl_size += strlen(segment->priv) + 1;
if (dump_conf == RPROC_COREDUMP_ENABLED)
data_size += segment->size;
shnum++;
}
data_size += strtbl_size;
data = vmalloc(data_size);
if (!data)
return;
ehdr = data;
memset(ehdr, 0, elf_size_of_hdr(class));
/* e_ident field is common for both elf32 and elf64 */
elf_hdr_init_ident(ehdr, class);
elf_hdr_set_e_type(class, ehdr, ET_CORE);
elf_hdr_set_e_machine(class, ehdr, rproc->elf_machine);
elf_hdr_set_e_version(class, ehdr, EV_CURRENT);
elf_hdr_set_e_entry(class, ehdr, rproc->bootaddr);
/* the section header table starts right after the ELF header */
elf_hdr_set_e_shoff(class, ehdr, elf_size_of_hdr(class));
elf_hdr_set_e_ehsize(class, ehdr, elf_size_of_hdr(class));
elf_hdr_set_e_shentsize(class, ehdr, elf_size_of_shdr(class));
elf_hdr_set_e_shnum(class, ehdr, shnum);
/* section header 1 (right after the null header) is the string table */
elf_hdr_set_e_shstrndx(class, ehdr, 1);
/*
* The zeroth index of the section header is reserved and is rarely used.
* Set the section header as null (SHN_UNDEF) and move to the next one.
*/
shdr = data + elf_hdr_get_e_shoff(class, ehdr);
memset(shdr, 0, elf_size_of_shdr(class));
shdr += elf_size_of_shdr(class);
/* Initialize the string table. */
offset = elf_hdr_get_e_shoff(class, ehdr) +
elf_size_of_shdr(class) * elf_hdr_get_e_shnum(class, ehdr);
memset(data + offset, 0, strtbl_size);
/* Fill in the string table section header. */
memset(shdr, 0, elf_size_of_shdr(class));
elf_shdr_set_sh_type(class, shdr, SHT_STRTAB);
elf_shdr_set_sh_offset(class, shdr, offset);
elf_shdr_set_sh_size(class, shdr, strtbl_size);
elf_shdr_set_sh_entsize(class, shdr, 0);
elf_shdr_set_sh_flags(class, shdr, 0);
/* sh_offset must already be set: elf_strtbl_add() locates the table through it */
elf_shdr_set_sh_name(class, shdr, elf_strtbl_add(str_tbl, ehdr, class, &strtbl_index));
offset += elf_shdr_get_sh_size(class, shdr);
shdr += elf_size_of_shdr(class);
/* Fill pass: one section header per segment, data placed back to back */
list_for_each_entry(segment, &rproc->dump_segments, node) {
memset(shdr, 0, elf_size_of_shdr(class));
elf_shdr_set_sh_type(class, shdr, SHT_PROGBITS);
elf_shdr_set_sh_offset(class, shdr, offset);
elf_shdr_set_sh_addr(class, shdr, segment->da);
elf_shdr_set_sh_size(class, shdr, segment->size);
elf_shdr_set_sh_entsize(class, shdr, 0);
elf_shdr_set_sh_flags(class, shdr, SHF_WRITE);
elf_shdr_set_sh_name(class, shdr,
elf_strtbl_add(segment->priv, ehdr, class, &strtbl_index));
/* No need to copy segments for inline dumps */
if (dump_conf == RPROC_COREDUMP_ENABLED)
rproc_copy_segment(rproc, data + offset, segment, 0,
segment->size);
offset += elf_shdr_get_sh_size(class, shdr);
shdr += elf_size_of_shdr(class);
}
if (dump_conf == RPROC_COREDUMP_ENABLED) {
/*
* NOTE(review): data is not freed on this path; presumably
* dev_coredumpv() takes ownership of the buffer - confirm.
*/
dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
return;
}
/* Initialize the dump state struct to be used by rproc_coredump_read */
dump_state.rproc = rproc;
dump_state.header = data;
init_completion(&dump_state.dump_done);
dev_coredumpm(&rproc->dev, NULL, &dump_state, data_size, GFP_KERNEL,
rproc_coredump_read, rproc_coredump_free);
/* Wait until the dump is read and free is called. Data is freed
* by devcoredump framework automatically after 5 minutes.
*/
wait_for_completion(&dump_state.dump_done);
}
EXPORT_SYMBOL(rproc_coredump_using_sections);

View File

@ -65,6 +65,7 @@ ELF_GEN_FIELD_GET_SET(hdr, e_type, u16)
ELF_GEN_FIELD_GET_SET(hdr, e_version, u32)
ELF_GEN_FIELD_GET_SET(hdr, e_ehsize, u32)
ELF_GEN_FIELD_GET_SET(hdr, e_phentsize, u16)
ELF_GEN_FIELD_GET_SET(hdr, e_shentsize, u16)
ELF_GEN_FIELD_GET_SET(phdr, p_paddr, u64)
ELF_GEN_FIELD_GET_SET(phdr, p_vaddr, u64)
@ -75,6 +76,9 @@ ELF_GEN_FIELD_GET_SET(phdr, p_offset, u64)
ELF_GEN_FIELD_GET_SET(phdr, p_flags, u32)
ELF_GEN_FIELD_GET_SET(phdr, p_align, u64)
ELF_GEN_FIELD_GET_SET(shdr, sh_type, u32)
ELF_GEN_FIELD_GET_SET(shdr, sh_flags, u32)
ELF_GEN_FIELD_GET_SET(shdr, sh_entsize, u16)
ELF_GEN_FIELD_GET_SET(shdr, sh_size, u64)
ELF_GEN_FIELD_GET_SET(shdr, sh_offset, u64)
ELF_GEN_FIELD_GET_SET(shdr, sh_name, u32)
@ -93,4 +97,26 @@ ELF_STRUCT_SIZE(shdr)
ELF_STRUCT_SIZE(phdr)
ELF_STRUCT_SIZE(hdr)
/*
 * Append @name to the section-name string table of the ELF image at @ehdr.
 *
 * The string table is found through e_shstrndx; the section header table is
 * taken to start directly after the ELF header (e_shoff is not consulted).
 * @name is copied, including its terminator, to the position held in @index
 * (position 0 if @index is NULL), and @index is then advanced past the copy.
 * No check is made that the name fits inside the string table section.
 *
 * Return: position of @name inside the string table, or 0 when @name is NULL.
 */
static inline unsigned int elf_strtbl_add(const char *name, void *ehdr, u8 class, size_t *index)
{
	u16 strndx = elf_hdr_get_e_shstrndx(class, ehdr);
	void *strtab_shdr = ehdr + elf_size_of_hdr(class) +
			    strndx * elf_size_of_shdr(class);
	char *table = ehdr + elf_shdr_get_sh_offset(class, strtab_shdr);
	size_t pos = 0;

	if (index)
		pos = *index;

	if (!table || !name)
		return 0;

	strcpy(table + pos, name);

	if (index)
		*index = pos + strlen(name) + 1;

	return pos;
}
#endif /* REMOTEPROC_ELF_LOADER_H */

View File

@ -154,38 +154,9 @@ static ssize_t firmware_store(struct device *dev,
const char *buf, size_t count)
{
struct rproc *rproc = to_rproc(dev);
char *p;
int err, len = count;
int err;
err = mutex_lock_interruptible(&rproc->lock);
if (err) {
dev_err(dev, "can't lock rproc %s: %d\n", rproc->name, err);
return -EINVAL;
}
if (rproc->state != RPROC_OFFLINE) {
dev_err(dev, "can't change firmware while running\n");
err = -EBUSY;
goto out;
}
len = strcspn(buf, "\n");
if (!len) {
dev_err(dev, "can't provide a NULL firmware\n");
err = -EINVAL;
goto out;
}
p = kstrndup(buf, len, GFP_KERNEL);
if (!p) {
err = -ENOMEM;
goto out;
}
kfree(rproc->firmware);
rproc->firmware = p;
out:
mutex_unlock(&rproc->lock);
err = rproc_set_firmware(rproc, buf);
return err ? err : count;
}

View File

@ -541,7 +541,7 @@ static void stm32_rproc_kick(struct rproc *rproc, int vqid)
}
}
static struct rproc_ops st_rproc_ops = {
static const struct rproc_ops st_rproc_ops = {
.start = stm32_rproc_start,
.stop = stm32_rproc_stop,
.attach = stm32_rproc_attach,

View File

@ -445,10 +445,10 @@ static int k3_dsp_rproc_of_get_memories(struct platform_device *pdev,
kproc->mem[i].cpu_addr = devm_ioremap_wc(dev, res->start,
resource_size(res));
if (IS_ERR(kproc->mem[i].cpu_addr)) {
if (!kproc->mem[i].cpu_addr) {
dev_err(dev, "failed to map %s memory\n",
data->mems[i].name);
return PTR_ERR(kproc->mem[i].cpu_addr);
return -ENOMEM;
}
kproc->mem[i].bus_addr = res->start;
kproc->mem[i].dev_addr = data->mems[i].dev_addr;

View File

@ -38,6 +38,8 @@
#define PROC_BOOT_CFG_FLAG_R5_TCM_RSTBASE 0x00000800
#define PROC_BOOT_CFG_FLAG_R5_BTCM_EN 0x00001000
#define PROC_BOOT_CFG_FLAG_R5_ATCM_EN 0x00002000
/* Available from J7200 SoCs onwards */
#define PROC_BOOT_CFG_FLAG_R5_MEM_INIT_DIS 0x00004000
/* R5 TI-SCI Processor Control Flags */
#define PROC_BOOT_CTRL_FLAG_R5_CORE_HALT 0x00000001
@ -67,16 +69,28 @@ enum cluster_mode {
CLUSTER_MODE_LOCKSTEP,
};
/**
* struct k3_r5_soc_data - match data to handle SoC variations
* @tcm_is_double: flag to denote the larger unified TCMs in certain modes;
*                 when set, k3_r5_adjust_tcm_sizes() halves the two TCM
*                 sizes of the first core unless the cluster is in LockStep
*                 mode
* @tcm_ecc_autoinit: flag to denote the auto-initialization of TCMs for ECC;
*                    when set, k3_r5_rproc_prepare() skips the software
*                    zeroing of the TCMs unless the boot configuration has
*                    PROC_BOOT_CFG_FLAG_R5_MEM_INIT_DIS set
*
* One instance is attached to each entry of the OF match table and cached in
* struct k3_r5_cluster at probe time.
*/
struct k3_r5_soc_data {
bool tcm_is_double;
bool tcm_ecc_autoinit;
};
/**
* struct k3_r5_cluster - K3 R5F Cluster structure
* @dev: cached device pointer
* @mode: Mode to configure the Cluster - Split or LockStep
* @cores: list of R5 cores within the cluster
* @soc_data: SoC-specific feature data for a R5FSS
*/
struct k3_r5_cluster {
struct device *dev;
enum cluster_mode mode;
struct list_head cores;
const struct k3_r5_soc_data *soc_data;
};
/**
@ -362,8 +376,16 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
struct k3_r5_cluster *cluster = kproc->cluster;
struct k3_r5_core *core = kproc->core;
struct device *dev = kproc->dev;
u32 ctrl = 0, cfg = 0, stat = 0;
u64 boot_vec = 0;
bool mem_init_dis;
int ret;
ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, &stat);
if (ret < 0)
return ret;
mem_init_dis = !!(cfg & PROC_BOOT_CFG_FLAG_R5_MEM_INIT_DIS);
ret = (cluster->mode == CLUSTER_MODE_LOCKSTEP) ?
k3_r5_lockstep_release(cluster) : k3_r5_split_release(core);
if (ret) {
@ -372,6 +394,17 @@ static int k3_r5_rproc_prepare(struct rproc *rproc)
return ret;
}
/*
* Newer IP revisions like on J7200 SoCs support h/w auto-initialization
* of TCMs, so there is no need to perform the s/w memzero. This bit is
* configurable through System Firmware, the default value does perform
* auto-init, but account for it in case it is disabled
*/
if (cluster->soc_data->tcm_ecc_autoinit && !mem_init_dis) {
dev_dbg(dev, "leveraging h/w init for TCM memories\n");
return 0;
}
/*
* Zero out both TCMs unconditionally (access from v8 Arm core is not
* affected by ATCM & BTCM enable configuration values) so that ECC
@ -855,6 +888,43 @@ static void k3_r5_reserved_mem_exit(struct k3_r5_rproc *kproc)
of_reserved_mem_device_release(kproc->dev);
}
/*
 * A typical R5FSS gives each R5F core 64 KB of TCM, split evenly into a 32 KB
 * ATCM bank and a 32 KB BTCM bank. Both cores can use their TCMs in Split
 * mode, whereas LockStep mode can only use the Core0 TCMs. Newer revisions of
 * the R5FSS IP maximize TCM usage by also exposing the Core1 TCMs in modes
 * where they would otherwise be unusable (e.g. LockStep mode on J7200 SoCs):
 * each Core1 TCM is made visible immediately after the matching Core0 TCM.
 *
 * On such SoCs the memory map, and therefore the dts, describes the Core0
 * TCMs with the larger 64 KB size. In Split mode only the first half really
 * belongs to Core0, so the sizes recorded for Core0 are halved here.
 */
static void k3_r5_adjust_tcm_sizes(struct k3_r5_rproc *kproc)
{
	struct k3_r5_cluster *cluster = kproc->cluster;
	struct k3_r5_core *this_core = kproc->core;

	/* Nothing to adjust in LockStep mode or on SoCs with regular TCMs */
	if (cluster->mode == CLUSTER_MODE_LOCKSTEP)
		return;
	if (!cluster->soc_data->tcm_is_double)
		return;

	/* Only the first core of the cluster carries the doubled sizes */
	if (this_core != list_first_entry(&cluster->cores, struct k3_r5_core, elem))
		return;

	WARN_ON(this_core->mem[0].size != SZ_64K);
	WARN_ON(this_core->mem[1].size != SZ_64K);

	this_core->mem[0].size /= 2;
	this_core->mem[1].size /= 2;

	dev_dbg(this_core->dev, "adjusted TCM sizes, ATCM = 0x%zx BTCM = 0x%zx\n",
		this_core->mem[0].size, this_core->mem[1].size);
}
static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
{
struct k3_r5_cluster *cluster = platform_get_drvdata(pdev);
@ -902,6 +972,8 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
goto err_config;
}
k3_r5_adjust_tcm_sizes(kproc);
ret = k3_r5_reserved_mem_init(kproc);
if (ret) {
dev_err(dev, "reserved memory init failed, ret = %d\n",
@ -940,9 +1012,9 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev)
return ret;
}
static int k3_r5_cluster_rproc_exit(struct platform_device *pdev)
static void k3_r5_cluster_rproc_exit(void *data)
{
struct k3_r5_cluster *cluster = platform_get_drvdata(pdev);
struct k3_r5_cluster *cluster = platform_get_drvdata(data);
struct k3_r5_rproc *kproc;
struct k3_r5_core *core;
struct rproc *rproc;
@ -967,8 +1039,6 @@ static int k3_r5_cluster_rproc_exit(struct platform_device *pdev)
rproc_free(rproc);
core->rproc = NULL;
}
return 0;
}
static int k3_r5_core_of_get_internal_memories(struct platform_device *pdev,
@ -1255,9 +1325,9 @@ static void k3_r5_core_of_exit(struct platform_device *pdev)
devres_release_group(dev, k3_r5_core_of_init);
}
static void k3_r5_cluster_of_exit(struct platform_device *pdev)
static void k3_r5_cluster_of_exit(void *data)
{
struct k3_r5_cluster *cluster = platform_get_drvdata(pdev);
struct k3_r5_cluster *cluster = platform_get_drvdata(data);
struct platform_device *cpdev;
struct k3_r5_core *core, *temp;
@ -1311,15 +1381,23 @@ static int k3_r5_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct device_node *np = dev_of_node(dev);
struct k3_r5_cluster *cluster;
const struct k3_r5_soc_data *data;
int ret;
int num_cores;
data = of_device_get_match_data(&pdev->dev);
if (!data) {
dev_err(dev, "SoC-specific data is not defined\n");
return -ENODEV;
}
cluster = devm_kzalloc(dev, sizeof(*cluster), GFP_KERNEL);
if (!cluster)
return -ENOMEM;
cluster->dev = dev;
cluster->mode = CLUSTER_MODE_LOCKSTEP;
cluster->soc_data = data;
INIT_LIST_HEAD(&cluster->cores);
ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode);
@ -1351,9 +1429,7 @@ static int k3_r5_probe(struct platform_device *pdev)
return ret;
}
ret = devm_add_action_or_reset(dev,
(void(*)(void *))k3_r5_cluster_of_exit,
pdev);
ret = devm_add_action_or_reset(dev, k3_r5_cluster_of_exit, pdev);
if (ret)
return ret;
@ -1364,18 +1440,27 @@ static int k3_r5_probe(struct platform_device *pdev)
return ret;
}
ret = devm_add_action_or_reset(dev,
(void(*)(void *))k3_r5_cluster_rproc_exit,
pdev);
ret = devm_add_action_or_reset(dev, k3_r5_cluster_rproc_exit, pdev);
if (ret)
return ret;
return 0;
}
/* Match data for "ti,am654-r5fss" and "ti,j721e-r5fss": no SoC-specific TCM handling */
static const struct k3_r5_soc_data am65_j721e_soc_data = {
	.tcm_ecc_autoinit = false,
	.tcm_is_double = false,
};

/* Match data for "ti,j7200-r5fss": doubled Core0 TCMs and h/w TCM auto-init */
static const struct k3_r5_soc_data j7200_soc_data = {
	.tcm_ecc_autoinit = true,
	.tcm_is_double = true,
};
static const struct of_device_id k3_r5_of_match[] = {
{ .compatible = "ti,am654-r5fss", },
{ .compatible = "ti,j721e-r5fss", },
{ .compatible = "ti,am654-r5fss", .data = &am65_j721e_soc_data, },
{ .compatible = "ti,j721e-r5fss", .data = &am65_j721e_soc_data, },
{ .compatible = "ti,j7200-r5fss", .data = &j7200_soc_data, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, k3_r5_of_match);

View File

@ -15,6 +15,14 @@ config RPMSG_CHAR
in /dev. They make it possible for user-space programs to send and
receive rpmsg packets.
config RPMSG_NS
tristate "RPMSG name service announcement"
depends on RPMSG
help
Say Y here to enable the support of the name service announcement
channel that probes the associated RPMsg device on remote endpoint
service announcement.
config RPMSG_MTK_SCP
tristate "MediaTek SCP"
depends on MTK_SCP
@ -62,6 +70,7 @@ config RPMSG_VIRTIO
tristate "Virtio RPMSG bus driver"
depends on HAS_DMA
select RPMSG
select RPMSG_NS
select VIRTIO
endmenu

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_RPMSG) += rpmsg_core.o
obj-$(CONFIG_RPMSG_CHAR) += rpmsg_char.o
obj-$(CONFIG_RPMSG_NS) += rpmsg_ns.o
obj-$(CONFIG_RPMSG_MTK_SCP) += mtk_rpmsg.o
qcom_glink-objs := qcom_glink_native.o qcom_glink_ssr.o
obj-$(CONFIG_RPMSG_QCOM_GLINK) += qcom_glink.o

View File

@ -20,6 +20,50 @@
#include "rpmsg_internal.h"
/**
 * rpmsg_create_channel() - ask the backend to create a new rpmsg channel
 * @rpdev: rpmsg device whose backend operations are used
 * @chinfo: name and address info describing the channel to create
 *
 * Forwards the request to the create_channel operation of @rpdev's backend.
 *
 * Returns the rpmsg device handed back by the backend, or NULL when @rpdev
 * is NULL or the backend provides no create_channel operation.
 */
struct rpmsg_device *rpmsg_create_channel(struct rpmsg_device *rpdev,
					  struct rpmsg_channel_info *chinfo)
{
	if (WARN_ON(!rpdev))
		return NULL;

	/* create_channel is optional: only dispatch when the backend has it */
	if (rpdev->ops && rpdev->ops->create_channel)
		return rpdev->ops->create_channel(rpdev, chinfo);

	dev_err(&rpdev->dev, "no create_channel ops found\n");
	return NULL;
}
EXPORT_SYMBOL(rpmsg_create_channel);
/**
 * rpmsg_release_channel() - ask the backend to release an rpmsg channel
 * @rpdev: rpmsg device whose backend operations are used
 * @chinfo: name and address info identifying the channel to release
 *
 * Forwards the request to the release_channel operation of @rpdev's backend.
 *
 * Returns the backend's result, -EINVAL when @rpdev is NULL, or -ENXIO when
 * the backend provides no release_channel operation.
 */
int rpmsg_release_channel(struct rpmsg_device *rpdev,
			  struct rpmsg_channel_info *chinfo)
{
	if (WARN_ON(!rpdev))
		return -EINVAL;

	/* release_channel is optional: only dispatch when the backend has it */
	if (rpdev->ops && rpdev->ops->release_channel)
		return rpdev->ops->release_channel(rpdev, chinfo);

	dev_err(&rpdev->dev, "no release_channel ops found\n");
	return -ENXIO;
}
EXPORT_SYMBOL(rpmsg_release_channel);
/**
* rpmsg_create_ept() - create a new rpmsg_endpoint
* @rpdev: rpmsg channel device

View File

@ -21,6 +21,8 @@
/**
* struct rpmsg_device_ops - indirection table for the rpmsg_device operations
* @create_channel: create backend-specific channel, optional
* @release_channel: release backend-specific channel, optional
* @create_ept: create backend-specific endpoint, required
* @announce_create: announce presence of new channel, optional
* @announce_destroy: announce destruction of channel, optional
@ -30,6 +32,10 @@
* advertise new channels implicitly by creating the endpoints.
*/
struct rpmsg_device_ops {
struct rpmsg_device *(*create_channel)(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo);
int (*release_channel)(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo);
struct rpmsg_endpoint *(*create_ept)(struct rpmsg_device *rpdev,
rpmsg_rx_cb_t cb, void *priv,
struct rpmsg_channel_info chinfo);
@ -73,13 +79,13 @@ struct rpmsg_endpoint_ops {
int (*set_signals)(struct rpmsg_endpoint *ept, u32 set, u32 clear);
};
int rpmsg_register_device(struct rpmsg_device *rpdev);
int rpmsg_unregister_device(struct device *parent,
struct rpmsg_channel_info *chinfo);
struct device *rpmsg_find_device(struct device *parent,
struct rpmsg_channel_info *chinfo);
struct rpmsg_device *rpmsg_create_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo);
int rpmsg_release_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo);
/**
* rpmsg_chrdev_register_device() - register chrdev device based on rpdev
* @rpdev: prepared rpdev to be used for creating endpoints

126
drivers/rpmsg/rpmsg_ns.c Normal file
View File

@ -0,0 +1,126 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) STMicroelectronics 2020 - All Rights Reserved
*/
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rpmsg.h>
#include <linux/rpmsg/ns.h>
#include <linux/slab.h>
#include "rpmsg_internal.h"
/**
 * rpmsg_ns_register_device() - register the name service rpmsg device
 * @rpdev: rpmsg device prepared by the caller; its id name, driver override
 *         and source/destination addresses are overwritten here
 *
 * Sets both addresses of @rpdev to RPMSG_NS_ADDR, names it "rpmsg_ns" and
 * forces the "rpmsg_ns" driver through driver_override, then hands the
 * device to rpmsg_register_device().
 *
 * NOTE(review): driver_override points at a string literal here - confirm
 * that no path (sysfs store, device release) ever tries to kfree() it.
 *
 * Returns the value returned by rpmsg_register_device().
 */
int rpmsg_ns_register_device(struct rpmsg_device *rpdev)
{
	/* the name service lives at a fixed, well-known address */
	rpdev->src = RPMSG_NS_ADDR;
	rpdev->dst = RPMSG_NS_ADDR;

	/* bind this device to the name service driver only */
	strcpy(rpdev->id.name, "rpmsg_ns");
	rpdev->driver_override = "rpmsg_ns";

	return rpmsg_register_device(rpdev);
}
EXPORT_SYMBOL(rpmsg_ns_register_device);
/*
 * rpmsg_ns_cb() - invoked when a name service announcement arrives
 * @rpdev: name service rpmsg device the announcement was received on
 * @data: received payload, expected to be exactly one struct rpmsg_ns_msg
 * @len: length of @data in bytes
 * @priv: endpoint private data (unused)
 * @src: source address of the message (unused)
 *
 * Depending on the RPMSG_NS_DESTROY flag of the announcement, either releases
 * or creates the announced channel through the backend of @rpdev. Messages are
 * logged against the parent of @rpdev.
 *
 * Returns -EINVAL when @len does not match the announcement size. Otherwise
 * returns 0, even when creating or releasing the channel fails (the failure
 * is only logged).
 */
static int rpmsg_ns_cb(struct rpmsg_device *rpdev, void *data, int len,
		       void *priv, u32 src)
{
	struct rpmsg_ns_msg *msg = data;
	struct rpmsg_device *newch;
	struct rpmsg_channel_info chinfo;
	struct device *dev = rpdev->dev.parent;
	u32 flags;
	int ret;

#if defined(CONFIG_DYNAMIC_DEBUG)
	dynamic_hex_dump("NS announcement: ", DUMP_PREFIX_NONE, 16, 1,
			 data, len, true);
#endif

	if (len != sizeof(*msg)) {
		dev_err(dev, "malformed ns msg (%d)\n", len);
		return -EINVAL;
	}

	/* don't trust the remote processor for null terminating the name */
	msg->name[RPMSG_NAME_SIZE - 1] = '\0';

	strncpy(chinfo.name, msg->name, sizeof(chinfo.name));
	chinfo.src = RPMSG_ADDR_ANY;
	chinfo.dst = rpmsg32_to_cpu(rpdev, msg->addr);

	/* convert the flags once; they select both the log text and the action */
	flags = rpmsg32_to_cpu(rpdev, msg->flags);

	dev_info(dev, "%sing channel %s addr 0x%x\n",
		 flags & RPMSG_NS_DESTROY ? "destroy" : "creat",
		 msg->name, chinfo.dst);

	if (flags & RPMSG_NS_DESTROY) {
		ret = rpmsg_release_channel(rpdev, &chinfo);
		if (ret)
			dev_err(dev, "rpmsg_release_channel failed: %d\n", ret);
	} else {
		newch = rpmsg_create_channel(rpdev, &chinfo);
		if (!newch)
			dev_err(dev, "rpmsg_create_channel failed\n");
	}

	return 0;
}
/*
 * rpmsg_ns_probe() - bind the name service driver to its rpmsg device
 * @rpdev: name service rpmsg device being probed
 *
 * Creates the endpoint that receives name service announcements (handled by
 * rpmsg_ns_cb()) at RPMSG_NS_ADDR and stores it in @rpdev->ept.
 *
 * Returns 0 on success or -ENOMEM when the endpoint cannot be created.
 */
static int rpmsg_ns_probe(struct rpmsg_device *rpdev)
{
	struct rpmsg_channel_info ns_chinfo = {
		.src = RPMSG_NS_ADDR,
		.dst = RPMSG_NS_ADDR,
		.name = "name_service",
	};
	struct rpmsg_endpoint *ept;

	/*
	 * The NS announcement endpoint is tied to the RPMsg device: it is
	 * destroyed automatically when the RPMsg device is deleted.
	 */
	ept = rpmsg_create_ept(rpdev, rpmsg_ns_cb, NULL, ns_chinfo);
	if (ept) {
		rpdev->ept = ept;
		return 0;
	}

	dev_err(&rpdev->dev, "failed to create the ns ept\n");
	return -ENOMEM;
}
/*
 * rpmsg driver for the name service device: named after the module and
 * bound through rpmsg_ns_probe(). No other callbacks are set here.
 */
static struct rpmsg_driver rpmsg_ns_driver = {
.drv.name = KBUILD_MODNAME,
.probe = rpmsg_ns_probe,
};
/*
 * rpmsg_ns_init() - register the name service rpmsg driver
 *
 * Run as a postcore initcall. Logs an error when registration fails.
 *
 * Returns the value returned by register_rpmsg_driver().
 */
static int rpmsg_ns_init(void)
{
	int err = register_rpmsg_driver(&rpmsg_ns_driver);

	if (err < 0)
		pr_err("%s: Failed to register rpmsg driver\n", __func__);

	return err;
}
postcore_initcall(rpmsg_ns_init);
/* Module exit handler: unregister the name service rpmsg driver. */
static void rpmsg_ns_exit(void)
{
unregister_rpmsg_driver(&rpmsg_ns_driver);
}
module_exit(rpmsg_ns_exit);
MODULE_DESCRIPTION("Name service announcement rpmsg driver");
MODULE_AUTHOR("Arnaud Pouliquen <arnaud.pouliquen@st.com>");
MODULE_ALIAS("rpmsg:" KBUILD_MODNAME);
MODULE_LICENSE("GPL v2");

View File

@ -19,11 +19,12 @@
#include <linux/mutex.h>
#include <linux/of_device.h>
#include <linux/rpmsg.h>
#include <linux/rpmsg/byteorder.h>
#include <linux/rpmsg/ns.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/virtio.h>
#include <linux/virtio_byteorder.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/wait.h>
@ -48,7 +49,6 @@
* @endpoints_lock: lock of the endpoints set
* @sendq: wait queue of sending contexts waiting for a tx buffers
* @sleepers: number of senders that are waiting for a tx buffer
* @ns_ept: the bus's name service endpoint
*
* This structure stores the rpmsg state of a given virtio remote processor
* device (there might be several virtio proc devices for each physical
@ -67,7 +67,6 @@ struct virtproc_info {
struct mutex endpoints_lock;
wait_queue_head_t sendq;
atomic_t sleepers;
struct rpmsg_endpoint *ns_ept;
};
/* The feature bitmap for virtio rpmsg */
@ -85,42 +84,14 @@ struct virtproc_info {
* Every message sent(/received) on the rpmsg bus begins with this header.
*/
struct rpmsg_hdr {
__virtio32 src;
__virtio32 dst;
__virtio32 reserved;
__virtio16 len;
__virtio16 flags;
__rpmsg32 src;
__rpmsg32 dst;
__rpmsg32 reserved;
__rpmsg16 len;
__rpmsg16 flags;
u8 data[];
} __packed;
/**
* struct rpmsg_ns_msg - dynamic name service announcement message
* @name: name of remote service that is published
* @addr: address of remote service that is published
* @flags: indicates whether service is created or destroyed
*
* This message is sent across to publish a new service, or announce
* about its removal. When we receive these messages, an appropriate
* rpmsg channel (i.e device) is created/destroyed. In turn, the ->probe()
* or ->remove() handler of the appropriate rpmsg driver will be invoked
* (if/as-soon-as one is registered).
*/
struct rpmsg_ns_msg {
char name[RPMSG_NAME_SIZE];
__virtio32 addr;
__virtio32 flags;
} __packed;
/**
* enum rpmsg_ns_flags - dynamic name service announcement flags
*
* @RPMSG_NS_CREATE: a new remote service was just created
* @RPMSG_NS_DESTROY: a known remote service was just destroyed
*/
enum rpmsg_ns_flags {
RPMSG_NS_CREATE = 0,
RPMSG_NS_DESTROY = 1,
};
/**
* struct virtio_rpmsg_channel - rpmsg channel descriptor
@ -167,9 +138,6 @@ struct virtio_rpmsg_channel {
*/
#define RPMSG_RESERVED_ADDRESSES (1024)
/* Address 53 is reserved for advertising remote services */
#define RPMSG_NS_ADDR (53)
static void virtio_rpmsg_destroy_ept(struct rpmsg_endpoint *ept);
static int virtio_rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len);
static int virtio_rpmsg_sendto(struct rpmsg_endpoint *ept, void *data, int len,
@ -181,6 +149,8 @@ static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
int len, u32 dst);
static int virtio_rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
u32 dst, void *data, int len);
static struct rpmsg_device *__rpmsg_create_channel(struct virtproc_info *vrp,
struct rpmsg_channel_info *chinfo);
static const struct rpmsg_endpoint_ops virtio_endpoint_ops = {
.destroy_ept = virtio_rpmsg_destroy_ept,
@ -285,6 +255,24 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp,
return NULL;
}
static struct rpmsg_device *virtio_rpmsg_create_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo)
{
struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev);
struct virtproc_info *vrp = vch->vrp;
return __rpmsg_create_channel(vrp, chinfo);
}
static int virtio_rpmsg_release_channel(struct rpmsg_device *rpdev,
struct rpmsg_channel_info *chinfo)
{
struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev);
struct virtproc_info *vrp = vch->vrp;
return rpmsg_unregister_device(&vrp->vdev->dev, chinfo);
}
static struct rpmsg_endpoint *virtio_rpmsg_create_ept(struct rpmsg_device *rpdev,
rpmsg_rx_cb_t cb,
void *priv,
@ -341,8 +329,8 @@ static int virtio_rpmsg_announce_create(struct rpmsg_device *rpdev)
struct rpmsg_ns_msg nsm;
strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE);
nsm.addr = cpu_to_virtio32(vrp->vdev, rpdev->ept->addr);
nsm.flags = cpu_to_virtio32(vrp->vdev, RPMSG_NS_CREATE);
nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr);
nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_CREATE);
err = rpmsg_sendto(rpdev->ept, &nsm, sizeof(nsm), RPMSG_NS_ADDR);
if (err)
@ -365,8 +353,8 @@ static int virtio_rpmsg_announce_destroy(struct rpmsg_device *rpdev)
struct rpmsg_ns_msg nsm;
strncpy(nsm.name, rpdev->id.name, RPMSG_NAME_SIZE);
nsm.addr = cpu_to_virtio32(vrp->vdev, rpdev->ept->addr);
nsm.flags = cpu_to_virtio32(vrp->vdev, RPMSG_NS_DESTROY);
nsm.addr = cpu_to_rpmsg32(rpdev, rpdev->ept->addr);
nsm.flags = cpu_to_rpmsg32(rpdev, RPMSG_NS_DESTROY);
err = rpmsg_sendto(rpdev->ept, &nsm, sizeof(nsm), RPMSG_NS_ADDR);
if (err)
@ -377,6 +365,8 @@ static int virtio_rpmsg_announce_destroy(struct rpmsg_device *rpdev)
}
static const struct rpmsg_device_ops virtio_rpmsg_ops = {
.create_channel = virtio_rpmsg_create_channel,
.release_channel = virtio_rpmsg_release_channel,
.create_ept = virtio_rpmsg_create_ept,
.announce_create = virtio_rpmsg_announce_create,
.announce_destroy = virtio_rpmsg_announce_destroy,
@ -395,8 +385,8 @@ static void virtio_rpmsg_release_device(struct device *dev)
* this function will be used to create both static and dynamic
* channels.
*/
static struct rpmsg_device *rpmsg_create_channel(struct virtproc_info *vrp,
struct rpmsg_channel_info *chinfo)
static struct rpmsg_device *__rpmsg_create_channel(struct virtproc_info *vrp,
struct rpmsg_channel_info *chinfo)
{
struct virtio_rpmsg_channel *vch;
struct rpmsg_device *rpdev;
@ -425,6 +415,7 @@ static struct rpmsg_device *rpmsg_create_channel(struct virtproc_info *vrp,
rpdev->src = chinfo->src;
rpdev->dst = chinfo->dst;
rpdev->ops = &virtio_rpmsg_ops;
rpdev->little_endian = virtio_is_little_endian(vrp->vdev);
/*
* rpmsg server channels has predefined local address (for now),
@ -618,10 +609,10 @@ static int rpmsg_send_offchannel_raw(struct rpmsg_device *rpdev,
}
}
msg->len = cpu_to_virtio16(vrp->vdev, len);
msg->len = cpu_to_rpmsg16(rpdev, len);
msg->flags = 0;
msg->src = cpu_to_virtio32(vrp->vdev, src);
msg->dst = cpu_to_virtio32(vrp->vdev, dst);
msg->src = cpu_to_rpmsg32(rpdev, src);
msg->dst = cpu_to_rpmsg32(rpdev, dst);
msg->reserved = 0;
memcpy(msg->data, data, len);
@ -710,14 +701,15 @@ static int rpmsg_recv_single(struct virtproc_info *vrp, struct device *dev,
{
struct rpmsg_endpoint *ept;
struct scatterlist sg;
unsigned int msg_len = virtio16_to_cpu(vrp->vdev, msg->len);
bool little_endian = virtio_is_little_endian(vrp->vdev);
unsigned int msg_len = __rpmsg16_to_cpu(little_endian, msg->len);
int err;
dev_dbg(dev, "From: 0x%x, To: 0x%x, Len: %d, Flags: %d, Reserved: %d\n",
virtio32_to_cpu(vrp->vdev, msg->src),
virtio32_to_cpu(vrp->vdev, msg->dst), msg_len,
virtio16_to_cpu(vrp->vdev, msg->flags),
virtio32_to_cpu(vrp->vdev, msg->reserved));
__rpmsg32_to_cpu(little_endian, msg->src),
__rpmsg32_to_cpu(little_endian, msg->dst), msg_len,
__rpmsg16_to_cpu(little_endian, msg->flags),
__rpmsg32_to_cpu(little_endian, msg->reserved));
#if defined(CONFIG_DYNAMIC_DEBUG)
dynamic_hex_dump("rpmsg_virtio RX: ", DUMP_PREFIX_NONE, 16, 1,
msg, sizeof(*msg) + msg_len, true);
@ -736,7 +728,7 @@ static int rpmsg_recv_single(struct virtproc_info *vrp, struct device *dev,
/* use the dst addr to fetch the callback of the appropriate user */
mutex_lock(&vrp->endpoints_lock);
ept = idr_find(&vrp->endpoints, virtio32_to_cpu(vrp->vdev, msg->dst));
ept = idr_find(&vrp->endpoints, __rpmsg32_to_cpu(little_endian, msg->dst));
/* let's make sure no one deallocates ept while we use it */
if (ept)
@ -750,7 +742,7 @@ static int rpmsg_recv_single(struct virtproc_info *vrp, struct device *dev,
if (ept->cb)
ept->cb(ept->rpdev, msg->data, msg_len, ept->priv,
virtio32_to_cpu(vrp->vdev, msg->src));
__rpmsg32_to_cpu(little_endian, msg->src));
mutex_unlock(&ept->cb_lock);
@ -821,68 +813,14 @@ static void rpmsg_xmit_done(struct virtqueue *svq)
wake_up_interruptible(&vrp->sendq);
}
/* invoked when a name service announcement arrives */
static int rpmsg_ns_cb(struct rpmsg_device *rpdev, void *data, int len,
void *priv, u32 src)
{
struct rpmsg_ns_msg *msg = data;
struct rpmsg_device *newch;
struct rpmsg_channel_info chinfo;
struct virtproc_info *vrp = priv;
struct device *dev = &vrp->vdev->dev;
int ret;
#if defined(CONFIG_DYNAMIC_DEBUG)
dynamic_hex_dump("NS announcement: ", DUMP_PREFIX_NONE, 16, 1,
data, len, true);
#endif
if (len != sizeof(*msg)) {
dev_err(dev, "malformed ns msg (%d)\n", len);
return -EINVAL;
}
/*
* the name service ept does _not_ belong to a real rpmsg channel,
* and is handled by the rpmsg bus itself.
* for sanity reasons, make sure a valid rpdev has _not_ sneaked
* in somehow.
*/
if (rpdev) {
dev_err(dev, "anomaly: ns ept has an rpdev handle\n");
return -EINVAL;
}
/* don't trust the remote processor for null terminating the name */
msg->name[RPMSG_NAME_SIZE - 1] = '\0';
strncpy(chinfo.name, msg->name, sizeof(chinfo.name));
chinfo.src = RPMSG_ADDR_ANY;
chinfo.dst = virtio32_to_cpu(vrp->vdev, msg->addr);
dev_info(dev, "%sing channel %s addr 0x%x\n",
virtio32_to_cpu(vrp->vdev, msg->flags) & RPMSG_NS_DESTROY ?
"destroy" : "creat", msg->name, chinfo.dst);
if (virtio32_to_cpu(vrp->vdev, msg->flags) & RPMSG_NS_DESTROY) {
ret = rpmsg_unregister_device(&vrp->vdev->dev, &chinfo);
if (ret)
dev_err(dev, "rpmsg_destroy_channel failed: %d\n", ret);
} else {
newch = rpmsg_create_channel(vrp, &chinfo);
if (!newch)
dev_err(dev, "rpmsg_create_channel failed\n");
}
return 0;
}
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
struct virtio_rpmsg_channel *vch;
struct rpmsg_device *rpdev_ns;
void *bufs_va;
int err = 0, i;
size_t total_buf_space;
@ -958,14 +896,26 @@ static int rpmsg_probe(struct virtio_device *vdev)
/* if supported by the remote processor, enable the name service */
if (virtio_has_feature(vdev, VIRTIO_RPMSG_F_NS)) {
/* a dedicated endpoint handles the name service msgs */
vrp->ns_ept = __rpmsg_create_ept(vrp, NULL, rpmsg_ns_cb,
vrp, RPMSG_NS_ADDR);
if (!vrp->ns_ept) {
dev_err(&vdev->dev, "failed to create the ns ept\n");
vch = kzalloc(sizeof(*vch), GFP_KERNEL);
if (!vch) {
err = -ENOMEM;
goto free_coherent;
}
/* Link the channel to our vrp */
vch->vrp = vrp;
/* Assign public information to the rpmsg_device */
rpdev_ns = &vch->rpdev;
rpdev_ns->ops = &virtio_rpmsg_ops;
rpdev_ns->little_endian = virtio_is_little_endian(vrp->vdev);
rpdev_ns->dev.parent = &vrp->vdev->dev;
rpdev_ns->dev.release = virtio_rpmsg_release_device;
err = rpmsg_ns_register_device(rpdev_ns);
if (err)
goto free_coherent;
}
/*
@ -990,6 +940,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
return 0;
free_coherent:
kfree(vch);
dma_free_coherent(vdev->dev.parent, total_buf_space,
bufs_va, vrp->bufs_dma);
vqs_del:
@ -1018,9 +969,6 @@ static void rpmsg_remove(struct virtio_device *vdev)
if (ret)
dev_warn(&vdev->dev, "can't remove rpmsg device: %d\n", ret);
if (vrp->ns_ept)
__rpmsg_destroy_ept(vrp, vrp->ns_ept);
idr_destroy(&vrp->endpoints);
vdev->config->del_vqs(vrp->vdev);

View File

@ -3,7 +3,7 @@
obj-$(CONFIG_BTRFS_FS) := btrfs.o
btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
file-item.o inode-item.o inode-map.o disk-io.o \
file-item.o inode-item.o disk-io.o \
transaction.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
@ -16,6 +16,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \

View File

@ -783,8 +783,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(ref->key_for_search.type);
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info, ref->wanted_disk_byte, 0,
ref->level - 1, NULL);
eb = read_tree_block(fs_info, ref->wanted_disk_byte,
ref->root_id, 0, ref->level - 1, NULL);
if (IS_ERR(eb)) {
free_pref(ref);
return PTR_ERR(eb);
@ -1331,7 +1331,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
eb = read_tree_block(fs_info, ref->parent, 0,
ref->level, NULL);
0, ref->level, NULL);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
@ -1341,14 +1341,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
goto out;
}
if (!path->skip_locking) {
if (!path->skip_locking)
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_read(eb);
}
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie, ignore_offset);
if (!path->skip_locking)
btrfs_tree_read_unlock_blocking(eb);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
if (ret < 0)
goto out;
@ -1671,13 +1669,11 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = ((s64)size) - 1;
struct extent_buffer *eb = eb_in;
struct btrfs_key found_key;
int leave_spinning = path->leave_spinning;
struct btrfs_inode_ref *iref;
if (bytes_left >= 0)
dest[bytes_left] = '\0';
path->leave_spinning = 1;
while (1) {
bytes_left -= name_len;
if (bytes_left >= 0)
@ -1685,7 +1681,7 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
name_off, name_len);
if (eb != eb_in) {
if (!path->skip_locking)
btrfs_tree_read_unlock_blocking(eb);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
ret = btrfs_find_item(fs_root, path, parent, 0,
@ -1705,8 +1701,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
eb = path->nodes[0];
/* make sure we can use eb after releasing the path */
if (eb != eb_in) {
if (!path->skip_locking)
btrfs_set_lock_blocking_read(eb);
path->nodes[0] = NULL;
path->locks[0] = 0;
}
@ -1723,7 +1717,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
btrfs_release_path(path);
path->leave_spinning = leave_spinning;
if (ret)
return ERR_PTR(ret);

View File

@ -424,6 +424,23 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
return ret;
}
/*
 * Report whether @cache has left the BTRFS_CACHE_FAST state. The state is
 * sampled under cache->lock.
 */
static bool space_cache_v1_done(struct btrfs_block_group *cache)
{
	bool finished;

	spin_lock(&cache->lock);
	finished = (cache->cached != BTRFS_CACHE_FAST);
	spin_unlock(&cache->lock);

	return finished;
}
/*
 * Sleep on @caching_ctl->wait until space_cache_v1_done() reports that
 * @cache is no longer in the BTRFS_CACHE_FAST state.
 */
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
struct btrfs_caching_control *caching_ctl)
{
wait_event(caching_ctl->wait, space_cache_v1_done(cache));
}
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group *block_group)
{
@ -639,11 +656,28 @@ static noinline void caching_thread(struct btrfs_work *work)
mutex_lock(&caching_ctl->mutex);
down_read(&fs_info->commit_root_sem);
if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
ret = load_free_space_cache(block_group);
if (ret == 1) {
ret = 0;
goto done;
}
/*
* We failed to load the space cache, set ourselves to
* CACHE_STARTED and carry on.
*/
spin_lock(&block_group->lock);
block_group->cached = BTRFS_CACHE_STARTED;
spin_unlock(&block_group->lock);
wake_up(&caching_ctl->wait);
}
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
ret = load_free_space_tree(caching_ctl);
else
ret = load_extent_tree_free(caching_ctl);
done:
spin_lock(&block_group->lock);
block_group->caching_ctl = NULL;
block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
@ -679,7 +713,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
{
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl;
struct btrfs_caching_control *caching_ctl = NULL;
int ret = 0;
caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
@ -691,119 +725,41 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->start;
refcount_set(&caching_ctl->count, 1);
refcount_set(&caching_ctl->count, 2);
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
spin_lock(&cache->lock);
/*
* This should be a rare occasion, but this could happen I think in the
* case where one thread starts to load the space cache info, and then
* some other thread starts a transaction commit which tries to do an
* allocation while the other thread is still loading the space cache
* info. The previous loop should have kept us from choosing this block
* group, but if we've moved to the state where we will wait on caching
* block groups we need to first check if we're doing a fast load here,
* so we can wait for it to finish, otherwise we could end up allocating
* from a block group who's cache gets evicted for one reason or
* another.
*/
while (cache->cached == BTRFS_CACHE_FAST) {
struct btrfs_caching_control *ctl;
ctl = cache->caching_ctl;
refcount_inc(&ctl->count);
prepare_to_wait(&ctl->wait, &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&cache->lock);
schedule();
finish_wait(&ctl->wait, &wait);
btrfs_put_caching_control(ctl);
spin_lock(&cache->lock);
}
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
kfree(caching_ctl);
return 0;
caching_ctl = cache->caching_ctl;
if (caching_ctl)
refcount_inc(&caching_ctl->count);
spin_unlock(&cache->lock);
goto out;
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_FAST;
if (btrfs_test_opt(fs_info, SPACE_CACHE))
cache->cached = BTRFS_CACHE_FAST;
else
cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
if (btrfs_test_opt(fs_info, SPACE_CACHE)) {
mutex_lock(&caching_ctl->mutex);
ret = load_free_space_cache(cache);
spin_lock(&cache->lock);
if (ret == 1) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_FINISHED;
cache->last_byte_to_unpin = (u64)-1;
caching_ctl->progress = (u64)-1;
} else {
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
}
}
spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
if (ret == 1 &&
btrfs_should_fragment_free_space(cache)) {
u64 bytes_used;
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
bytes_used = cache->length - cache->used;
cache->space_info->bytes_used += bytes_used >> 1;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
fragment_free_space(cache);
}
#endif
mutex_unlock(&caching_ctl->mutex);
wake_up(&caching_ctl->wait);
if (ret == 1) {
btrfs_put_caching_control(caching_ctl);
btrfs_free_excluded_extents(cache);
return 0;
}
} else {
/*
* We're either using the free space tree or no caching at all.
* Set cached to the appropriate value and wakeup any waiters.
*/
spin_lock(&cache->lock);
if (load_cache_only) {
cache->caching_ctl = NULL;
cache->cached = BTRFS_CACHE_NO;
} else {
cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
}
spin_unlock(&cache->lock);
wake_up(&caching_ctl->wait);
}
if (load_cache_only) {
btrfs_put_caching_control(caching_ctl);
return 0;
}
down_write(&fs_info->commit_root_sem);
spin_lock(&fs_info->block_group_cache_lock);
refcount_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->commit_root_sem);
spin_unlock(&fs_info->block_group_cache_lock);
btrfs_get_block_group(cache);
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
if (load_cache_only && caching_ctl)
btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
return ret;
}
@ -892,8 +848,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_key key;
struct inode *inode;
struct kobject *kobj = NULL;
int ret;
@ -971,42 +925,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&trans->transaction->dirty_bgs_lock);
mutex_unlock(&trans->transaction->cache_write_mutex);
if (!IS_ERR(inode)) {
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
if (block_group->iref) {
block_group->iref = 0;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
iput(inode);
} else {
spin_unlock(&block_group->lock);
}
/* One for our lookup ref */
btrfs_add_delayed_iput(inode);
}
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.type = 0;
key.offset = block_group->start;
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
ret = btrfs_remove_free_space_inode(trans, inode, block_group);
if (ret)
goto out;
if (ret > 0)
btrfs_release_path(path);
if (ret == 0) {
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
goto out;
btrfs_release_path(path);
}
spin_lock(&fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
@ -1043,7 +964,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (block_group->cached == BTRFS_CACHE_STARTED)
btrfs_wait_block_group_cache_done(block_group);
if (block_group->has_caching_ctl) {
down_write(&fs_info->commit_root_sem);
spin_lock(&fs_info->block_group_cache_lock);
if (!caching_ctl) {
struct btrfs_caching_control *ctl;
@ -1057,7 +978,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
if (caching_ctl)
list_del_init(&caching_ctl->list);
up_write(&fs_info->commit_root_sem);
spin_unlock(&fs_info->block_group_cache_lock);
if (caching_ctl) {
/* Once for the caching bgs list and once for us. */
btrfs_put_caching_control(caching_ctl);
@ -1723,6 +1644,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
static int exclude_super_stripes(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
const bool zoned = btrfs_is_zoned(fs_info);
u64 bytenr;
u64 *logical;
int stripe_len;
@ -1744,6 +1666,14 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (ret)
return ret;
/* Shouldn't have super stripes in sequential zones */
if (zoned && nr) {
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
cache->start);
return -EUCLEAN;
}
while (nr--) {
u64 len = min_t(u64, stripe_len,
cache->start + cache->length - logical[nr]);
@ -1805,7 +1735,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
@ -1985,6 +1915,51 @@ static int read_one_block_group(struct btrfs_fs_info *info,
return ret;
}
/*
 * Create a placeholder block group for every extent map found in
 * fs_info->mapping_tree. Each block group takes its start, length and type
 * from the mapping and is recorded as completely used and already cached
 * (BTRFS_CACHE_FINISHED).
 *
 * btrfs_read_block_groups() calls this when fs_info->extent_root is NULL.
 *
 * Returns 0 on success, -ENOMEM if a block group cannot be allocated, or the
 * error returned by btrfs_add_block_group_cache(). The global block reserve
 * is only initialised when every block group was added successfully.
 */
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
{
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct btrfs_space_info *space_info;
	struct rb_node *node;
	int ret = 0;

	for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
		struct extent_map *em;
		struct map_lookup *map;
		struct btrfs_block_group *bg;

		em = rb_entry(node, struct extent_map, rb_node);
		map = em->map_lookup;
		bg = btrfs_create_block_group_cache(fs_info, em->start);
		if (!bg) {
			ret = -ENOMEM;
			break;
		}

		/* Fill dummy cache as FULL */
		bg->length = em->len;
		bg->flags = map->type;
		bg->last_byte_to_unpin = (u64)-1;
		bg->cached = BTRFS_CACHE_FINISHED;
		bg->used = em->len;

		ret = btrfs_add_block_group_cache(fs_info, bg);
		if (ret) {
			/* not inserted: tear the block group down again */
			btrfs_remove_free_space_cache(bg);
			btrfs_put_block_group(bg);
			break;
		}

		/* account the whole range as both total and used bytes */
		btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
					0, &space_info);
		bg->space_info = space_info;
		link_block_group(bg);

		set_avail_alloc_bits(fs_info, bg->flags);
	}

	if (!ret)
		btrfs_init_global_block_rsv(fs_info);

	return ret;
}
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
@ -1995,6 +1970,9 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
int need_clear = 0;
u64 cache_gen;
if (!info->extent_root)
return fill_dummy_bgs(info);
key.objectid = 0;
key.offset = 0;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@ -2152,7 +2130,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->needs_free_space = 1;
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
cache->needs_free_space = 1;
ret = exclude_super_stripes(cache);
if (ret) {
/* We may have excluded something, so call this just in case */
@ -2361,6 +2340,9 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
int retries = 0;
int ret = 0;
if (!btrfs_test_opt(fs_info, SPACE_CACHE))
return 0;
/*
* If this block group is smaller than 100 megs don't bother caching the
* block group.
@ -2401,7 +2383,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
* time.
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret) {
/*
* So theoretically we could recover from this, simply set the
@ -3307,14 +3289,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
down_write(&info->commit_root_sem);
spin_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next,
struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
}
up_write(&info->commit_root_sem);
spin_unlock(&info->block_group_cache_lock);
spin_lock(&info->unused_bgs_lock);
while (!list_empty(&info->unused_bgs)) {

View File

@ -268,6 +268,8 @@ void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
struct btrfs_caching_control *caching_ctl);
static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
{

View File

@ -426,6 +426,14 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->delayed_block_rsv.space_info = space_info;
fs_info->delayed_refs_rsv.space_info = space_info;
/*
* Our various recovery options can leave us with NULL roots, so check
* here and just bail before we go dereferencing NULLs everywhere.
*/
if (!fs_info->extent_root || !fs_info->csum_root ||
!fs_info->dev_root || !fs_info->chunk_root || !fs_info->tree_root)
return;
fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;

View File

@ -35,6 +35,13 @@ enum {
BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
/*
* Set and used when logging an inode and it serves to signal that an
* inode does not have xattrs, so subsequent fsyncs can avoid searching
* for xattrs to log. This bit must be cleared whenever a xattr is added
* to an inode.
*/
BTRFS_INODE_NO_XATTRS,
};
/* in memory btrfs inode */
@ -50,7 +57,8 @@ struct btrfs_inode {
/*
* Lock for counters and all fields used to determine if the inode is in
* the log or not (last_trans, last_sub_trans, last_log_commit,
* logged_trans).
* logged_trans), to access/update new_delalloc_bytes and to update the
* VFS' inode number of bytes used.
*/
spinlock_t lock;
@ -203,16 +211,6 @@ struct btrfs_inode {
/* Hook into fs_info->delayed_iputs */
struct list_head delayed_iput;
/*
* To avoid races between lockless (i_mutex not held) direct IO writes
* and concurrent fsync requests. Direct IO writes must acquire read
* access on this semaphore for creating an extent map and its
* corresponding ordered extent. The fast fsync path must acquire write
* access on this semaphore before it collects ordered extents and
* extent maps.
*/
struct rw_semaphore dio_sem;
struct inode vfs_inode;
};
@ -341,8 +339,7 @@ static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
struct btrfs_super_block *sb = root->fs_info->super_copy;
const u16 csum_size = btrfs_super_csum_size(sb);
const u32 csum_size = root->fs_info->csum_size;
/* Output minus objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)

View File

@ -233,7 +233,6 @@ struct btrfsic_stack_frame {
struct btrfsic_state {
u32 print_mask;
int include_extent_data;
int csum_size;
struct list_head all_blocks_list;
struct btrfsic_block_hashtable block_hashtable;
struct btrfsic_block_link_hashtable block_link_hashtable;
@ -660,8 +659,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
return -1;
}
state->csum_size = btrfs_super_csum_size(selected_super);
for (pass = 0; pass < 3; pass++) {
int num_copies;
int mirror_num;
@ -954,7 +951,7 @@ static noinline_for_stack int btrfsic_process_metablock(
sf->prev = NULL;
continue_with_new_stack_frame:
sf->block->generation = le64_to_cpu(sf->hdr->generation);
sf->block->generation = btrfs_stack_header_generation(sf->hdr);
if (0 == sf->hdr->level) {
struct btrfs_leaf *const leafhdr =
(struct btrfs_leaf *)sf->hdr;
@ -1723,7 +1720,7 @@ static noinline_for_stack int btrfsic_test_for_metadata(
crypto_shash_update(shash, data, sublen);
}
crypto_shash_final(shash, csum);
if (memcmp(csum, h->csum, state->csum_size))
if (memcmp(csum, h->csum, fs_info->csum_size))
return 1;
return 0; /* is metadata */
@ -2695,8 +2692,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n",
bio_op(bio), bio->bi_opf, segs,
(unsigned long long)bio->bi_iter.bi_sector,
dev_bytenr, bio->bi_disk);
bio->bi_iter.bi_sector, dev_bytenr, bio->bi_disk);
mapped_datav = kmalloc_array(segs,
sizeof(*mapped_datav), GFP_NOFS);
@ -2797,7 +2793,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
state->fs_info = fs_info;
state->print_mask = print_mask;
state->include_extent_data = including_extent_data;
state->csum_size = 0;
state->metablock_size = fs_info->nodesize;
state->datablock_size = fs_info->sectorsize;
INIT_LIST_HEAD(&state->all_blocks_list);

View File

@ -131,10 +131,8 @@ static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
unsigned long disk_size)
{
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
return sizeof(struct compressed_bio) +
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * csum_size;
(DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
}
static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
@ -142,7 +140,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
const u32 csum_size = fs_info->csum_size;
struct page *page;
unsigned long i;
char *kaddr;
@ -150,7 +148,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
struct compressed_bio *cb = bio->bi_private;
u8 *cb_sum = cb->sums;
if (inode->flags & BTRFS_INODE_NODATASUM)
if (!fs_info->csum_root || (inode->flags & BTRFS_INODE_NODATASUM))
return 0;
shash->tfm = fs_info->csum_shash;
@ -220,7 +218,7 @@ static void end_compressed_bio_read(struct bio *bio)
inode = cb->inode;
ret = check_compressed_csum(BTRFS_I(inode), bio,
(u64)bio->bi_iter.bi_sector << 9);
bio->bi_iter.bi_sector << 9);
if (ret)
goto csum_failed;
@ -622,13 +620,12 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
unsigned long pg_index;
struct page *page;
struct bio *comp_bio;
u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
u64 em_len;
u64 em_start;
struct extent_map *em;
blk_status_t ret = BLK_STS_RESOURCE;
int faili = 0;
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
u8 *sums;
em_tree = &BTRFS_I(inode)->extent_tree;
@ -722,15 +719,12 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
*/
refcount_inc(&cb->pending_bios);
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(inode, comp_bio,
(u64)-1, sums);
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
BUG_ON(ret); /* -ENOMEM */
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
fs_info->sectorsize);
sums += csum_size * nr_sectors;
sums += fs_info->csum_size * nr_sectors;
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
@ -751,10 +745,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
BUG_ON(ret); /* -ENOMEM */
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
BUG_ON(ret); /* -ENOMEM */
}
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {

View File

@ -1278,14 +1278,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
if (!tm)
return eb;
btrfs_set_path_blocking(path);
btrfs_set_lock_blocking_read(eb);
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
return NULL;
}
@ -1297,13 +1294,13 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
} else {
eb_rewin = btrfs_clone_extent_buffer(eb);
if (!eb_rewin) {
btrfs_tree_read_unlock_blocking(eb);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
return NULL;
}
}
btrfs_tree_read_unlock_blocking(eb);
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
@ -1356,7 +1353,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
old = read_tree_block(fs_info, logical, 0, level, NULL);
old = read_tree_block(fs_info, logical, root->root_key.objectid,
0, level, NULL);
if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
if (!IS_ERR(old))
free_extent_buffer(old);
@ -1373,9 +1371,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
eb = alloc_dummy_extent_buffer(fs_info, logical);
} else {
btrfs_set_lock_blocking_read(eb_root);
eb = btrfs_clone_extent_buffer(eb_root);
btrfs_tree_read_unlock_blocking(eb_root);
btrfs_tree_read_unlock(eb_root);
free_extent_buffer(eb_root);
}
@ -1483,10 +1480,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
search_start = buf->start & ~((u64)SZ_1G - 1);
if (parent)
btrfs_set_lock_blocking_write(parent);
btrfs_set_lock_blocking_write(buf);
/*
* Before CoWing this block for later modification, check if it's
* the subtree root and do the delayed subtree trace if needed.
@ -1578,7 +1571,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *cur;
u64 blocknr;
u64 gen;
u64 search_start = *last_ret;
u64 last_block = 0;
u64 other;
@ -1586,14 +1578,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
int end_slot;
int i;
int err = 0;
int parent_level;
int uptodate;
u32 blocksize;
int progress_passed = 0;
struct btrfs_disk_key disk_key;
parent_level = btrfs_header_level(parent);
WARN_ON(trans->transaction != fs_info->running_transaction);
WARN_ON(trans->transid != fs_info->generation);
@ -1604,10 +1592,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (parent_nritems <= 1)
return 0;
btrfs_set_lock_blocking_write(parent);
for (i = start_slot; i <= end_slot; i++) {
struct btrfs_key first_key;
int close = 1;
btrfs_node_key(parent, &disk_key, i);
@ -1616,8 +1601,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
progress_passed = 1;
blocknr = btrfs_node_blockptr(parent, i);
gen = btrfs_node_ptr_generation(parent, i);
btrfs_node_key_to_cpu(parent, &first_key, i);
if (last_block == 0)
last_block = blocknr;
@ -1634,36 +1617,13 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
continue;
}
cur = find_extent_buffer(fs_info, blocknr);
if (cur)
uptodate = btrfs_buffer_uptodate(cur, gen, 0);
else
uptodate = 0;
if (!cur || !uptodate) {
if (!cur) {
cur = read_tree_block(fs_info, blocknr, gen,
parent_level - 1,
&first_key);
if (IS_ERR(cur)) {
return PTR_ERR(cur);
} else if (!extent_buffer_uptodate(cur)) {
free_extent_buffer(cur);
return -EIO;
}
} else if (!uptodate) {
err = btrfs_read_buffer(cur, gen,
parent_level - 1,&first_key);
if (err) {
free_extent_buffer(cur);
return err;
}
}
}
cur = btrfs_read_node_slot(parent, i);
if (IS_ERR(cur))
return PTR_ERR(cur);
if (search_start == 0)
search_start = last_block;
btrfs_tree_lock(cur);
btrfs_set_lock_blocking_write(cur);
err = __btrfs_cow_block(trans, root, cur, parent, i,
&cur, search_start,
min(16 * blocksize,
@ -1723,9 +1683,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
oip = offset_in_page(offset);
if (oip + key_size <= PAGE_SIZE) {
const unsigned long idx = offset >> PAGE_SHIFT;
const unsigned long idx = get_eb_page_index(offset);
char *kaddr = page_address(eb->pages[idx]);
oip = get_eb_offset_in_page(eb, offset);
tmp = (struct btrfs_disk_key *)(kaddr + oip);
} else {
read_extent_buffer(eb, &unaligned, offset, key_size);
@ -1801,6 +1762,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
btrfs_node_key_to_cpu(parent, &first_key, slot);
eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
btrfs_header_owner(parent),
btrfs_node_ptr_generation(parent, slot),
level - 1, &first_key);
if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
@ -1835,8 +1797,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
mid = path->nodes[level];
WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
WARN_ON(btrfs_header_generation(mid) != trans->transid);
orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@ -1865,7 +1826,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
}
btrfs_tree_lock(child);
btrfs_set_lock_blocking_write(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
BTRFS_NESTING_COW);
if (ret) {
@ -1904,7 +1864,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (left) {
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_set_lock_blocking_write(left);
wret = btrfs_cow_block(trans, root, left,
parent, pslot - 1, &left,
BTRFS_NESTING_LEFT_COW);
@ -1920,7 +1879,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (right) {
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_set_lock_blocking_write(right);
wret = btrfs_cow_block(trans, root, right,
parent, pslot + 1, &right,
BTRFS_NESTING_RIGHT_COW);
@ -2084,7 +2042,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
u32 left_nr;
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_set_lock_blocking_write(left);
left_nr = btrfs_header_nritems(left);
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@ -2139,7 +2096,6 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
u32 right_nr;
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_set_lock_blocking_write(right);
right_nr = btrfs_header_nritems(right);
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@ -2243,7 +2199,7 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
search = btrfs_node_blockptr(node, nr);
if ((search <= target && target - search <= 65536) ||
(search > target && search - target <= 65536)) {
readahead_tree_block(fs_info, search);
btrfs_readahead_node_child(node, nr);
nread += blocksize;
}
nscan++;
@ -2252,16 +2208,11 @@ static void reada_for_search(struct btrfs_fs_info *fs_info,
}
}
static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
struct btrfs_path *path, int level)
static noinline void reada_for_balance(struct btrfs_path *path, int level)
{
struct extent_buffer *parent;
int slot;
int nritems;
struct extent_buffer *parent;
struct extent_buffer *eb;
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
parent = path->nodes[level + 1];
if (!parent)
@ -2270,32 +2221,10 @@ static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
nritems = btrfs_header_nritems(parent);
slot = path->slots[level + 1];
if (slot > 0) {
block1 = btrfs_node_blockptr(parent, slot - 1);
gen = btrfs_node_ptr_generation(parent, slot - 1);
eb = find_extent_buffer(fs_info, block1);
/*
* if we get -eagain from btrfs_buffer_uptodate, we
* don't want to return eagain here. That will loop
* forever
*/
if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block1 = 0;
free_extent_buffer(eb);
}
if (slot + 1 < nritems) {
block2 = btrfs_node_blockptr(parent, slot + 1);
gen = btrfs_node_ptr_generation(parent, slot + 1);
eb = find_extent_buffer(fs_info, block2);
if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
block2 = 0;
free_extent_buffer(eb);
}
if (block1)
readahead_tree_block(fs_info, block1);
if (block2)
readahead_tree_block(fs_info, block2);
if (slot > 0)
btrfs_readahead_node_child(parent, slot - 1);
if (slot + 1 < nritems)
btrfs_readahead_node_child(parent, slot + 1);
}
@ -2399,14 +2328,6 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
return 0;
}
/* the pages were up to date, but we failed
* the generation number check. Do a full
* read for the generation number that is correct.
* We must do this without dropping locks so
* we can trust our generation number
*/
btrfs_set_path_blocking(p);
/* now we're allowed to do a blocking uptodate check */
ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
if (!ret) {
@ -2426,14 +2347,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* out which blocks to read.
*/
btrfs_unlock_up_safe(p, level + 1);
btrfs_set_path_blocking(p);
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
&first_key);
tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
gen, parent_level - 1, &first_key);
if (!IS_ERR(tmp)) {
/*
* If the read above didn't mark this buffer up to date,
@ -2468,58 +2388,42 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
int *write_lock_level)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
int ret = 0;
if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
int sret;
if (*write_lock_level < level + 1) {
*write_lock_level = level + 1;
btrfs_release_path(p);
goto again;
return -EAGAIN;
}
btrfs_set_path_blocking(p);
reada_for_balance(fs_info, p, level);
sret = split_node(trans, root, p, level);
reada_for_balance(p, level);
ret = split_node(trans, root, p, level);
BUG_ON(sret > 0);
if (sret) {
ret = sret;
goto done;
}
b = p->nodes[level];
} else if (ins_len < 0 && btrfs_header_nritems(b) <
BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
int sret;
if (*write_lock_level < level + 1) {
*write_lock_level = level + 1;
btrfs_release_path(p);
goto again;
return -EAGAIN;
}
btrfs_set_path_blocking(p);
reada_for_balance(fs_info, p, level);
sret = balance_level(trans, root, p, level);
reada_for_balance(p, level);
ret = balance_level(trans, root, p, level);
if (ret)
return ret;
if (sret) {
ret = sret;
goto done;
}
b = p->nodes[level];
if (!b) {
btrfs_release_path(p);
goto again;
return -EAGAIN;
}
BUG_ON(btrfs_header_nritems(b) == 1);
}
return 0;
again:
ret = -EAGAIN;
done:
return ret;
}
@ -2616,7 +2520,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
* We don't know the level of the root node until we actually
* have it read locked
*/
b = __btrfs_read_lock_root_node(root, p->recurse);
b = btrfs_read_lock_root_node(root);
level = btrfs_header_level(b);
if (level > write_lock_level)
goto out;
@ -2752,7 +2656,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
goto again;
}
btrfs_set_path_blocking(p);
if (last_level)
err = btrfs_cow_block(trans, root, b, NULL, 0,
&b,
@ -2822,7 +2725,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
goto again;
}
btrfs_set_path_blocking(p);
err = split_leaf(trans, root, key,
p, ins_len, ret == 0);
@ -2884,17 +2786,10 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (!p->skip_locking) {
level = btrfs_header_level(b);
if (level <= write_lock_level) {
if (!btrfs_try_tree_write_lock(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_lock(b);
}
btrfs_tree_lock(b);
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
__btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL,
p->recurse);
}
btrfs_tree_read_lock(b);
p->locks[level] = BTRFS_READ_LOCK;
}
p->nodes[level] = b;
@ -2902,12 +2797,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
ret = 1;
done:
/*
* we don't really know what they plan on doing with the path
* from here on, so for now just mark it as blocking
*/
if (!p->leave_spinning)
btrfs_set_path_blocking(p);
if (ret < 0 && !p->skip_release_on_error)
btrfs_release_path(p);
return ret;
@ -2999,10 +2888,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
}
level = btrfs_header_level(b);
if (!btrfs_tree_read_lock_atomic(b)) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
}
btrfs_tree_read_lock(b);
b = tree_mod_log_rewind(fs_info, p, b, time_seq);
if (!b) {
ret = -ENOMEM;
@ -3013,8 +2899,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
}
ret = 1;
done:
if (!p->leave_spinning)
btrfs_set_path_blocking(p);
if (ret < 0)
btrfs_release_path(p);
@ -3441,7 +3325,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
atomic_inc(&c->refs);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
path->slots[level] = 0;
return 0;
}
@ -3562,7 +3446,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
(c_nritems - mid) * sizeof(struct btrfs_key_ptr));
btrfs_set_header_nritems(split, c_nritems - mid);
btrfs_set_header_nritems(c, mid);
ret = 0;
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
@ -3580,7 +3463,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(split);
free_extent_buffer(split);
}
return ret;
return 0;
}
/*
@ -3814,7 +3697,6 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
return 1;
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_set_lock_blocking_write(right);
free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
@ -4053,7 +3935,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
return 1;
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_set_lock_blocking_write(left);
free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
@ -4448,7 +4329,6 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
goto err;
}
btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &key, path, ins_len, 1);
if (ret)
goto err;
@ -4478,8 +4358,6 @@ static noinline int split_item(struct btrfs_path *path,
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
btrfs_set_path_blocking(path);
item = btrfs_item_nr(path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@ -5055,7 +4933,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (leaf == root->node) {
btrfs_set_header_level(leaf, 0);
} else {
btrfs_set_path_blocking(path);
btrfs_clean_tree_block(leaf);
btrfs_del_leaf(trans, root, path, leaf);
}
@ -5077,7 +4954,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
slot = path->slots[1];
atomic_inc(&leaf->refs);
btrfs_set_path_blocking(path);
wret = push_leaf_left(trans, root, path, 1, 1,
1, (u32)-1);
if (wret < 0 && wret != -ENOSPC)
@ -5248,7 +5124,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
*/
if (slot >= nritems) {
path->slots[level] = slot;
btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level,
min_trans);
if (sret == 0) {
@ -5265,7 +5140,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
ret = 0;
goto out;
}
btrfs_set_path_blocking(path);
cur = btrfs_read_node_slot(cur, slot);
if (IS_ERR(cur)) {
ret = PTR_ERR(cur);
@ -5282,7 +5156,6 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
path->keep_locks = keep_locks;
if (ret == 0) {
btrfs_unlock_up_safe(path, path->lowest_level + 1);
btrfs_set_path_blocking(path);
memcpy(min_key, &found_key, sizeof(found_key));
}
return ret;
@ -5384,8 +5257,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key key;
u32 nritems;
int ret;
int old_spinning = path->leave_spinning;
int next_rw_lock = 0;
int i;
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
@ -5395,11 +5267,9 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
again:
level = 1;
next = NULL;
next_rw_lock = 0;
btrfs_release_path(path);
path->keep_locks = 1;
path->leave_spinning = 1;
if (time_seq)
ret = btrfs_search_old_slot(root, &key, path, time_seq);
@ -5459,13 +5329,22 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
continue;
}
if (next) {
btrfs_tree_unlock_rw(next, next_rw_lock);
free_extent_buffer(next);
/*
* Our current level is where we're going to start from, and to
* make sure lockdep doesn't complain we need to drop our locks
* and nodes from 0 to our current level.
*/
for (i = 0; i < level; i++) {
if (path->locks[level]) {
btrfs_tree_read_unlock(path->nodes[i]);
path->locks[i] = 0;
}
free_extent_buffer(path->nodes[i]);
path->nodes[i] = NULL;
}
next = c;
next_rw_lock = path->locks[level];
ret = read_block_for_search(root, path, &next, level,
slot, &key);
if (ret == -EAGAIN)
@ -5491,28 +5370,18 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
cond_resched();
goto again;
}
if (!ret) {
btrfs_set_path_blocking(path);
__btrfs_tree_read_lock(next,
BTRFS_NESTING_RIGHT,
path->recurse);
}
next_rw_lock = BTRFS_READ_LOCK;
if (!ret)
btrfs_tree_read_lock(next);
}
break;
}
path->slots[level] = slot;
while (1) {
level--;
c = path->nodes[level];
if (path->locks[level])
btrfs_tree_unlock_rw(c, path->locks[level]);
free_extent_buffer(c);
path->nodes[level] = next;
path->slots[level] = 0;
if (!path->skip_locking)
path->locks[level] = next_rw_lock;
path->locks[level] = BTRFS_READ_LOCK;
if (!level)
break;
@ -5526,23 +5395,12 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
goto done;
}
if (!path->skip_locking) {
ret = btrfs_try_tree_read_lock(next);
if (!ret) {
btrfs_set_path_blocking(path);
__btrfs_tree_read_lock(next,
BTRFS_NESTING_RIGHT,
path->recurse);
}
next_rw_lock = BTRFS_READ_LOCK;
}
if (!path->skip_locking)
btrfs_tree_read_lock(next);
}
ret = 0;
done:
unlock_up(path, 0, 1, 0, NULL);
path->leave_spinning = old_spinning;
if (!old_spinning)
btrfs_set_path_blocking(path);
return ret;
}
@ -5564,7 +5422,6 @@ int btrfs_previous_item(struct btrfs_root *root,
while (1) {
if (path->slots[0] == 0) {
btrfs_set_path_blocking(path);
ret = btrfs_prev_leaf(root, path);
if (ret != 0)
return ret;
@ -5606,7 +5463,6 @@ int btrfs_previous_extent_item(struct btrfs_root *root,
while (1) {
if (path->slots[0] == 0) {
btrfs_set_path_blocking(path);
ret = btrfs_prev_leaf(root, path);
if (ret != 0)
return ret;

View File

@ -27,6 +27,7 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include <linux/iomap.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
@ -65,12 +66,6 @@ struct btrfs_ref;
#define BTRFS_OLDEST_GENERATION 0ULL
/*
* the max metadata block size. This limit is somewhat artificial,
* but the memmove costs go through the roof for larger blocks.
*/
#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
/*
* we can actually store much bigger names, but let's not confuse the rest
* of Linux
@ -369,11 +364,9 @@ struct btrfs_path {
unsigned int search_for_split:1;
unsigned int keep_locks:1;
unsigned int skip_locking:1;
unsigned int leave_spinning:1;
unsigned int search_commit_root:1;
unsigned int need_commit_sem:1;
unsigned int skip_release_on_error:1;
unsigned int recurse:1;
};
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
@ -468,10 +461,11 @@ struct btrfs_discard_ctl {
struct btrfs_block_group *block_group;
struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
u64 prev_discard;
u64 prev_discard_time;
atomic_t discardable_extents;
atomic64_t discardable_bytes;
u64 max_discard_size;
unsigned long delay;
u64 delay_ms;
u32 iops_limit;
u32 kbps_limit;
u64 discard_extent_bytes;
@ -558,6 +552,9 @@ enum {
/* Indicate that the discard workqueue can service discards. */
BTRFS_FS_DISCARD_RUNNING,
/* Indicate that we need to cleanup space cache v1 */
BTRFS_FS_CLEANUP_SPACE_CACHE_V1,
};
/*
@ -911,6 +908,7 @@ struct btrfs_fs_info {
/* Extent buffer radix tree */
spinlock_t buffer_lock;
/* Entries are eb->start / sectorsize */
struct radix_tree_root buffer_radix;
/* next backup root to be overwritten */
@ -933,6 +931,10 @@ struct btrfs_fs_info {
/* Cached block sizes */
u32 nodesize;
u32 sectorsize;
/* ilog2 of sectorsize, used to avoid 64-bit division */
u32 sectorsize_bits;
u32 csum_size;
u32 csums_per_leaf;
u32 stripesize;
/* Block groups and devices containing active swapfiles. */
@ -950,6 +952,18 @@ struct btrfs_fs_info {
/* Type of exclusive operation running */
unsigned long exclusive_operation;
/*
* Zone size > 0 when in ZONED mode, otherwise it's used for a check
* if the mode is enabled
*/
union {
u64 zone_size;
u64 zoned;
};
/* Max size to emit ZONE_APPEND write command */
u64 max_zone_append_size;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@ -1020,7 +1034,7 @@ enum {
BTRFS_ROOT_DEAD_RELOC_TREE,
/* Mark dead root stored on device whose cleanup needs to be resumed */
BTRFS_ROOT_DEAD_TREE,
/* The root has a log tree. Used only for subvolume roots. */
/* The root has a log tree. Used for subvolume roots and the tree root. */
BTRFS_ROOT_HAS_LOG_TREE,
/* Qgroup flushing is in progress */
BTRFS_ROOT_QGROUP_FLUSHING,
@ -1059,15 +1073,6 @@ struct btrfs_root {
spinlock_t accounting_lock;
struct btrfs_block_rsv *block_rsv;
/* free ino cache stuff */
struct btrfs_free_space_ctl *free_ino_ctl;
enum btrfs_caching_type ino_cache_state;
spinlock_t ino_cache_lock;
wait_queue_head_t ino_cache_wait;
struct btrfs_free_space_ctl *free_ino_pinned;
u64 ino_cache_progress;
struct inode *ino_cache_inode;
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
@ -1226,6 +1231,63 @@ struct btrfs_replace_extent_info {
int insertions;
};
/*
 * Arguments for btrfs_drop_extents().
 *
 * The members are split into two groups: input parameters, which describe
 * the range to operate on and how to operate on it, and output parameters,
 * which btrfs_drop_extents() sets to report what it did.
 */
struct btrfs_drop_extents_args {
/* Input parameters */
/*
 * Path to use for the operation.
 *
 * If NULL, btrfs_drop_extents() will allocate and free its own path.
 * If 'replace_extent' is true, this must not be NULL. Also the path
 * is always released except if 'replace_extent' is true and
 * btrfs_drop_extents() sets 'extent_inserted' to true, in which case
 * the path is kept locked.
 */
struct btrfs_path *path;
/* Start offset of the range to drop extents from */
u64 start;
/* End (exclusive, last byte + 1) of the range to drop extents from */
u64 end;
/* If true, drop all the extent maps in the range */
bool drop_cache;
/*
 * If true, it means we want to insert a new extent after dropping all
 * the extents in the range. If this is true, the 'extent_item_size'
 * parameter must be set as well and the 'extent_inserted' field will
 * be set to true by btrfs_drop_extents() if it could insert the new
 * extent.
 * Note: when this is set to true the path must not be NULL.
 */
bool replace_extent;
/*
 * Used only if 'replace_extent' is true. Size of the file extent item to
 * insert after dropping all existing extents in the range.
 */
u32 extent_item_size;
/* Output parameters */
/*
 * Set to the minimum between the input parameter 'end' and the end
 * (exclusive, last byte + 1) of the last dropped extent. This is always
 * set even if btrfs_drop_extents() returns an error.
 */
u64 drop_end;
/*
 * The number of allocated bytes found in the range. This can be smaller
 * than the range's length when there are holes in the range.
 */
u64 bytes_found;
/*
 * Only set if 'replace_extent' is true. Set to true if we were able
 * to insert a replacement extent after dropping all extents in the
 * range, otherwise set to false by btrfs_drop_extents().
 * Also, if btrfs_drop_extents() has set this to true it means it
 * returned with the path locked, otherwise if it has set this to
 * false it has returned with the path released.
 */
bool extent_inserted;
};
struct btrfs_file_private {
void *filldir_buf;
};
@ -1284,7 +1346,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
/* bit 17 is free */
#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
@ -1297,6 +1359,8 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)
#define BTRFS_MOUNT_IGNOREBADROOTS (1 << 30)
#define BTRFS_MOUNT_IGNOREDATACSUMS (1 << 31)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
@ -1329,9 +1393,7 @@ do { \
* transaction commit)
*/
#define BTRFS_PENDING_SET_INODE_MAP_CACHE (0)
#define BTRFS_PENDING_CLEAR_INODE_MAP_CACHE (1)
#define BTRFS_PENDING_COMMIT (2)
#define BTRFS_PENDING_COMMIT (0)
#define btrfs_test_pending(info, opt) \
test_bit(BTRFS_PENDING_##opt, &(info)->pending_changes)
@ -1404,7 +1466,7 @@ struct btrfs_map_token {
};
#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
((bytes) >> (fs_info)->sb->s_blocksize_bits)
((bytes) >> (fs_info)->sectorsize_bits)
static inline void btrfs_init_map_token(struct btrfs_map_token *token,
struct extent_buffer *eb)
@ -1489,13 +1551,14 @@ static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
{ \
const type *p = page_address(eb->pages[0]); \
const type *p = page_address(eb->pages[0]) + \
offset_in_page(eb->start); \
return get_unaligned_le##bits(&p->member); \
} \
static inline void btrfs_set_##name(const struct extent_buffer *eb, \
u##bits val) \
{ \
type *p = page_address(eb->pages[0]); \
type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \
put_unaligned_le##bits(val, &p->member); \
}
@ -2085,6 +2148,7 @@ BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
generation, 64);
BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8);
BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
@ -2517,7 +2581,17 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes);
/*
 * Return how many leaves are needed to store the checksums covering
 * @csum_bytes bytes of data.
 *
 * The byte count is first converted to a number of checksums by shifting it
 * by fs_info->sectorsize_bits, and that number is then divided by
 * fs_info->csums_per_leaf, rounding up.
 */
static inline u64 btrfs_csum_bytes_to_leaves(
			const struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
	u64 nr_csums = csum_bytes;

	nr_csums >>= fs_info->sectorsize_bits;

	return DIV_ROUND_UP_ULL(nr_csums, fs_info->csums_per_leaf);
}
/*
* Use this if we would be adding new items, as we could split nodes as we cow
@ -2592,7 +2666,6 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
u64 len);
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref);
@ -2939,8 +3012,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 offset, u8 *dst);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
@ -2967,13 +3039,13 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size);
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size);
u64 btrfs_file_extent_end(const struct btrfs_path *path);
/* inode.c */
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u64 phy_offset,
int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
struct page *page, u64 start, u64 end, int mirror);
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
@ -2993,11 +3065,11 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const char *name, int name_len, int add_backref, u64 index);
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode, u64 new_size,
struct btrfs_inode *inode, u64 new_size,
u32 min_type);
int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
@ -3037,14 +3109,13 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
struct btrfs_root *root, struct btrfs_inode *inode);
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
struct btrfs_root *root, struct btrfs_inode *inode);
int btrfs_orphan_add(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info);
int btrfs_wait_on_delayed_iputs(struct btrfs_fs_info *fs_info);
@ -3062,7 +3133,18 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
u64 end, int uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dio_ops;
/* Inode locking type flags, by default the exclusive lock is taken */
#define BTRFS_ILOCK_SHARED (1U << 0)
#define BTRFS_ILOCK_TRY (1U << 1)
int btrfs_inode_lock(struct inode *inode, unsigned int ilock_flags);
void btrfs_inode_unlock(struct inode *inode, unsigned int ilock_flags);
void btrfs_update_inode_bytes(struct btrfs_inode *inode,
const u64 add_bytes,
const u64 del_bytes);
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
@ -3092,16 +3174,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_path *path, u64 start, u64 end,
u64 *drop_end, int drop_cache,
int replace_extent,
u32 extent_item_size,
int *key_inserted);
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, u64 start,
u64 end, int drop_cache);
struct btrfs_root *root, struct btrfs_inode *inode,
struct btrfs_drop_extents_args *args);
int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path,
const u64 start, const u64 end,
struct btrfs_replace_extent_info *extent_info,
@ -3111,7 +3186,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
int btrfs_release_file(struct inode *inode, struct file *file);
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached);
struct extent_state **cached, bool noreserve);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes);
@ -3290,6 +3365,39 @@ static inline void assertfail(const char *expr, const char* file, int line) { }
#define ASSERT(expr) (void)(expr)
#endif
/*
 * Translate an offset inside an extent buffer into an offset inside the page
 * that backs it.
 *
 * @eb:     target extent buffer
 * @offset: offset inside the extent buffer
 *
 * Covers both the sectorsize == PAGE_SIZE and the sectorsize < PAGE_SIZE
 * cases.
 */
static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb,
					   unsigned long offset)
{
	/*
	 * With sectorsize == PAGE_SIZE, eb->start is always PAGE_SIZE aligned,
	 * so including it does not change the result.
	 *
	 * With sectorsize < PAGE_SIZE, only the data that belongs to this eb
	 * must be accessed, so eb->start has to be part of the computation.
	 */
	return offset_in_page(eb->start + offset);
}
/*
 * Return the index of the page that holds the byte at @offset, i.e. @offset
 * shifted right by PAGE_SHIFT.
 */
static inline unsigned long get_eb_page_index(unsigned long offset)
{
	/*
	 * sectorsize == PAGE_SIZE: a plain shift by PAGE_SHIFT is all that is
	 * needed.
	 *
	 * sectorsize < PAGE_SIZE: only 64K PAGE_SIZE is supported and every
	 * tree block is guaranteed to fit in a single page, so the resulting
	 * index is always 0.
	 */
	const unsigned long index = offset >> PAGE_SHIFT;

	return index;
}
/*
* Use that for functions that are conditionally exported for sanity tests but
* otherwise static
@ -3599,4 +3707,9 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
}
#endif
/* Return true if fs_info->zoned is non-zero, false otherwise. */
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
	return !!fs_info->zoned;
}
#endif

View File

@ -740,13 +740,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
goto out;
}
/*
* we need allocate some memory space, but it might cause the task
* to sleep, so we set all locked nodes in the path to blocking locks
* first.
*/
btrfs_set_path_blocking(path);
keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
if (!keys) {
ret = -ENOMEM;
@ -1154,7 +1147,6 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &fs_info->delayed_block_rsv;
@ -1219,7 +1211,6 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
btrfs_release_delayed_node(delayed_node);
return -ENOMEM;
}
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
@ -1264,7 +1255,6 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
ret = -ENOMEM;
goto trans_out;
}
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
trans->block_rsv = &fs_info->delayed_block_rsv;
@ -1333,7 +1323,6 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
if (!delayed_node)
break;
path->leave_spinning = 1;
root = delayed_node->root;
trans = btrfs_join_transaction(root);
@ -1826,27 +1815,29 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
}
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
struct btrfs_root *root,
struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
int ret = 0;
delayed_node = btrfs_get_or_create_delayed_node(BTRFS_I(inode));
delayed_node = btrfs_get_or_create_delayed_node(inode);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
mutex_lock(&delayed_node->mutex);
if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
fill_stack_inode_item(trans, &delayed_node->inode_item,
&inode->vfs_inode);
goto release_node;
}
ret = btrfs_delayed_inode_reserve_metadata(trans, root, BTRFS_I(inode),
ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
delayed_node);
if (ret)
goto release_node;
fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
fill_stack_inode_item(trans, &delayed_node->inode_item, &inode->vfs_inode);
set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
delayed_node->count++;
atomic_inc(&root->fs_info->delayed_root->items);

View File

@ -110,7 +110,8 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode);
int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
struct btrfs_root *root,
struct btrfs_inode *inode);
int btrfs_fill_inode(struct inode *inode, u32 *rdev);
int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode);

View File

@ -21,6 +21,7 @@
#include "rcu-string.h"
#include "dev-replace.h"
#include "sysfs.h"
#include "zoned.h"
/*
* Device replace overview
@ -96,7 +97,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
* a replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
btrfs_err(fs_info,
"found replace target device without a valid replace item");
ret = -EUCLEAN;
@ -159,7 +160,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
* replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID, NULL, NULL, false)) {
BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
btrfs_err(fs_info,
"replace devid present without an active replace item");
ret = -EUCLEAN;
@ -171,10 +172,10 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
src_devid, NULL, NULL, true);
src_devid, NULL, NULL);
dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
BTRFS_DEV_REPLACE_DEVID,
NULL, NULL, true);
NULL, NULL);
/*
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing
@ -259,6 +260,13 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
return PTR_ERR(bdev);
}
if (!btrfs_check_device_zone_type(fs_info, bdev)) {
btrfs_err(fs_info,
"dev-replace: zoned type of target device mismatch with filesystem");
ret = -EINVAL;
goto error;
}
sync_blockdev(bdev);
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
@ -313,6 +321,10 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
device->fs_devices = fs_info->fs_devices;
ret = btrfs_get_dev_zone_info(device);
if (ret)
goto error;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
list_add(&device->dev_list, &fs_info->fs_devices->devices);
fs_info->fs_devices->num_devices++;

View File

@ -127,7 +127,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
btrfs_cpu_key_to_disk(&disk_key, location);

View File

@ -355,7 +355,7 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
block_group = find_next_block_group(discard_ctl, now);
if (block_group) {
unsigned long delay = discard_ctl->delay;
u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
/*
@ -366,9 +366,9 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
if (kbps_limit && discard_ctl->prev_discard) {
u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
u64 bps_delay = div64_u64(discard_ctl->prev_discard *
MSEC_PER_SEC, bps_limit);
NSEC_PER_SEC, bps_limit);
delay = max(delay, msecs_to_jiffies(bps_delay));
delay = max(delay, bps_delay);
}
/*
@ -378,11 +378,20 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
if (now < block_group->discard_eligible_time) {
u64 bg_timeout = block_group->discard_eligible_time - now;
delay = max(delay, nsecs_to_jiffies(bg_timeout));
delay = max(delay, bg_timeout);
}
if (override && discard_ctl->prev_discard) {
u64 elapsed = now - discard_ctl->prev_discard_time;
if (delay > elapsed)
delay -= elapsed;
else
delay = 0;
}
mod_delayed_work(discard_ctl->discard_workers,
&discard_ctl->work, delay);
&discard_ctl->work, nsecs_to_jiffies(delay));
}
out:
spin_unlock(&discard_ctl->lock);
@ -465,7 +474,12 @@ static void btrfs_discard_workfn(struct work_struct *work)
discard_ctl->discard_extent_bytes += trimmed;
}
/*
* Updated without locks as this is inside the workfn and nothing else
* is reading the values
*/
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = ktime_get_ns();
/* Determine next steps for a block_group */
if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
@ -519,7 +533,6 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
s64 discardable_bytes;
u32 iops_limit;
unsigned long delay;
unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;
discardable_extents = atomic_read(&discard_ctl->discardable_extents);
if (!discardable_extents)
@ -550,12 +563,13 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
iops_limit = READ_ONCE(discard_ctl->iops_limit);
if (iops_limit)
lower_limit = max_t(unsigned long, lower_limit,
MSEC_PER_SEC / iops_limit);
delay = MSEC_PER_SEC / iops_limit;
else
delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
discard_ctl->delay = msecs_to_jiffies(delay);
delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
BTRFS_DISCARD_MAX_DELAY_MSEC);
discard_ctl->delay_ms = delay;
spin_unlock(&discard_ctl->lock);
}
@ -563,15 +577,14 @@ void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
/**
* btrfs_discard_update_discardable - propagate discard counters
* @block_group: block_group of interest
* @ctl: free_space_ctl of @block_group
*
* This propagates deltas of counters up to the discard_ctl. It maintains a
* current counter and a previous counter passing the delta up to the global
* stat. Then the current counter value becomes the previous counter value.
*/
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl)
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
{
struct btrfs_free_space_ctl *ctl;
struct btrfs_discard_ctl *discard_ctl;
s32 extents_delta;
s64 bytes_delta;
@ -581,8 +594,10 @@ void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
!btrfs_is_block_group_data_only(block_group))
return;
ctl = block_group->free_space_ctl;
discard_ctl = &block_group->fs_info->discard_ctl;
lockdep_assert_held(&ctl->tree_lock);
extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
ctl->discardable_extents[BTRFS_STAT_PREV];
if (extents_delta) {
@ -684,10 +699,11 @@ void btrfs_discard_init(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
discard_ctl->prev_discard = 0;
discard_ctl->prev_discard_time = 0;
atomic_set(&discard_ctl->discardable_extents, 0);
atomic64_set(&discard_ctl->discardable_bytes, 0);
discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
discard_ctl->kbps_limit = 0;
discard_ctl->discard_extent_bytes = 0;

View File

@ -28,8 +28,7 @@ bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl);
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group);
/* Setup/cleanup operations */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);

File diff suppressed because it is too large Load Diff

View File

@ -43,13 +43,15 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 parent_transid, int level,
struct btrfs_key *first_key);
void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr);
u64 owner_root, u64 parent_transid,
int level, struct btrfs_key *first_key);
struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
u64 bytenr, u64 owner_root,
int level);
void btrfs_clean_tree_block(struct extent_buffer *buf);
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
@ -79,7 +81,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, u64 phy_offset,
int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
struct page *page, u64 start, u64 end,
int mirror);
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
@ -112,10 +114,10 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
struct btrfs_key *first_key);
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
enum btrfs_wq_endio_type metadata);
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 bio_offset, void *private_data,
extent_submit_bio_start_t *submit_bio_start);
blk_status_t btrfs_wq_submit_bio(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags,
u64 dio_file_offset,
extent_submit_bio_start_t *submit_bio_start);
blk_status_t btrfs_submit_bio_done(void *private_data, struct bio *bio,
int mirror_num);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
@ -131,16 +133,15 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
int __init btrfs_end_io_wq_init(void);
void __cold btrfs_end_io_wq_exit(void);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level);
#else
static inline void btrfs_init_lockdep(void)
{ }
static inline void btrfs_set_buffer_lockdep_class(u64 objectid,
struct extent_buffer *eb, int level)
{

View File

@ -222,7 +222,6 @@ static int btrfs_get_name(struct dentry *parent, char *name,
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
if (ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = BTRFS_I(inode)->root->root_key.objectid;

View File

@ -21,10 +21,24 @@ struct io_failure_record;
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
/*
* Must be cleared only during ordered extent completion or on error paths if we
* did not manage to submit bios and create the ordered extents for the range.
* Should not be cleared during page release and page invalidation (if there is
* an ordered extent in flight), that is left for the ordered extent completion.
*/
#define EXTENT_DELALLOC_NEW (1U << 14)
/*
* When an ordered extent successfully completes for a region marked as a new
* delalloc range, use this flag when clearing a new delalloc range to indicate
* that the VFS' inode number of bytes should be incremented and the inode's new
* delalloc bytes decremented, in an atomic way to prevent races with stat(2).
*/
#define EXTENT_ADD_INODE_BYTES (1U << 15)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | \
EXTENT_ADD_INODE_BYTES)
/*
* Redefined bits above which are used only in the device allocation tree,
@ -73,7 +87,7 @@ struct extent_state {
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
refcount_t refs;
unsigned state;
u32 state;
struct io_failure_record *failrec;
@ -105,19 +119,18 @@ void __cold extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, unsigned bits, int contig);
u64 max_bytes, u32 bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int filled,
struct extent_state *cached_state);
u32 bits, int filled, struct extent_state *cached_state);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
u32 bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
u32 bits, int wake, int delete,
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
u32 bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
@ -141,7 +154,7 @@ static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
u64 end, u32 bits)
{
int wake = 0;
@ -152,17 +165,19 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
u32 bits, unsigned exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask,
struct extent_changeset *changeset);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits);
u32 bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, unsigned bits)
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
NULL);
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@ -175,8 +190,8 @@ static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
NULL, mask);
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL,
mask, NULL);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@ -188,16 +203,16 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, unsigned clear_bits,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
u64 end, unsigned int extra_bits,
u64 end, u32 extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
NULL, cached_state, GFP_NOFS);
0, NULL, cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
@ -205,30 +220,30 @@ static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
NULL, cached_state, GFP_NOFS);
0, NULL, cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
GFP_NOFS);
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL,
GFP_NOFS, NULL);
}
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
cached_state, mask);
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
cached_state, mask, NULL);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
u64 *start_ret, u64 *end_ret, u32 bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,

View File

@ -1465,7 +1465,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
/* this will setup the path even if it fails to insert the back ref */
ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
parent, root_objectid, owner,
@ -1489,7 +1488,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
path->leave_spinning = 1;
/* now insert the actual backref */
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
BUG_ON(refs_to_add != 1);
@ -1605,7 +1603,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
}
again:
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
if (ret < 0) {
err = ret;
@ -2132,25 +2129,6 @@ static u64 find_middle(struct rb_root *root)
}
#endif
/*
 * Given a number of bytes that need to be checksummed, return the number of
 * leaves required to store the checksums for those bytes.
 */
u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
	/* Space available for items in one leaf. */
	const u64 item_space = BTRFS_MAX_ITEM_SIZE(fs_info);
	/* How many checksums fit in that space. */
	const u64 per_leaf = div64_u64(item_space,
			(u64)btrfs_super_csum_size(fs_info->super_copy));
	/* One checksum per sectorsize bytes of data. */
	const u64 nr_csums = div64_u64(csum_bytes, fs_info->sectorsize);

	/* Round up to a whole number of leaves. */
	return div64_u64(nr_csums + per_leaf - 1, per_leaf);
}
/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
@ -2663,6 +2641,11 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
BUG_ON(!btrfs_block_group_done(block_group));
ret = btrfs_remove_free_space(block_group, start, num_bytes);
} else {
/*
* We must wait for v1 caching to finish, otherwise we may not
* remove our space.
*/
btrfs_wait_space_cache_v1_finished(block_group, caching_ctl);
mutex_lock(&caching_ctl->mutex);
if (start >= caching_ctl->progress) {
@ -2730,31 +2713,6 @@ btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
atomic_inc(&bg->reservations);
}
/*
 * Walk the list of caching block groups under commit_root_sem and record, for
 * each one, how far unpinning may go (last_byte_to_unpin). Block groups that
 * are done caching are taken off the list and their caching control reference
 * is dropped. Finishes by updating the global block reserve.
 */
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
	struct btrfs_caching_control *ctl;
	struct btrfs_caching_control *tmp;

	down_write(&fs_info->commit_root_sem);

	list_for_each_entry_safe(ctl, tmp, &fs_info->caching_block_groups,
				 list) {
		struct btrfs_block_group *bg = ctl->block_group;

		if (!btrfs_block_group_done(bg)) {
			/* Still caching: only unpin up to the cached point. */
			bg->last_byte_to_unpin = ctl->progress;
			continue;
		}

		/* Fully cached: no limit, and the control is not needed. */
		bg->last_byte_to_unpin = (u64)-1;
		list_del_init(&ctl->list);
		btrfs_put_caching_control(ctl);
	}

	up_write(&fs_info->commit_root_sem);

	btrfs_update_global_block_rsv(fs_info);
}
/*
* Returns the free cluster for the given space info and sets empty_cluster to
* what it should be based on the mount options.
@ -2816,11 +2774,13 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
len = cache->start + cache->length - start;
len = min(len, end + 1 - start);
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
if (return_free_space)
btrfs_add_free_space(cache, start, len);
down_read(&fs_info->commit_root_sem);
if (start < cache->last_byte_to_unpin && return_free_space) {
u64 add_len = min(len, cache->last_byte_to_unpin - start);
btrfs_add_free_space(cache, start, add_len);
}
up_read(&fs_info->commit_root_sem);
start += len;
total_unpinned += len;
@ -3040,8 +3000,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
if (!is_data && refs_to_drop != 1) {
@ -3106,7 +3064,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
btrfs_release_path(path);
path->leave_spinning = 1;
/* Slow path to locate EXTENT/METADATA_ITEM */
key.objectid = bytenr;
@ -4448,7 +4405,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
@ -4533,7 +4489,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
&extent_key, size);
if (ret) {
@ -4662,7 +4617,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *buf;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
if (IS_ERR(buf))
return buf;
@ -4679,12 +4634,16 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ERR_PTR(-EUCLEAN);
}
/*
* This needs to stay, because we could allocate a freed block from an
* old tree into a new tree, so we need to make sure this new block is
* set to the appropriate level and owner.
*/
btrfs_set_buffer_lockdep_class(owner, buf, level);
__btrfs_tree_lock(buf, nest);
btrfs_clean_tree_block(buf);
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
btrfs_set_lock_blocking_write(buf);
set_extent_buffer_uptodate(buf);
memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
@ -4905,7 +4864,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
continue;
}
reada:
readahead_tree_block(fs_info, bytenr);
btrfs_readahead_node_child(eb, slot);
nread++;
}
wc->reada_slot = slot;
@ -5064,16 +5023,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = find_extent_buffer(fs_info, bytenr);
if (!next) {
next = btrfs_find_create_tree_block(fs_info, bytenr);
next = btrfs_find_create_tree_block(fs_info, bytenr,
root->root_key.objectid, level - 1);
if (IS_ERR(next))
return PTR_ERR(next);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
level - 1);
reada = 1;
}
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
&wc->refs[level - 1],
@ -5124,8 +5080,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (!next) {
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(fs_info, bytenr, generation, level - 1,
&first_key);
next = read_tree_block(fs_info, bytenr, root->root_key.objectid,
generation, level - 1, &first_key);
if (IS_ERR(next)) {
return PTR_ERR(next);
} else if (!extent_buffer_uptodate(next)) {
@ -5133,7 +5089,6 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
return -EIO;
}
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
}
level--;
@ -5145,7 +5100,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
path->nodes[level] = next;
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
wc->level = level;
if (wc->level == 1)
wc->reada_slot = 0;
@ -5273,8 +5228,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (!path->locks[level]) {
BUG_ON(level == 0);
btrfs_tree_lock(eb);
btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
ret = btrfs_lookup_extent_info(trans, fs_info,
eb->start, level, 1,
@ -5317,8 +5271,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (!path->locks[level] &&
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking_write(eb);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
}
btrfs_clean_tree_block(eb);
}
@ -5486,9 +5439,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking_write(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
memset(&wc->update_progress, 0,
sizeof(wc->update_progress));
} else {
@ -5496,7 +5448,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
memcpy(&wc->update_progress, &key,
sizeof(wc->update_progress));
level = root_item->drop_level;
level = btrfs_root_drop_level(root_item);
BUG_ON(level == 0);
path->lowest_level = level;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@ -5516,8 +5468,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
level = btrfs_header_level(root->node);
while (1) {
btrfs_tree_lock(path->nodes[level]);
btrfs_set_lock_blocking_write(path->nodes[level]);
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
ret = btrfs_lookup_extent_info(trans, fs_info,
path->nodes[level]->start,
@ -5529,7 +5480,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
}
BUG_ON(wc->refs[level] == 0);
if (level == root_item->drop_level)
if (level == btrfs_root_drop_level(root_item))
break;
btrfs_tree_unlock(path->nodes[level]);
@ -5574,7 +5525,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
}
btrfs_cpu_key_to_disk(&root_item->drop_progress,
&wc->drop_progress);
root_item->drop_level = wc->drop_level;
btrfs_set_root_drop_level(root_item, wc->drop_level);
BUG_ON(wc->level == 0);
if (btrfs_should_end_transaction(trans) ||
@ -5704,7 +5655,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->locks[level] = BTRFS_WRITE_LOCK;
wc->refs[parent_level] = 1;
wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,7 @@
#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/fiemap.h>
#include <linux/btrfs_tree.h>
#include "ulist.h"
/*
@ -71,11 +72,10 @@ typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num,
unsigned long bio_flags);
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
typedef blk_status_t (extent_submit_bio_start_t)(struct inode *inode,
struct bio *bio, u64 dio_file_offset);
#define INLINE_EXTENT_BUFFER_PAGES 16
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
#define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
struct extent_buffer {
u64 start;
unsigned long len;
@ -87,31 +87,13 @@ struct extent_buffer {
int read_mirror;
struct rcu_head rcu_head;
pid_t lock_owner;
int blocking_writers;
atomic_t blocking_readers;
bool lock_recursed;
/* >= 0 if eb belongs to a log tree, -1 otherwise */
short log_index;
s8 log_index;
/* protects write locks */
rwlock_t lock;
struct rw_semaphore lock;
/* readers use lock_wq while they wait for the write
* lock holders to unlock
*/
wait_queue_head_t write_lock_wq;
/* writers use read_lock_wq while they wait for readers
* to unlock
*/
wait_queue_head_t read_lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
int spinning_writers;
atomic_t spinning_readers;
atomic_t read_locks;
int write_locks;
struct list_head leak_list;
#endif
};
@ -199,7 +181,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
u64 start, u64 owner_root, int level);
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
@ -215,11 +197,20 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
int read_extent_buffer_pages(struct extent_buffer *eb, int wait,
int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root, u64 gen, int level);
void btrfs_readahead_node_child(struct extent_buffer *node, int slot);
static inline int num_extent_pages(const struct extent_buffer *eb)
{
return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
(eb->start >> PAGE_SHIFT);
/*
* For sectorsize == PAGE_SIZE case, since nodesize is always aligned to
* sectorsize, it's just eb->len >> PAGE_SHIFT.
*
* For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE,
* thus have to ensure we get at least one page.
*/
return (eb->len >> PAGE_SHIFT) ?: 1;
}
static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
@ -270,8 +261,7 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
unsigned bits_to_clear,
unsigned long page_ops);
u32 bits_to_clear, unsigned long page_ops);
struct bio *btrfs_bio_alloc(u64 first_byte);
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio);
@ -307,7 +297,7 @@ struct io_failure_record {
blk_status_t btrfs_submit_read_repair(struct inode *inode,
struct bio *failed_bio, u64 phy_offset,
struct bio *failed_bio, u32 bio_offset,
struct page *page, unsigned int pgoff,
u64 start, u64 end, int failed_mirror,
submit_bio_hook_t *submit_bio_hook);

View File

@ -38,27 +38,27 @@
* Finally new_i_size should only be set in the case of truncate where we're not
* ready to use i_size_read() as the limiter yet.
*/
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size)
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start, end, i_size;
int ret;
i_size = new_i_size ?: i_size_read(inode);
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
BTRFS_I(inode)->disk_i_size = i_size;
inode->disk_i_size = i_size;
return;
}
spin_lock(&BTRFS_I(inode)->lock);
ret = find_contiguous_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
&start, &end, EXTENT_DIRTY);
spin_lock(&inode->lock);
ret = find_contiguous_extent_bit(&inode->file_extent_tree, 0, &start,
&end, EXTENT_DIRTY);
if (!ret && start == 0)
i_size = min(i_size, end + 1);
else
i_size = 0;
BTRFS_I(inode)->disk_i_size = i_size;
spin_unlock(&BTRFS_I(inode)->lock);
inode->disk_i_size = i_size;
spin_unlock(&inode->lock);
}
/**
@ -142,7 +142,6 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
file_key.offset = pos;
file_key.type = BTRFS_EXTENT_DATA_KEY;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &file_key,
sizeof(*item));
if (ret < 0)
@ -181,7 +180,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_csum_item *item;
struct extent_buffer *leaf;
u64 csum_offset = 0;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
const u32 csum_size = fs_info->csum_size;
int csums_in_item;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@ -201,7 +200,7 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
goto fail;
csum_offset = (bytenr - found_key.offset) >>
fs_info->sb->s_blocksize_bits;
fs_info->sectorsize_bits;
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
csums_in_item /= csum_size;
@ -239,12 +238,117 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}
/*
 * Look up the checksums covering the logical range
 * [disk_bytenr, disk_bytenr + len) and copy them into @dst.
 *
 * The csum item @path currently points at is reused when it already covers
 * @disk_bytenr; otherwise @path is released and the csum tree is searched
 * again.
 *
 * Return >0 for the number of sectors whose checksums were copied.
 * Return 0 if [disk_bytenr, disk_bytenr + sectorsize) has no csum. The caller
 * may want to try the next sector until a range is hit.
 * Return <0 for a fatal error.
 */
static int search_csum_tree(struct btrfs_fs_info *fs_info,
			    struct btrfs_path *path, u64 disk_bytenr,
			    u64 len, u8 *dst)
{
	const u32 sectorsize = fs_info->sectorsize;
	const u32 csum_size = fs_info->csum_size;
	struct btrfs_csum_item *csum_item = NULL;
	struct extent_buffer *leaf = path->nodes[0];
	struct btrfs_key found_key;
	bool covered = false;
	u64 item_start;
	u64 item_bytes;
	u64 copy_end;
	u32 item_size;
	int ret;

	ASSERT(IS_ALIGNED(disk_bytenr, sectorsize) &&
	       IS_ALIGNED(len, sectorsize));

	/* Does the item the path already points at cover disk_bytenr? */
	if (leaf) {
		csum_item = btrfs_item_ptr(leaf, path->slots[0],
					   struct btrfs_csum_item);
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);

		item_start = found_key.offset;
		item_bytes = (item_size / csum_size) * sectorsize;
		covered = in_range(disk_bytenr, item_start, item_bytes);
	}

	if (!covered) {
		/* The current item is of no use, do a fresh lookup. */
		btrfs_release_path(path);
		csum_item = btrfs_lookup_csum(NULL, fs_info->csum_root, path,
					      disk_bytenr, 0);
		if (IS_ERR(csum_item)) {
			ret = PTR_ERR(csum_item);
			goto out;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);

		item_start = found_key.offset;
		item_bytes = (item_size / csum_size) * sectorsize;
		ASSERT(in_range(disk_bytenr, item_start, item_bytes));
	}

	/*
	 * Copy up to the end of the item or the end of the requested range,
	 * whichever comes first.
	 *
	 * NOTE(review): on the reused-item path @csum_item is the start of the
	 * item, so the copy begins at the item's first checksum - confirm that
	 * path is only taken with disk_bytenr == item_start.
	 */
	copy_end = min(item_start + item_bytes, disk_bytenr + len);
	ret = (copy_end - disk_bytenr) >> fs_info->sectorsize_bits;
	read_extent_buffer(leaf, dst, (unsigned long)csum_item,
			   ret * csum_size);
out:
	/* A missing csum is reported as "0 sectors found", not as an error. */
	if (ret == -ENOENT)
		ret = 0;
	return ret;
}
/*
 * Locate the file offset that corresponds to @disk_bytenr inside @bio.
 *
 * The bio is treated as covering the on-disk byte range
 * [bi_sector << SECTOR_SHIFT, (bi_sector << SECTOR_SHIFT) + bi_size), so
 * walking its segments in order while accumulating their lengths identifies
 * the segment, and therefore the page, that holds @disk_bytenr.
 *
 * @inode is used to verify that the page of that segment really belongs to
 * @inode (via page->mapping->host).
 *
 * Return 0 if the file offset can't be found.
 * Return >0 if the file offset was found and stored in @file_offset_ret.
 */
static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
				     u64 disk_bytenr, u64 *file_offset_ret)
{
	struct bvec_iter iter;
	struct bio_vec bvec;
	u64 cur = bio->bi_iter.bi_sector << SECTOR_SHIFT;
	int ret = 0;

	bio_for_each_segment(bvec, bio, iter) {
		struct page *page = bvec.bv_page;

		/* @disk_bytenr lies before the range still to be walked. */
		if (cur > disk_bytenr)
			break;
		/* This segment ends before @disk_bytenr, step over it. */
		if (cur + bvec.bv_len <= disk_bytenr) {
			cur += bvec.bv_len;
			continue;
		}
		ASSERT(in_range(disk_bytenr, cur, bvec.bv_len));
		if (page->mapping && page->mapping->host &&
		    page->mapping->host == inode) {
			ret = 1;
			*file_offset_ret = page_offset(page) + bvec.bv_offset +
					   disk_bytenr - cur;
		}
		/*
		 * This is the only segment that can contain @disk_bytenr.
		 * Stop here even when the page is not ours: @cur has not been
		 * advanced past this segment, so testing later segments
		 * against it would use a stale start offset and could produce
		 * a bogus file offset.
		 */
		break;
	}
	return ret;
}
/**
* btrfs_lookup_bio_sums - Look up checksums for a bio.
* Lookup the checksum for the read bio in csum tree.
*
* @inode: inode that the bio is for.
* @bio: bio to look up.
* @offset: Unless (u64)-1, look up checksums for this offset in the file.
* If (u64)-1, use the page offsets from the bio instead.
* @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
* checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
* NULL, the checksum buffer is allocated and returned in
@ -252,31 +356,40 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
*
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
*/
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u64 offset, u8 *dst)
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio_vec bvec;
struct bvec_iter iter;
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_path *path;
const bool page_offsets = (offset == (u64)-1);
const u32 sectorsize = fs_info->sectorsize;
const u32 csum_size = fs_info->csum_size;
u32 orig_len = bio->bi_iter.bi_size;
u64 orig_disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 cur_disk_bytenr;
u8 *csum;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
u64 page_bytes_left;
u32 diff;
int nblocks;
const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
int count = 0;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
if (!fs_info->csum_root || (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
return BLK_STS_OK;
/*
* This function is only called for read bio.
*
* This means two things:
* - All our csums should only be in csum tree
* No ordered extents csums, as ordered extents are only for write
* path.
* - No need to bother with any other info from the bvec
* Since we're looking up csums, the only important info is the
* disk_bytenr and the length, which can be extracted from bi_iter
* directly.
*/
ASSERT(bio_op(bio) == REQ_OP_READ);
path = btrfs_alloc_path();
if (!path)
return BLK_STS_RESOURCE;
nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
if (!dst) {
struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
@ -295,7 +408,11 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
csum = dst;
}
if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
/*
* If requested number of sectors is larger than one leaf can contain,
* kick the readahead for csum tree.
*/
if (nblocks > fs_info->csums_per_leaf)
path->reada = READA_FORWARD;
/*
@ -309,85 +426,62 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
path->skip_locking = 1;
}
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
for (cur_disk_bytenr = orig_disk_bytenr;
cur_disk_bytenr < orig_disk_bytenr + orig_len;
cur_disk_bytenr += (count * sectorsize)) {
u64 search_len = orig_disk_bytenr + orig_len - cur_disk_bytenr;
unsigned int sector_offset;
u8 *csum_dst;
bio_for_each_segment(bvec, bio, iter) {
page_bytes_left = bvec.bv_len;
if (count)
goto next;
if (page_offsets)
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
count = btrfs_find_ordered_sum(BTRFS_I(inode), offset,
disk_bytenr, csum, nblocks);
if (count)
goto found;
if (!item || disk_bytenr < item_start_offset ||
disk_bytenr >= item_last_offset) {
struct btrfs_key found_key;
u32 item_size;
if (item)
btrfs_release_path(path);
item = btrfs_lookup_csum(NULL, fs_info->csum_root,
path, disk_bytenr, 0);
if (IS_ERR(item)) {
count = 1;
memset(csum, 0, csum_size);
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
offset + fs_info->sectorsize - 1,
EXTENT_NODATASUM);
} else {
btrfs_info_rl(fs_info,
"no csum found for inode %llu start %llu",
btrfs_ino(BTRFS_I(inode)), offset);
}
item = NULL;
btrfs_release_path(path);
goto found;
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
item_start_offset = found_key.offset;
item_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
item_last_offset = item_start_offset +
(item_size / csum_size) *
fs_info->sectorsize;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_csum_item);
}
/*
* this byte range must be able to fit inside
* a single leaf so it will also fit inside a u32
* Although both cur_disk_bytenr and orig_disk_bytenr are u64,
* we're calculating the offset to the bio start.
*
* Bio size is limited to UINT_MAX, thus unsigned int is large
* enough to contain the raw result, not to mention the right
* shifted result.
*/
diff = disk_bytenr - item_start_offset;
diff = diff / fs_info->sectorsize;
diff = diff * csum_size;
count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
inode->i_sb->s_blocksize_bits);
read_extent_buffer(path->nodes[0], csum,
((unsigned long)item) + diff,
csum_size * count);
found:
csum += count * csum_size;
nblocks -= count;
next:
while (count > 0) {
count--;
disk_bytenr += fs_info->sectorsize;
offset += fs_info->sectorsize;
page_bytes_left -= fs_info->sectorsize;
if (!page_bytes_left)
break; /* move to next bio */
ASSERT(cur_disk_bytenr - orig_disk_bytenr < UINT_MAX);
sector_offset = (cur_disk_bytenr - orig_disk_bytenr) >>
fs_info->sectorsize_bits;
csum_dst = csum + sector_offset * csum_size;
count = search_csum_tree(fs_info, path, cur_disk_bytenr,
search_len, csum_dst);
if (count <= 0) {
/*
* Either we hit a critical error or we didn't find
* the csum.
* Either way, we put zero into the csums dst, and skip
* to the next sector.
*/
memset(csum_dst, 0, csum_size);
count = 1;
/*
* For data reloc inode, we need to mark the range
* NODATASUM so that balance won't report false csum
* error.
*/
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset;
int ret;
ret = search_file_offset_in_bio(bio, inode,
cur_disk_bytenr, &file_offset);
if (ret)
set_extent_bits(io_tree, file_offset,
file_offset + sectorsize - 1,
EXTENT_NODATASUM);
} else {
btrfs_warn_rl(fs_info,
"csum hole found for disk bytenr range [%llu, %llu)",
cur_disk_bytenr, cur_disk_bytenr + sectorsize);
}
}
}
WARN_ON_ONCE(count);
btrfs_free_path(path);
return BLK_STS_OK;
}
@ -406,7 +500,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
int ret;
size_t size;
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
const u32 csum_size = fs_info->csum_size;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(end + 1, fs_info->sectorsize));
@ -433,8 +527,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
key.type == BTRFS_EXTENT_CSUM_KEY) {
offset = (start - key.offset) >>
fs_info->sb->s_blocksize_bits;
offset = (start - key.offset) >> fs_info->sectorsize_bits;
if (offset * csum_size <
btrfs_item_size_nr(leaf, path->slots[0] - 1))
path->slots[0]--;
@ -484,10 +577,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
sums->bytenr = start;
sums->len = (int)size;
offset = (start - key.offset) >>
fs_info->sb->s_blocksize_bits;
offset = (start - key.offset) >> fs_info->sectorsize_bits;
offset *= csum_size;
size >>= fs_info->sb->s_blocksize_bits;
size >>= fs_info->sectorsize_bits;
read_extent_buffer(path->nodes[0],
sums->sums,
@ -539,7 +631,6 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
int i;
u64 offset;
unsigned nofs_flag;
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
nofs_flag = memalloc_nofs_save();
sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
@ -557,7 +648,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
else
offset = 0; /* shut up gcc */
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
sums->bytenr = bio->bi_iter.bi_sector << 9;
index = 0;
shash->tfm = fs_info->csum_shash;
@ -596,7 +687,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
ordered = btrfs_lookup_ordered_extent(inode,
offset);
ASSERT(ordered); /* Logic error */
sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
sums->bytenr = (bio->bi_iter.bi_sector << 9)
+ total_bytes;
index = 0;
}
@ -607,7 +698,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
fs_info->sectorsize,
sums->sums + index);
kunmap_atomic(data);
index += csum_size;
index += fs_info->csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
total_bytes += fs_info->sectorsize;
@ -637,14 +728,14 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 len)
{
struct extent_buffer *leaf;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
const u32 csum_size = fs_info->csum_size;
u64 csum_end;
u64 end_byte = bytenr + len;
u32 blocksize_bits = fs_info->sb->s_blocksize_bits;
u32 blocksize_bits = fs_info->sectorsize_bits;
leaf = path->nodes[0];
csum_end = btrfs_item_size_nr(leaf, path->slots[0]) / csum_size;
csum_end <<= fs_info->sb->s_blocksize_bits;
csum_end <<= blocksize_bits;
csum_end += key->offset;
if (key->offset < bytenr && csum_end <= end_byte) {
@ -691,8 +782,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u64 csum_end;
struct extent_buffer *leaf;
int ret;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int blocksize_bits = fs_info->sb->s_blocksize_bits;
const u32 csum_size = fs_info->csum_size;
u32 blocksize_bits = fs_info->sectorsize_bits;
ASSERT(root == fs_info->csum_root ||
root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
@ -706,7 +797,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
key.offset = end_byte - 1;
key.type = BTRFS_EXTENT_CSUM_KEY;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
if (path->slots[0] == 0)
@ -846,7 +936,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
int index = 0;
int found_next;
int ret;
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
const u32 csum_size = fs_info->csum_size;
path = btrfs_alloc_path();
if (!path)
@ -921,7 +1011,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
if (btrfs_leaf_free_space(leaf) >= csum_size) {
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
csum_offset = (bytenr - found_key.offset) >>
fs_info->sb->s_blocksize_bits;
fs_info->sectorsize_bits;
goto extend_csum;
}
@ -939,8 +1029,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
csum_offset = (bytenr - found_key.offset) >>
fs_info->sb->s_blocksize_bits;
csum_offset = (bytenr - found_key.offset) >> fs_info->sectorsize_bits;
if (found_key.type != BTRFS_EXTENT_CSUM_KEY ||
found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
@ -956,7 +1045,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
u32 diff;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
tmp >>= fs_info->sectorsize_bits;
WARN_ON(tmp < 1);
extend_nr = max_t(int, 1, (int)tmp);
@ -981,9 +1070,9 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
u64 tmp;
tmp = sums->len - total_bytes;
tmp >>= fs_info->sb->s_blocksize_bits;
tmp >>= fs_info->sectorsize_bits;
tmp = min(tmp, (next_offset - file_key.offset) >>
fs_info->sb->s_blocksize_bits);
fs_info->sectorsize_bits);
tmp = max_t(u64, 1, tmp);
tmp = min_t(u64, tmp, MAX_CSUM_ITEMS(fs_info, csum_size));
@ -991,10 +1080,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
} else {
ins_size = csum_size;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &file_key,
ins_size);
path->leave_spinning = 0;
if (ret < 0)
goto out;
if (WARN_ON(ret != 0))
@ -1007,8 +1094,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
item = (struct btrfs_csum_item *)((unsigned char *)item +
csum_offset * csum_size);
found:
ins_size = (u32)(sums->len - total_bytes) >>
fs_info->sb->s_blocksize_bits;
ins_size = (u32)(sums->len - total_bytes) >> fs_info->sectorsize_bits;
ins_size *= csum_size;
ins_size = min_t(u32, (unsigned long)item_end - (unsigned long)item,
ins_size);

File diff suppressed because it is too large Load Diff

View File

@ -16,7 +16,6 @@
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
@ -33,16 +32,18 @@ struct btrfs_trim_range {
struct list_head list;
};
static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_io_ctl *io_ctl,
struct btrfs_path *path);
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes, bool for_alloc);
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info);
static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
u64 bytes);
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
@ -141,17 +142,15 @@ static int __create_free_space_inode(struct btrfs_root *root,
struct btrfs_free_space_header *header;
struct btrfs_inode_item *inode_item;
struct extent_buffer *leaf;
u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
/* We inline CRCs for the free disk space cache */
const u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC |
BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
int ret;
ret = btrfs_insert_empty_inode(trans, root, path, ino);
if (ret)
return ret;
/* We inline crc's for the free disk space cache */
if (ino != BTRFS_FREE_INO_OBJECTID)
flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
@ -207,6 +206,65 @@ int create_free_space_inode(struct btrfs_trans_handle *trans,
ino, block_group->start);
}
/*
* inode is an optional sink: if it is NULL, btrfs_remove_free_space_inode
* handles lookup, otherwise it takes ownership and iputs the inode.
* Don't reuse an inode pointer after passing it into this function.
*/
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_block_group *block_group)
{
struct btrfs_path *path;
struct btrfs_key key;
int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (!inode)
inode = lookup_free_space_inode(block_group, path);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) != -ENOENT)
ret = PTR_ERR(inode);
goto out;
}
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret) {
btrfs_add_delayed_iput(inode);
goto out;
}
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
if (block_group->iref) {
block_group->iref = 0;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
iput(inode);
} else {
spin_unlock(&block_group->lock);
}
/* One for the lookup ref */
btrfs_add_delayed_iput(inode);
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.type = 0;
key.offset = block_group->start;
ret = btrfs_search_slot(trans, trans->fs_info->tree_root, &key, path,
-1, 1);
if (ret) {
if (ret > 0)
ret = 0;
goto out;
}
ret = btrfs_del_item(trans, trans->fs_info->tree_root, path);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv)
{
@ -267,12 +325,12 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
* We skip the throttling logic for free space cache inodes, so we don't
* need to check for -EAGAIN.
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
0, BTRFS_EXTENT_DATA_KEY);
if (ret)
goto fail;
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
fail:
if (locked)
@ -304,16 +362,11 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
int write)
{
int num_pages;
int check_crcs = 0;
num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
/* Make sure we can fit our crcs and generation into the first page */
if (write && check_crcs &&
(num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
if (write && (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
return -ENOSPC;
memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@ -324,7 +377,6 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
io_ctl->num_pages = num_pages;
io_ctl->fs_info = btrfs_sb(inode->i_sb);
io_ctl->check_crcs = check_crcs;
io_ctl->inode = inode;
return 0;
@ -419,13 +471,8 @@ static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
* Skip the csum areas. If we don't check crcs then we just have a
* 64bit chunk at the front of the first page.
*/
if (io_ctl->check_crcs) {
io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
} else {
io_ctl->cur += sizeof(u64);
io_ctl->size -= sizeof(u64) * 2;
}
io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
put_unaligned_le64(generation, io_ctl->cur);
io_ctl->cur += sizeof(u64);
@ -439,14 +486,8 @@ static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
* Skip the crc area. If we don't check crcs then we just have a 64bit
* chunk at the front of the first page.
*/
if (io_ctl->check_crcs) {
io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
io_ctl->size -= sizeof(u64) +
(sizeof(u32) * io_ctl->num_pages);
} else {
io_ctl->cur += sizeof(u64);
io_ctl->size -= sizeof(u64) * 2;
}
io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
cache_gen = get_unaligned_le64(io_ctl->cur);
if (cache_gen != generation) {
@ -466,11 +507,6 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
u32 crc = ~(u32)0;
unsigned offset = 0;
if (!io_ctl->check_crcs) {
io_ctl_unmap_page(io_ctl);
return;
}
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
@ -488,11 +524,6 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
u32 crc = ~(u32)0;
unsigned offset = 0;
if (!io_ctl->check_crcs) {
io_ctl_map_page(io_ctl, 0);
return 0;
}
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
@ -625,42 +656,42 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
return 0;
}
/*
* Since we attach pinned extents after the fact we can have contiguous sections
* of free space that are split up in entries. This poses a problem with the
* tree logging stuff since it could have allocated across what appears to be 2
* entries since we would have merged the entries when adding the pinned extents
* back to the free space cache. So run through the space cache that we just
* loaded and merge contiguous entries. This will make the log replay stuff not
* blow up and it will make for nicer allocator behavior.
*/
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *e, *prev = NULL;
struct rb_node *n;
struct btrfs_block_group *block_group = ctl->private;
u64 max_bytes;
u64 bitmap_bytes;
u64 extent_bytes;
u64 size = block_group->length;
u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
again:
spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
e = rb_entry(n, struct btrfs_free_space, offset_index);
if (!prev)
goto next;
if (e->bitmap || prev->bitmap)
goto next;
if (prev->offset + prev->bytes == e->offset) {
unlink_free_space(ctl, prev);
unlink_free_space(ctl, e);
prev->bytes += e->bytes;
kmem_cache_free(btrfs_free_space_cachep, e);
link_free_space(ctl, prev);
prev = NULL;
spin_unlock(&ctl->tree_lock);
goto again;
}
next:
prev = e;
}
spin_unlock(&ctl->tree_lock);
max_bitmaps = max_t(u64, max_bitmaps, 1);
ASSERT(ctl->total_bitmaps <= max_bitmaps);
/*
* We are trying to keep the total amount of memory used per 1GiB of
* space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
* mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
* bitmaps, we may end up using more memory than this.
*/
if (size < SZ_1G)
max_bytes = MAX_CACHE_BYTES_PER_GIG;
else
max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);
bitmap_bytes = ctl->total_bitmaps * ctl->unit;
/*
* we want the extent entry threshold to always be at most 1/2 the max
* bytes we can have, or whatever is less than that.
*/
extent_bytes = max_bytes - bitmap_bytes;
extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
ctl->extents_thresh =
div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}
static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
@ -753,16 +784,6 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
/*
* Sync discard ensures that the free space cache is always
* trimmed. So when reading this in, the state should reflect
* that. We also do this for async as a stop gap for lack of
* persistence.
*/
if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
btrfs_test_opt(fs_info, DISCARD_ASYNC))
e->trim_state = BTRFS_TRIM_STATE_TRIMMED;
if (!e->bytes) {
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
@ -791,7 +812,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
spin_lock(&ctl->tree_lock);
ret = link_free_space(ctl, e);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
recalculate_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
if (ret) {
btrfs_err(fs_info,
@ -816,19 +837,11 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = io_ctl_read_bitmap(&io_ctl, e);
if (ret)
goto free_cache;
e->bitmap_extents = count_bitmap_extents(ctl, e);
if (!btrfs_free_space_trimmed(e)) {
ctl->discardable_extents[BTRFS_STAT_CURR] +=
e->bitmap_extents;
ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
}
}
io_ctl_drop_pages(&io_ctl);
merge_space_tree(ctl);
ret = 1;
out:
btrfs_discard_update_discardable(ctl->private, ctl);
io_ctl_free(&io_ctl);
return ret;
free_cache:
@ -837,16 +850,59 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto out;
}
/*
 * Drain every entry of @ctl into @block_group's free space ctl.
 *
 * Entries are consumed from the front of ctl->free_space_offset one at a time
 * and re-added through btrfs_add_free_space(). Extent entries are unlinked and
 * freed; bitmap entries are emptied range by range and then freed.
 *
 * Returns 0 once @ctl is empty, or the first non-zero value returned by
 * btrfs_add_free_space(), in which case the remaining entries stay in @ctl.
 */
static int copy_free_space_cache(struct btrfs_block_group *block_group,
				 struct btrfs_free_space_ctl *ctl)
{
	int ret = 0;

	for (;;) {
		struct btrfs_free_space *entry;
		struct rb_node *node;

		/* Stop on the first failure without touching the tree again. */
		if (ret)
			break;

		node = rb_first(&ctl->free_space_offset);
		if (node == NULL)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);

		if (entry->bitmap) {
			u64 start = entry->offset;
			u64 len = ctl->unit;

			/*
			 * Each pass restarts the search from the beginning of
			 * the bitmap, asking for ctl->unit bytes, until
			 * search_bitmap() reports nothing left (non-zero).
			 */
			while (!search_bitmap(ctl, entry, &start, &len, false)) {
				ret = btrfs_add_free_space(block_group, start,
							   len);
				if (ret)
					break;
				bitmap_clear_bits(ctl, entry, start, len);
				start = entry->offset;
				len = ctl->unit;
			}
			free_bitmap(ctl, entry);
		} else {
			unlink_free_space(ctl, entry);
			ret = btrfs_add_free_space(block_group, entry->offset,
						   entry->bytes);
			kmem_cache_free(btrfs_free_space_cachep, entry);
		}

		cond_resched();
	}

	return ret;
}
int load_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space_ctl tmp_ctl = {};
struct inode *inode;
struct btrfs_path *path;
int ret = 0;
bool matched;
u64 used = block_group->used;
/*
* Because we could potentially discard our loaded free space, we want
* to load everything into a temporary structure first, and then if it's
* valid copy it all into the actual free space ctl.
*/
btrfs_init_free_space_ctl(block_group, &tmp_ctl);
/*
* If this block group has been marked to be cleared for one reason or
* another then we can't trust the on disk cache, so just return.
@ -898,19 +954,25 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
}
spin_unlock(&block_group->lock);
ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
path, block_group->start);
btrfs_free_path(path);
if (ret <= 0)
goto out;
spin_lock(&ctl->tree_lock);
matched = (ctl->free_space == (block_group->length - used -
block_group->bytes_super));
spin_unlock(&ctl->tree_lock);
matched = (tmp_ctl.free_space == (block_group->length - used -
block_group->bytes_super));
if (!matched) {
__btrfs_remove_free_space_cache(ctl);
if (matched) {
ret = copy_free_space_cache(block_group, &tmp_ctl);
/*
* ret == 1 means we successfully loaded the free space cache,
* so we need to re-set it here.
*/
if (ret == 0)
ret = 1;
} else {
__btrfs_remove_free_space_cache(&tmp_ctl);
btrfs_warn(fs_info,
"block group %llu has wrong amount of free space",
block_group->start);
@ -929,6 +991,9 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
block_group->start);
}
spin_lock(&ctl->tree_lock);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
iput(inode);
return ret;
}
@ -1191,7 +1256,7 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
"failed to write free space cache for block group %llu error %d",
block_group->start, ret);
}
btrfs_update_inode(trans, root, inode);
btrfs_update_inode(trans, root, BTRFS_I(inode));
if (block_group) {
/* the dirty list is protected by the dirty_bgs_lock */
@ -1220,14 +1285,6 @@ static int __btrfs_wait_cache_io(struct btrfs_root *root,
}
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_io_ctl *io_ctl,
struct btrfs_path *path)
{
return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
}
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path)
@ -1332,7 +1389,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Everything is written out, now we dirty the pages in the file. */
ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
io_ctl->num_pages, 0, i_size_read(inode),
&cached_state);
&cached_state, false);
if (ret)
goto out_nospc;
@ -1381,7 +1438,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
invalidate_inode_pages2(inode->i_mapping);
BTRFS_I(inode)->generation = 0;
}
btrfs_update_inode(trans, root, inode);
btrfs_update_inode(trans, root, BTRFS_I(inode));
if (must_iput)
iput(inode);
return ret;
@ -1672,44 +1729,6 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
return ret;
}
/*
 * Recompute ctl->extents_thresh from the block group size and the number of
 * bitmaps currently in use.
 *
 * The block group is taken from ctl->private. The result is a number of
 * struct btrfs_free_space entries, not a byte count.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group *bg = ctl->private;
	u64 bg_len = bg->length;
	u64 bitmap_span = BITS_PER_BITMAP * ctl->unit;
	u64 bitmap_limit;
	u64 budget;
	u64 bitmap_cost;
	u64 extent_budget;

	/* Bitmaps needed to cover the block group, rounded up, at least one. */
	bitmap_limit = div64_u64(bg_len + bitmap_span - 1, bitmap_span);
	bitmap_limit = max_t(u64, bitmap_limit, 1);
	ASSERT(ctl->total_bitmaps <= bitmap_limit);

	/*
	 * The memory budget is MAX_CACHE_BYTES_PER_GIG for every full GiB of
	 * the block group, and never less than one such share. Pulling extents
	 * >= FORCE_EXTENT_THRESHOLD out of bitmaps may still push real memory
	 * use above this figure.
	 */
	budget = (bg_len < SZ_1G) ?
		MAX_CACHE_BYTES_PER_GIG :
		MAX_CACHE_BYTES_PER_GIG * div_u64(bg_len, SZ_1G);

	bitmap_cost = ctl->total_bitmaps * ctl->unit;

	/*
	 * Extent entries get whatever the bitmaps leave over, capped at half
	 * of the total budget.
	 */
	extent_budget = min_t(u64, budget - bitmap_cost, budget >> 1);

	ctl->extents_thresh =
		div_u64(extent_budget, sizeof(struct btrfs_free_space));
}
static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info,
u64 offset, u64 bytes)
@ -1912,29 +1931,6 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
return NULL;
}
/*
 * Count the runs of consecutive set bits ("extents") in a bitmap entry.
 *
 * @ctl:         free space ctl owning the entry; ctl->unit is the number of
 *               bytes represented by a single bit.
 * @bitmap_info: bitmap entry to scan; ->bytes is the free space it tracks.
 *
 * Returns 0 when the ctl has no block group attached (ctl->private is NULL)
 * or the entry tracks no bytes, otherwise the number of set regions found.
 */
static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *bitmap_info)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 bytes = bitmap_info->bytes;
	unsigned int rs, re;
	int count = 0;

	if (!block_group || !bytes)
		return count;

	bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
				   BITS_PER_BITMAP) {
		/*
		 * The set region is [rs, re), so its length is re - rs. The
		 * previous (rs - re) wrapped around as an unsigned value, so
		 * the remaining byte count never reached zero and the early
		 * exit below was never taken.
		 */
		bytes -= (u64)(re - rs) * ctl->unit;
		count++;
		/* All tracked bytes accounted for: no more set regions. */
		if (!bytes)
			break;
	}

	return count;
}
static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset)
{
@ -1944,8 +1940,7 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
INIT_LIST_HEAD(&info->list);
link_free_space(ctl, info);
ctl->total_bitmaps++;
ctl->op->recalc_thresholds(ctl);
recalculate_thresholds(ctl);
}
static void free_bitmap(struct btrfs_free_space_ctl *ctl,
@ -1967,7 +1962,7 @@ static void free_bitmap(struct btrfs_free_space_ctl *ctl,
kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
ctl->total_bitmaps--;
ctl->op->recalc_thresholds(ctl);
recalculate_thresholds(ctl);
}
static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
@ -2134,7 +2129,6 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
}
static const struct btrfs_free_space_op free_space_op = {
.recalc_thresholds = recalculate_thresholds,
.use_bitmap = use_bitmap,
};
@ -2508,7 +2502,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
if (ret)
kmem_cache_free(btrfs_free_space_cachep, info);
out:
btrfs_discard_update_discardable(block_group, ctl);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
if (ret) {
@ -2643,7 +2637,7 @@ int btrfs_remove_free_space(struct btrfs_block_group *block_group,
goto again;
}
out_lock:
btrfs_discard_update_discardable(block_group, ctl);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
out:
return ret;
@ -2674,10 +2668,10 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
"%d blocks of free space at or bigger than bytes is", count);
}
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
spin_lock_init(&ctl->tree_lock);
ctl->unit = fs_info->sectorsize;
@ -2779,7 +2773,7 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
spin_lock(&ctl->tree_lock);
__btrfs_remove_free_space_cache_locked(ctl);
if (ctl->private)
btrfs_discard_update_discardable(ctl->private, ctl);
btrfs_discard_update_discardable(ctl->private);
spin_unlock(&ctl->tree_lock);
}
@ -2801,7 +2795,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
cond_resched_lock(&ctl->tree_lock);
}
__btrfs_remove_free_space_cache_locked(ctl);
btrfs_discard_update_discardable(block_group, ctl);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
}
@ -2885,7 +2879,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
link_free_space(ctl, entry);
}
out:
btrfs_discard_update_discardable(block_group, ctl);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
if (align_gap_len)
@ -3054,7 +3048,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
kmem_cache_free(btrfs_free_space_bitmap_cachep,
entry->bitmap);
ctl->total_bitmaps--;
ctl->op->recalc_thresholds(ctl);
recalculate_thresholds(ctl);
} else if (!btrfs_free_space_trimmed(entry)) {
ctl->discardable_extents[BTRFS_STAT_CURR]--;
}
@ -3828,166 +3822,62 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
return ret;
}
/*
* Find the left-most item in the cache tree, and then return the
* smallest inode number in the item.
*
* Note: the returned inode number may not be the smallest one in
* the tree, if the left-most item is a bitmap.
*/
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info)
{
struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
struct btrfs_free_space *entry = NULL;
u64 ino = 0;
return btrfs_super_cache_generation(fs_info->super_copy);
}
spin_lock(&ctl->tree_lock);
static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans)
{
struct btrfs_block_group *block_group;
struct rb_node *node;
int ret;
if (RB_EMPTY_ROOT(&ctl->free_space_offset))
goto out;
btrfs_info(fs_info, "cleaning free space cache v1");
entry = rb_entry(rb_first(&ctl->free_space_offset),
struct btrfs_free_space, offset_index);
if (!entry->bitmap) {
ino = entry->offset;
unlink_free_space(ctl, entry);
entry->offset++;
entry->bytes--;
if (!entry->bytes)
kmem_cache_free(btrfs_free_space_cachep, entry);
else
link_free_space(ctl, entry);
} else {
u64 offset = 0;
u64 count = 1;
int ret;
ret = search_bitmap(ctl, entry, &offset, &count, true);
/* Logic error; Should be empty if it can't find anything */
ASSERT(!ret);
ino = offset;
bitmap_clear_bits(ctl, entry, offset, 1);
if (entry->bytes == 0)
free_bitmap(ctl, entry);
node = rb_first(&fs_info->block_group_cache_tree);
while (node) {
block_group = rb_entry(node, struct btrfs_block_group, cache_node);
ret = btrfs_remove_free_space_inode(trans, NULL, block_group);
if (ret)
goto out;
node = rb_next(node);
}
out:
spin_unlock(&ctl->tree_lock);
return ino;
}
/*
 * Get the inode that stores the free-ino cache of @root.
 *
 * If root->ino_cache_inode is already set, it is igrab()ed under
 * root->ino_cache_lock and returned. Otherwise the inode is looked up with
 * __lookup_free_space_inode() at offset 0 and, unless the filesystem is
 * closing, an extra igrab()ed pointer is stored in root->ino_cache_inode.
 *
 * Returns the inode, or the ERR_PTR produced by __lookup_free_space_inode().
 */
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path)
{
struct inode *inode = NULL;
/* Fast path: reuse the inode already attached to the root. */
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_inode)
inode = igrab(root->ino_cache_inode);
spin_unlock(&root->ino_cache_lock);
if (inode)
return inode;
inode = __lookup_free_space_inode(root, path, 0);
if (IS_ERR(inode))
return inode;
/* Attach the inode to the root, but not while the fs is shutting down. */
spin_lock(&root->ino_cache_lock);
if (!btrfs_fs_closing(root->fs_info))
root->ino_cache_inode = igrab(inode);
spin_unlock(&root->ino_cache_lock);
return inode;
}
/*
 * Create the inode that stores the free-ino cache of @root.
 *
 * Thin wrapper around __create_free_space_inode() that always uses
 * BTRFS_FREE_INO_OBJECTID as the inode number and 0 as the offset, and
 * returns whatever that helper returns.
 */
int create_free_ino_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path)
{
return __create_free_space_inode(root, trans, path,
BTRFS_FREE_INO_OBJECTID, 0);
}
/*
 * Try to load the on-disk free-ino cache of @root into root->free_ino_ctl.
 *
 * Returns 0 without loading anything when the INODE_MAP_CACHE option is off,
 * the filesystem is closing, no path can be allocated, the cache inode cannot
 * be looked up, or the inode generation does not match the root generation.
 * Otherwise returns the result of __load_free_space_cache(); a negative
 * result is also logged with btrfs_err().
 */
int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_path *path;
struct inode *inode;
int ret = 0;
u64 root_gen = btrfs_root_generation(&root->root_item);
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return 0;
/*
* If we're unmounting then just return, since this does a search on the
* normal root and not the commit root and we could deadlock.
*/
if (btrfs_fs_closing(fs_info))
return 0;
/* Allocation failure is treated as "no cache", not as an error. */
path = btrfs_alloc_path();
if (!path)
return 0;
inode = lookup_free_ino_inode(root, path);
if (IS_ERR(inode))
goto out;
/* Skip the cache when its generation differs from the root's. */
if (root_gen != BTRFS_I(inode)->generation)
goto out_put;
ret = __load_free_space_cache(root, inode, ctl, path, 0);
if (ret < 0)
btrfs_err(fs_info,
"failed to load free ino cache for root %llu",
root->root_key.objectid);
out_put:
iput(inode);
out:
btrfs_free_path(path);
return ret;
}
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct inode *inode)
int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_trans_handle *trans;
int ret;
struct btrfs_io_ctl io_ctl;
bool release_metadata = true;
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return 0;
/*
* update_super_roots will appropriately set or unset
* super_copy->cache_generation based on SPACE_CACHE and
* BTRFS_FS_CLEANUP_SPACE_CACHE_V1. For this reason, we need a
* transaction commit whether we are enabling space cache v1 and don't
* have any other work to do, or are disabling it and removing free
* space inodes.
*/
trans = btrfs_start_transaction(fs_info->tree_root, 0);
if (IS_ERR(trans))
return PTR_ERR(trans);
memset(&io_ctl, 0, sizeof(io_ctl));
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
if (!ret) {
/*
* At this point writepages() didn't error out, so our metadata
* reservation is released when the writeback finishes, at
* inode.c:btrfs_finish_ordered_io(), regardless of it finishing
* with or without an error.
*/
release_metadata = false;
ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
if (!active) {
set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
ret = cleanup_free_space_cache_v1(fs_info, trans);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
}
}
if (ret) {
if (release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
inode->i_size, true);
btrfs_debug(fs_info,
"failed to write free ino cache for root %llu error %d",
root->root_key.objectid, ret);
}
ret = btrfs_commit_transaction(trans);
out:
clear_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
return ret;
}

View File

@ -60,7 +60,6 @@ struct btrfs_free_space_ctl {
};
struct btrfs_free_space_op {
void (*recalc_thresholds)(struct btrfs_free_space_ctl *ctl);
bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info);
};
@ -76,7 +75,6 @@ struct btrfs_io_ctl {
int num_pages;
int entries;
int bitmaps;
unsigned check_crcs:1;
};
struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
@ -84,6 +82,9 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
int create_free_space_inode(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path);
int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_block_group *block_group);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
@ -97,19 +98,9 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path);
int create_free_ino_inode(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path);
int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct inode *inode);
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
struct btrfs_free_space_ctl *ctl);
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size,
@ -126,7 +117,6 @@ bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group *block_group,
u64 bytes);
int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
@ -148,6 +138,8 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen,
u64 maxlen, bool async);
bool btrfs_free_space_cache_v1_active(struct btrfs_fs_info *fs_info);
int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool active);
/* Support functions for running our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int test_add_free_space_entry(struct btrfs_block_group *cache,

View File

@ -136,9 +136,10 @@ static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
return 0;
}
static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
u64 size)
{
return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
}
static unsigned long *alloc_bitmap(u32 bitmap_size)
@ -200,8 +201,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
bitmap_size = free_space_bitmap_size(block_group->length,
fs_info->sectorsize);
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
ret = -ENOMEM;
@ -290,8 +290,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
u32 data_size;
extent_size = min(end - i, bitmap_range);
data_size = free_space_bitmap_size(extent_size,
fs_info->sectorsize);
data_size = free_space_bitmap_size(fs_info, extent_size);
key.objectid = i;
key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
@ -339,8 +338,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
bitmap_size = free_space_bitmap_size(block_group->length,
fs_info->sectorsize);
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
ret = -ENOMEM;
@ -383,8 +381,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
fs_info->sectorsize *
BITS_PER_BYTE);
bitmap_cursor = ((char *)bitmap) + bitmap_pos;
data_size = free_space_bitmap_size(found_key.offset,
fs_info->sectorsize);
data_size = free_space_bitmap_size(fs_info,
found_key.offset);
ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
read_extent_buffer(leaf, bitmap_cursor, ptr,
@ -416,7 +414,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
nrbits = div_u64(block_group->length, block_group->fs_info->sectorsize);
nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
start_bit = find_next_bit_le(bitmap, nrbits, 0);
while (start_bit < nrbits) {
@ -540,8 +538,8 @@ static void free_space_set_bits(struct btrfs_block_group *block_group,
end = found_end;
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
first = div_u64(*start - found_start, fs_info->sectorsize);
last = div_u64(end - found_start, fs_info->sectorsize);
first = (*start - found_start) >> fs_info->sectorsize_bits;
last = (end - found_start) >> fs_info->sectorsize_bits;
if (bit)
extent_buffer_bitmap_set(leaf, ptr, first, last - first);
else
@ -1195,8 +1193,6 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
key.objectid = 0;
key.type = 0;
key.offset = 0;

View File

@ -119,8 +119,6 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0)
ret = -ENOENT;
@ -193,8 +191,6 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
ret = -ENOENT;
@ -270,7 +266,6 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);
if (ret == -EEXIST) {
@ -327,7 +322,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
path->skip_release_on_error = 1;
ret = btrfs_insert_empty_item(trans, root, path, &key,
ins_len);

View File

@ -1,582 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*/
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include "ctree.h"
#include "disk-io.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "transaction.h"
#include "delalloc-space.h"
/*
 * Record that inode-number caching could not be started for @root.
 *
 * Logs a warning, clears the INODE_MAP_CACHE option, sets
 * root->ino_cache_state to BTRFS_CACHE_ERROR under root->ino_cache_lock and
 * wakes everything sleeping on root->ino_cache_wait.
 */
static void fail_caching_thread(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
btrfs_warn(fs_info, "failed to start inode caching task");
btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE,
"disabling inode map caching");
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_ERROR;
spin_unlock(&root->ino_cache_lock);
/* The state is published before the waiters are woken. */
wake_up(&root->ino_cache_wait);
}
/*
 * Thread function that fills root->free_ino_ctl with unused inode numbers.
 *
 * @data is the struct btrfs_root to scan. The commit root is walked from
 * BTRFS_FIRST_FREE_OBJECTID; for every pair of consecutive
 * BTRFS_INODE_ITEM_KEY items the objectids in between are added as free
 * space, and finally the range between the last inode seen and
 * root->highest_objectid. On normal completion the cache state becomes
 * BTRFS_CACHE_FINISHED and btrfs_unpin_free_ino() is called.
 *
 * Returns 0 if INODE_MAP_CACHE is not set, -ENOMEM if no path can be
 * allocated, otherwise the last value returned by btrfs_search_slot() or
 * btrfs_next_leaf().
 */
static int caching_kthread(void *data)
{
struct btrfs_root *root = data;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
/* (u64)-1 means "no inode item seen yet". */
u64 last = (u64)-1;
int slot;
int ret;
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
if (!path) {
fail_caching_thread(root);
return -ENOMEM;
}
/* Since the commit root is read-only, we can safely skip locking. */
path->skip_locking = 1;
path->search_commit_root = 1;
path->reada = READA_FORWARD;
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.offset = 0;
key.type = BTRFS_INODE_ITEM_KEY;
again:
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->commit_root_sem);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
while (1) {
if (btrfs_fs_closing(fs_info))
goto out;
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
/* Ran off the end of this leaf: move to the next one. */
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
break;
if (need_resched() ||
btrfs_transaction_in_commit(fs_info)) {
leaf = path->nodes[0];
if (WARN_ON(btrfs_header_nritems(leaf) == 0))
break;
/*
* Save the key so we can advance forward
* in the next search.
*/
btrfs_item_key_to_cpu(leaf, &key, 0);
btrfs_release_path(path);
root->ino_cache_progress = last;
/* Drop the semaphore, yield, then restart from the saved key. */
up_read(&fs_info->commit_root_sem);
schedule_timeout(1);
goto again;
} else
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
if (key.type != BTRFS_INODE_ITEM_KEY)
goto next;
if (key.objectid >= root->highest_objectid)
break;
/* Objectids strictly between two consecutive inode items are free. */
if (last != (u64)-1 && last + 1 != key.objectid) {
__btrfs_add_free_space(fs_info, ctl, last + 1,
key.objectid - last - 1, 0);
wake_up(&root->ino_cache_wait);
}
last = key.objectid;
next:
path->slots[0]++;
}
/* Add the tail range between the last inode seen and highest_objectid. */
if (last < root->highest_objectid - 1) {
__btrfs_add_free_space(fs_info, ctl, last + 1,
root->highest_objectid - last - 1, 0);
}
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_FINISHED;
spin_unlock(&root->ino_cache_lock);
root->ino_cache_progress = (u64)-1;
btrfs_unpin_free_ino(root);
out:
/* Every exit from here on still holds commit_root_sem for reading. */
wake_up(&root->ino_cache_wait);
up_read(&fs_info->commit_root_sem);
btrfs_free_path(path);
return ret;
}
/*
 * Start populating the free-ino cache of @root if that has not begun yet.
 *
 * Does nothing when INODE_MAP_CACHE is off or the cache state is anything
 * other than BTRFS_CACHE_NO. Otherwise the state moves to
 * BTRFS_CACHE_STARTED and the on-disk cache is tried first; if
 * load_free_ino_cache() returns 1 the state becomes BTRFS_CACHE_FINISHED.
 * If not, the range above the highest used objectid is added right away and
 * a "btrfs-ino-cache-<root id>" kthread running caching_kthread() is
 * started.
 */
static void start_caching(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct task_struct *tsk;
int ret;
u64 objectid;
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return;
/* The NO -> STARTED transition is made under ino_cache_lock. */
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state != BTRFS_CACHE_NO) {
spin_unlock(&root->ino_cache_lock);
return;
}
root->ino_cache_state = BTRFS_CACHE_STARTED;
spin_unlock(&root->ino_cache_lock);
ret = load_free_ino_cache(fs_info, root);
if (ret == 1) {
/* The on-disk cache was usable, so no scan is needed. */
spin_lock(&root->ino_cache_lock);
root->ino_cache_state = BTRFS_CACHE_FINISHED;
spin_unlock(&root->ino_cache_lock);
wake_up(&root->ino_cache_wait);
return;
}
/*
* It can be quite time-consuming to fill the cache by searching
* through the extent tree, and this can keep ino allocation path
* waiting. Therefore at start we quickly find out the highest
* inode number and we know we can use inode numbers which fall in
* [highest_ino + 1, BTRFS_LAST_FREE_OBJECTID].
*/
ret = btrfs_find_free_objectid(root, &objectid);
if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
__btrfs_add_free_space(fs_info, ctl, objectid,
BTRFS_LAST_FREE_OBJECTID - objectid + 1,
0);
wake_up(&root->ino_cache_wait);
}
tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu",
root->root_key.objectid);
if (IS_ERR(tsk))
fail_caching_thread(root);
}
/*
 * Allocate a free inode number for @root and store it in @objectid.
 *
 * Without the INODE_MAP_CACHE option this is just
 * btrfs_find_free_objectid(). With it, numbers are taken from the inode
 * number cache; when the cache has nothing to offer, caching is started and
 * the caller sleeps until the cache finished, failed, or gained free space.
 *
 * Returns 0 on success, -ENOSPC if caching finished without any free number
 * left, or whatever btrfs_find_free_objectid() returns on the fallback paths.
 */
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
{
	if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
		return btrfs_find_free_objectid(root, objectid);

	for (;;) {
		/* A result of 0 means the cache could not hand out a number. */
		*objectid = btrfs_find_ino_for_alloc(root);
		if (*objectid != 0)
			return 0;

		start_caching(root);

		wait_event(root->ino_cache_wait,
			   root->ino_cache_state == BTRFS_CACHE_FINISHED ||
			   root->ino_cache_state == BTRFS_CACHE_ERROR ||
			   root->free_ino_ctl->free_space > 0);

		/* Caching completed and there is still nothing to allocate. */
		if (root->ino_cache_state == BTRFS_CACHE_FINISHED &&
		    root->free_ino_ctl->free_space == 0)
			return -ENOSPC;

		/* Caching failed: fall back to the plain objectid counter. */
		if (root->ino_cache_state == BTRFS_CACHE_ERROR)
			return btrfs_find_free_objectid(root, objectid);

		/* Free space showed up, try the cache again. */
	}
}
/*
 * Give inode number @objectid back to @root by recording it in the pinned
 * tree (root->free_ino_pinned). No-op without the INODE_MAP_CACHE option.
 */
void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return;
again:
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
/* Caching is done: the number can be pinned without extra locking. */
__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
} else {
/*
 * Caching has not finished. Take commit_root_sem for write and
 * re-check the state under ino_cache_lock; if it became FINISHED
 * in the meantime, drop everything and take the simple path.
 * NOTE(review): presumably the write lock excludes caching_kthread,
 * which releases commit_root_sem with up_read() on exit - confirm.
 */
down_write(&fs_info->commit_root_sem);
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
spin_unlock(&root->ino_cache_lock);
up_write(&fs_info->commit_root_sem);
goto again;
}
spin_unlock(&root->ino_cache_lock);
/* Make sure caching has been started, then pin the number. */
start_caching(root);
__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
up_write(&fs_info->commit_root_sem);
}
}
/*
* When a transaction is committed, we'll move those inode numbers which are
* smaller than root->ino_cache_progress from pinned tree to free_ino tree, and
* others will just be dropped, because the commit root we were searching has
* changed.
*
* Must be called with root->fs_info->commit_root_sem held
*/
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
struct btrfs_free_space *info;
struct rb_node *n;
u64 count;
if (!btrfs_test_opt(root->fs_info, INODE_MAP_CACHE))
return;
while (1) {
spin_lock(rbroot_lock);
n = rb_first(rbroot);
if (!n) {
spin_unlock(rbroot_lock);
break;
}
info = rb_entry(n, struct btrfs_free_space, offset_index);
BUG_ON(info->bitmap); /* Logic error */
if (info->offset > root->ino_cache_progress)
count = 0;
else
count = min(root->ino_cache_progress - info->offset + 1,
info->bytes);
rb_erase(&info->offset_index, rbroot);
spin_unlock(rbroot_lock);
if (count)
__btrfs_add_free_space(root->fs_info, ctl,
info->offset, count, 0);
kmem_cache_free(btrfs_free_space_cachep, info);
}
}
#define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
#define INODES_PER_BITMAP (PAGE_SIZE * 8)
/*
 * Recompute ctl->extents_thresh so that the memory used by the free_ino tree
 * does not exceed what a bitmap-only representation would need.
 */
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct rb_node *last_node = rb_last(&ctl->free_space_offset);
	struct btrfs_free_space *last;
	int max_ino;
	int max_bitmaps;

	/* Empty tree: go back to the initial allowance. */
	if (!last_node) {
		ctl->extents_thresh = INIT_THRESHOLD;
		return;
	}

	last = rb_entry(last_node, struct btrfs_free_space, offset_index);

	/*
	 * Estimate the maximum inode number in the filesystem. Note we
	 * ignore the fact that this can be a bitmap, because we are
	 * not doing precise calculation.
	 * NOTE(review): only the size of the last entry is used here, its
	 * offset is not added - confirm this is intended.
	 */
	max_ino = last->bytes - 1;
	max_bitmaps = ALIGN(max_ino, INODES_PER_BITMAP) / INODES_PER_BITMAP;

	if (max_bitmaps > ctl->total_bitmaps)
		/* Spend the memory of the still unused bitmaps on extents. */
		ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
				      PAGE_SIZE / sizeof(*last);
	else
		ctl->extents_thresh = 0;
}
/*
 * Decide whether @info should be stored in a bitmap.
 *
 * A bitmap is used only when the extents threshold has been reached and
 * the chunk of inode numbers is not a big one (at most a tenth of what one
 * bitmap can describe).
 */
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	return ctl->free_extents >= ctl->extents_thresh &&
	       info->bytes <= INODES_PER_BITMAP / 10;
}
/* Callbacks for the free_ino tree; installed by btrfs_init_free_ino_ctl(). */
static const struct btrfs_free_space_op free_ino_op = {
.recalc_thresholds = recalculate_thresholds,
.use_bitmap = use_bitmap,
};
/*
 * Threshold callback for the pinned tree. Intentionally empty: the pinned
 * tree never uses bitmaps (see pinned_use_bitmap()), so there is nothing to
 * recompute.
 */
static void pinned_recalc_thresholds(struct btrfs_free_space_ctl *ctl)
{
}
/*
 * Bitmap policy callback for the pinned tree: always answers "no", so every
 * entry of the pinned tree is a plain extent. Both arguments are unused.
 */
static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
/*
* We always use extents for two reasons:
*
* - The pinned tree is only used during the process of caching
* work.
* - Make code simpler. See btrfs_unpin_free_ino().
*/
return false;
}
/* Callbacks for the pinned tree; installed by btrfs_init_free_ino_ctl(). */
static const struct btrfs_free_space_op pinned_free_ino_op = {
.recalc_thresholds = pinned_recalc_thresholds,
.use_bitmap = pinned_use_bitmap,
};
/*
 * Initialize the two free space controls that make up the inode number cache
 * of @root: the free_ino tree (numbers ready for allocation) and the pinned
 * tree (numbers returned while caching is in progress).
 */
void btrfs_init_free_ino_ctl(struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *free_ctl = root->free_ino_ctl;
	struct btrfs_free_space_ctl *pinned_ctl = root->free_ino_pinned;

	/* free_ino tree: one unit is one inode number, starting at 0. */
	spin_lock_init(&free_ctl->tree_lock);
	mutex_init(&free_ctl->cache_writeout_mutex);
	INIT_LIST_HEAD(&free_ctl->trimming_ranges);
	free_ctl->op = &free_ino_op;
	free_ctl->private = NULL;
	free_ctl->start = 0;
	free_ctl->unit = 1;
	/*
	 * Initially 16K of RAM may be used to cache chunks of inode numbers
	 * before we resort to bitmaps. This is somewhat arbitrary, but it
	 * will be adjusted at runtime.
	 */
	free_ctl->extents_thresh = INIT_THRESHOLD;

	/* pinned tree: same geometry, no extents allowance. */
	spin_lock_init(&pinned_ctl->tree_lock);
	pinned_ctl->op = &pinned_free_ino_op;
	pinned_ctl->private = NULL;
	pinned_ctl->start = 0;
	pinned_ctl->unit = 1;
	pinned_ctl->extents_thresh = 0;
}
/*
 * Write the inode number cache of @root to its cache inode as part of
 * transaction @trans.
 *
 * Returns 0 when there is nothing to do (root does not use an ino cache, root
 * is being deleted, INODE_MAP_CACHE is off) or when the cache was written,
 * -1 when caching has not finished yet, or a negative errno on failure.
 * The block reservation state of @trans is restored before returning.
 */
int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
struct btrfs_path *path;
struct inode *inode;
struct btrfs_block_rsv *rsv;
struct extent_changeset *data_reserved = NULL;
u64 num_bytes;
u64 alloc_hint = 0;
int ret;
int prealloc;
bool retry = false;
/* only fs tree and subvol/snap needs ino cache */
if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
(root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
return 0;
/* Don't save inode cache if we are deleting this root */
if (btrfs_root_refs(&root->root_item) == 0)
return 0;
if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
return 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
/*
 * Remember the caller's reservation (rsv, num_bytes) and switch the
 * handle to trans_block_rsv for the duration of this function; both
 * are put back at the "out" label.
 */
rsv = trans->block_rsv;
trans->block_rsv = &fs_info->trans_block_rsv;
num_bytes = trans->bytes_reserved;
/*
* 1 item for inode item insertion if need
* 4 items for inode item update (in the worst case)
* 1 items for slack space if we need do truncation
* 1 item for free space object
* 3 items for pre-allocation
*/
trans->bytes_reserved = btrfs_calc_insert_metadata_size(fs_info, 10);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
if (ret)
goto out;
trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
trans->bytes_reserved, 1);
again:
/*
 * Look up the cache inode. -ENOENT on the first attempt means it does
 * not exist yet: create it and look it up once more. Any other error,
 * or -ENOENT after the retry, is returned.
 */
inode = lookup_free_ino_inode(root, path);
if (IS_ERR(inode) && (PTR_ERR(inode) != -ENOENT || retry)) {
ret = PTR_ERR(inode);
goto out_release;
}
if (IS_ERR(inode)) {
BUG_ON(retry); /* Logic error */
retry = true;
ret = create_free_ino_inode(root, trans, path);
if (ret)
goto out_release;
goto again;
}
/*
 * NOTE(review): generation 0 presumably marks the on-disk cache as not
 * valid until it has been rewritten - confirm against the loader.
 */
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, root, inode);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_put;
}
/* Drop the previous contents of the cache inode, if any. */
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(trans, NULL, inode);
if (ret) {
/* -ENOSPC only skips saving the cache; other errors abort. */
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, ret);
goto out_put;
}
}
/* Only a completely built cache is written out. */
spin_lock(&root->ino_cache_lock);
if (root->ino_cache_state != BTRFS_CACHE_FINISHED) {
ret = -1;
spin_unlock(&root->ino_cache_lock);
goto out_put;
}
spin_unlock(&root->ino_cache_lock);
/*
 * Size of the preallocation: the extent entries rounded up to a whole
 * page, plus one page per bitmap, sampled under the tree lock.
 */
spin_lock(&ctl->tree_lock);
prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
prealloc = ALIGN(prealloc, PAGE_SIZE);
prealloc += ctl->total_bitmaps * PAGE_SIZE;
spin_unlock(&ctl->tree_lock);
/* Just to make sure we have enough space */
prealloc += 8 * PAGE_SIZE;
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved, 0,
prealloc);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
/* Preallocation failed: undo the delalloc reservation made above. */
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
iput(inode);
out_release:
/* Give back the metadata reserved at the top of the function. */
trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
trans->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv,
trans->bytes_reserved, NULL);
out:
/* Restore the reservation state the caller handed in. */
trans->block_rsv = rsv;
trans->bytes_reserved = num_bytes;
btrfs_free_path(path);
extent_changeset_free(data_reserved);
return ret;
}
/*
 * Find the highest objectid currently used in @root.
 *
 * Searches for the largest possible key with objectid
 * BTRFS_LAST_FREE_OBJECTID and looks at the item right before the returned
 * slot. The result stored in @objectid is never smaller than
 * BTRFS_FIRST_FREE_OBJECTID - 1.
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, -EUCLEAN if
 * the search key itself exists in the tree (corruption), or the negative
 * error returned by btrfs_search_slot().
 */
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
	struct btrfs_path *path;
	struct btrfs_key search_key;
	struct btrfs_key found_key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
	search_key.type = -1;
	search_key.offset = (u64)-1;
	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
	if (ret < 0)
		goto error;
	if (ret == 0) {
		/*
		 * An exact match for this key must not exist. Finding one
		 * means the tree is corrupted; report it to the caller
		 * instead of crashing the kernel.
		 */
		ret = -EUCLEAN;
		goto error;
	}

	if (path->slots[0] > 0) {
		/* The item before the insertion slot has the highest key. */
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0] - 1);
		*objectid = max_t(u64, found_key.objectid,
				  BTRFS_FIRST_FREE_OBJECTID - 1);
	} else {
		*objectid = BTRFS_FIRST_FREE_OBJECTID - 1;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
/*
 * Hand out the next objectid of @root by bumping root->highest_objectid
 * under root->objectid_mutex.
 *
 * Returns 0 and stores the new id in @objectid, or -ENOSPC (after logging a
 * warning) once highest_objectid has reached BTRFS_LAST_FREE_OBJECTID.
 */
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
{
	int err = 0;

	mutex_lock(&root->objectid_mutex);
	if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
		btrfs_warn(root->fs_info,
			   "the objectid of root %llu reaches its highest value",
			   root->root_key.objectid);
		err = -ENOSPC;
	} else {
		*objectid = ++root->highest_objectid;
	}
	mutex_unlock(&root->objectid_mutex);

	return err;
}

View File

@ -1,16 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BTRFS_INODE_MAP_H
#define BTRFS_INODE_MAP_H
void btrfs_init_free_ino_ctl(struct btrfs_root *root);
void btrfs_unpin_free_ino(struct btrfs_root *root);
void btrfs_return_ino(struct btrfs_root *root, u64 objectid);
int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid);
int btrfs_save_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
#endif

File diff suppressed because it is too large Load Diff

View File

@ -34,7 +34,6 @@
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "inode-map.h"
#include "backref.h"
#include "rcu-string.h"
#include "send.h"
@ -193,6 +192,15 @@ static int check_fsflags(unsigned int old_flags, unsigned int flags)
return 0;
}
/*
 * Check the requested inode @flags against properties of the filesystem.
 *
 * Returns -EPERM when FS_NOCOW_FL is requested on a filesystem for which
 * btrfs_is_zoned() is true, 0 otherwise.
 */
static int check_fsflags_compatible(struct btrfs_fs_info *fs_info,
				    unsigned int flags)
{
	if (!btrfs_is_zoned(fs_info))
		return 0;

	return (flags & FS_NOCOW_FL) ? -EPERM : 0;
}
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
struct inode *inode = file_inode(file);
@ -230,6 +238,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (ret)
goto out_unlock;
ret = check_fsflags_compatible(fs_info, fsflags);
if (ret)
goto out_unlock;
binode_flags = binode->flags;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
@ -336,7 +348,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
out_end_trans:
btrfs_end_transaction(trans);
@ -479,7 +491,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
btrfs_end_transaction(trans);
@ -733,7 +745,7 @@ static noinline int create_subvol(struct inode *dir,
}
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@ -1275,6 +1287,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
u64 page_end;
u64 page_cnt;
u64 start = (u64)start_index << PAGE_SHIFT;
u64 search_start;
int ret;
int i;
int i_done;
@ -1371,6 +1384,40 @@ static int cluster_pages_for_defrag(struct inode *inode,
lock_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state);
/*
* When defragmenting we skip ranges that have holes or inline extents,
* (check should_defrag_range()), to avoid unnecessary IO and wasting
* space. At btrfs_defrag_file(), we check if a range should be defragged
* before locking the inode and then, if it should, we trigger a sync
* page cache readahead - we lock the inode only after that to avoid
* blocking for too long other tasks that possibly want to operate on
* other file ranges. But before we were able to get the inode lock,
* some other task may have punched a hole in the range, or we may have
* now an inline extent, in which case we should not defrag. So check
* for that here, where we have the inode and the range locked, and bail
* out if that happened.
*/
search_start = page_start;
while (search_start < page_end) {
struct extent_map *em;
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, search_start,
page_end - search_start);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out_unlock_range;
}
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
free_extent_map(em);
/* Ok, 0 means we did not defrag anything */
ret = 0;
goto out_unlock_range;
}
search_start = extent_map_end(em);
free_extent_map(em);
}
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 0, 0, &cached_state);
@ -1401,6 +1448,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
out_unlock_range:
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state);
out:
for (i = 0; i < i_done; i++) {
unlock_page(pages[i]);
@ -1678,7 +1729,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
btrfs_info(fs_info, "resizing devid %llu", devid);
}
device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
if (!device) {
btrfs_info(fs_info, "resizer unable to find device %llu",
devid);
@ -3321,7 +3372,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
rcu_read_lock();
dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
NULL, true);
NULL);
if (!dev) {
ret = -ENODEV;
@ -3393,7 +3444,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
ret = -ENOMEM;
goto out_free;
}
path->leave_spinning = 1;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {

View File

@ -17,404 +17,89 @@
* Extent buffer locking
* =====================
*
* The locks use a custom scheme that allows to do more operations than are
* available fromt current locking primitives. The building blocks are still
* rwlock and wait queues.
*
* Required semantics:
* We use a rw_semaphore for tree locking, and the semantics are exactly the
* same:
*
* - reader/writer exclusion
* - writer/writer exclusion
* - reader/reader sharing
* - spinning lock semantics
* - blocking lock semantics
* - try-lock semantics for readers and writers
* - one level nesting, allowing read lock to be taken by the same thread that
* already has write lock
*
* The extent buffer locks (also called tree locks) manage access to eb data
* related to the storage in the b-tree (keys, items, but not the individual
* members of eb).
* We want concurrency of many readers and safe updates. The underlying locking
* is done by read-write spinlock and the blocking part is implemented using
* counters and wait queues.
*
* spinning semantics - the low-level rwlock is held so all other threads that
* want to take it are spinning on it.
*
* blocking semantics - the low-level rwlock is not held but the counter
* denotes how many times the blocking lock was held;
* sleeping is possible
*
* Write lock always allows only one thread to access the data.
*
*
* Debugging
* ---------
*
* There are additional state counters that are asserted in various contexts,
* removed from non-debug build to reduce extent_buffer size and for
* performance reasons.
*
*
* Lock recursion
* --------------
*
* A write operation on a tree might indirectly start a look up on the same
* tree. This can happen when btrfs_cow_block locks the tree and needs to
* lookup free extents.
*
* btrfs_cow_block
* ..
* alloc_tree_block_no_bg_flush
* btrfs_alloc_tree_block
* btrfs_reserve_extent
* ..
* load_free_space_cache
* ..
* btrfs_lookup_file_extent
* btrfs_search_slot
*
*
* Locking pattern - spinning
* --------------------------
*
* The simple locking scenario, the +--+ denotes the spinning section.
*
* +- btrfs_tree_lock
* | - extent_buffer::rwlock is held
* | - no heavy operations should happen, eg. IO, memory allocations, large
* | structure traversals
* +- btrfs_tree_unock
*
*
* Locking pattern - blocking
* --------------------------
*
* The blocking write uses the following scheme. The +--+ denotes the spinning
* section.
*
* +- btrfs_tree_lock
* |
* +- btrfs_set_lock_blocking_write
*
* - allowed: IO, memory allocations, etc.
*
* -- btrfs_tree_unlock - note, no explicit unblocking necessary
*
*
* Blocking read is similar.
*
* +- btrfs_tree_read_lock
* |
* +- btrfs_set_lock_blocking_read
*
* - heavy operations allowed
*
* +- btrfs_tree_read_unlock_blocking
* |
* +- btrfs_tree_read_unlock
*
* The rwsem implementation does opportunistic spinning which reduces number of
* times the locking task needs to sleep.
*/
#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers);
eb->spinning_writers++;
}
static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers != 1);
eb->spinning_writers--;
}
static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers);
}
static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
atomic_inc(&eb->spinning_readers);
}
static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
}
static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
atomic_inc(&eb->read_locks);
}
static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
atomic_dec(&eb->read_locks);
}
static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}
static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
eb->write_locks++;
}
static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
eb->write_locks--;
}
#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif
/*
* Mark already held read lock as blocking. Can be nested in write lock by the
* same thread.
* __btrfs_tree_read_lock - lock extent buffer for read
* @eb: the eb to be locked
* @nest: the nesting level to be used for lockdep
*
* Use when there are potentially long operations ahead so other thread waiting
* on the lock will not actively spin but sleep instead.
*
* The rwlock is released and blocking reader counter is increased.
* This takes the read lock on the extent buffer, using the specified nesting
* level for lockdep purposes.
*/
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_read(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
* lock, we are the owner and it'll never change.
*/
if (eb->lock_recursed && current->pid == eb->lock_owner)
return;
btrfs_assert_tree_read_locked(eb);
atomic_inc(&eb->blocking_readers);
btrfs_assert_spinning_readers_put(eb);
read_unlock(&eb->lock);
}
/*
* Mark already held write lock as blocking.
*
* Use when there are potentially long operations ahead so other threads
* waiting on the lock will not actively spin but sleep instead.
*
* The rwlock is released and blocking writers is set.
*/
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_write(eb);
/*
* No lock is required. The lock owner may change if we have a read
* lock, but it won't change to or away from us. If we have the write
* lock, we are the owner and it'll never change.
*/
if (eb->lock_recursed && current->pid == eb->lock_owner)
return;
if (eb->blocking_writers == 0) {
btrfs_assert_spinning_writers_put(eb);
btrfs_assert_tree_locked(eb);
WRITE_ONCE(eb->blocking_writers, 1);
write_unlock(&eb->lock);
}
}
/*
* Lock the extent buffer for read. Wait for any writers (spinning or blocking).
* Can be nested in write lock by the same thread.
*
* Use when the locked section does only lightweight actions and busy waiting
* would be cheaper than making other threads do the wait/wake loop.
*
* The rwlock is held upon exit.
*/
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
bool recurse)
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
u64 start_ns = 0;
if (trace_btrfs_tree_read_lock_enabled())
start_ns = ktime_get_ns();
again:
read_lock(&eb->lock);
BUG_ON(eb->blocking_writers == 0 &&
current->pid == eb->lock_owner);
if (eb->blocking_writers) {
if (current->pid == eb->lock_owner) {
/*
* This extent is already write-locked by our thread.
* We allow an additional read lock to be added because
* it's for the same thread. btrfs_find_all_roots()
* depends on this as it may be called on a partly
* (write-)locked tree.
*/
WARN_ON(!recurse);
BUG_ON(eb->lock_recursed);
eb->lock_recursed = true;
read_unlock(&eb->lock);
trace_btrfs_tree_read_lock(eb, start_ns);
return;
}
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
READ_ONCE(eb->blocking_writers) == 0);
goto again;
}
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
down_read_nested(&eb->lock, nest);
eb->lock_owner = current->pid;
trace_btrfs_tree_read_lock(eb, start_ns);
}
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, false);
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}
/*
* Lock extent buffer for read, optimistically expecting that there are no
* contending blocking writers. If there are, don't wait.
*
* Return 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
if (READ_ONCE(eb->blocking_writers))
return 0;
read_lock(&eb->lock);
/* Refetch value after lock */
if (READ_ONCE(eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
trace_btrfs_tree_read_lock_atomic(eb);
return 1;
}
/*
* Try-lock for read. Don't block or wait for contending writers.
* Try-lock for read.
*
* Return 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
if (READ_ONCE(eb->blocking_writers))
return 0;
if (!read_trylock(&eb->lock))
return 0;
/* Refetch value after lock */
if (READ_ONCE(eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
if (down_read_trylock(&eb->lock)) {
eb->lock_owner = current->pid;
trace_btrfs_try_tree_read_lock(eb);
return 1;
}
btrfs_assert_tree_read_locks_get(eb);
btrfs_assert_spinning_readers_get(eb);
trace_btrfs_try_tree_read_lock(eb);
return 1;
return 0;
}
/*
* Try-lock for write. May block until the lock is uncontended, but does not
* wait until it is free.
* Try-lock for write.
*
* Return 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
/* Refetch value after lock */
if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
if (down_write_trylock(&eb->lock)) {
eb->lock_owner = current->pid;
trace_btrfs_try_tree_write_lock(eb);
return 1;
}
btrfs_assert_tree_write_locks_get(eb);
btrfs_assert_spinning_writers_get(eb);
eb->lock_owner = current->pid;
trace_btrfs_try_tree_write_lock(eb);
return 1;
return 0;
}
/*
* Release read lock. Must be used only if the lock is in spinning mode. If
* the read lock is nested, must pair with read lock before the write unlock.
*
* The rwlock is not held upon exit.
* Release read lock.
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
* The write unlock will do a barrier for us, and the lock_recursed
* field only matters to the lock owner.
*/
if (eb->lock_recursed && current->pid == eb->lock_owner) {
eb->lock_recursed = false;
return;
}
btrfs_assert_tree_read_locked(eb);
btrfs_assert_spinning_readers_put(eb);
btrfs_assert_tree_read_locks_put(eb);
read_unlock(&eb->lock);
eb->lock_owner = 0;
up_read(&eb->lock);
}
/*
* Release read lock, previously set to blocking by a pairing call to
* btrfs_set_lock_blocking_read(). Can be nested in write lock by the same
* thread.
* __btrfs_tree_lock - lock eb for write
* @eb: the eb to lock
* @nest: the nesting to use for the lock
*
* State of rwlock is unchanged, last reader wakes waiting threads.
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock_blocking(eb);
/*
* if we're nested, we have the write lock. No new locking
* is needed as long as we are the lock owner.
* The write unlock will do a barrier for us, and the lock_recursed
* field only matters to the lock owner.
*/
if (eb->lock_recursed && current->pid == eb->lock_owner) {
eb->lock_recursed = false;
return;
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
/* atomic_dec_and_test implies a barrier */
if (atomic_dec_and_test(&eb->blocking_readers))
cond_wake_up_nomb(&eb->read_lock_wq);
btrfs_assert_tree_read_locks_put(eb);
}
/*
* Lock for write. Wait for all blocking and spinning readers and writers. This
* starts context where reader lock could be nested by the same thread.
*
* The rwlock is held for write upon exit.
* Returns with the eb->lock write locked.
*/
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
__acquires(&eb->lock)
@ -424,19 +109,7 @@ void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
if (trace_btrfs_tree_lock_enabled())
start_ns = ktime_get_ns();
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
wait_event(eb->write_lock_wq, READ_ONCE(eb->blocking_writers) == 0);
write_lock(&eb->lock);
/* Refetch value after lock */
if (atomic_read(&eb->blocking_readers) ||
READ_ONCE(eb->blocking_writers)) {
write_unlock(&eb->lock);
goto again;
}
btrfs_assert_spinning_writers_get(eb);
btrfs_assert_tree_write_locks_get(eb);
down_write_nested(&eb->lock, nest);
eb->lock_owner = current->pid;
trace_btrfs_tree_lock(eb, start_ns);
}
@ -447,68 +120,13 @@ void btrfs_tree_lock(struct extent_buffer *eb)
}
/*
* Release the write lock, either blocking or spinning (ie. there's no need
* for an explicit blocking unlock, like btrfs_tree_read_unlock_blocking).
* This also ends the context for nesting, the read lock must have been
* released already.
*
* Tasks blocked and waiting are woken, rwlock is not held upon exit.
* Release the write lock.
*/
void btrfs_tree_unlock(struct extent_buffer *eb)
{
/*
* This is read both locked and unlocked but always by the same thread
* that already owns the lock so we don't need to use READ_ONCE
*/
int blockers = eb->blocking_writers;
BUG_ON(blockers > 1);
btrfs_assert_tree_locked(eb);
trace_btrfs_tree_unlock(eb);
eb->lock_owner = 0;
btrfs_assert_tree_write_locks_put(eb);
if (blockers) {
btrfs_assert_no_spinning_writers(eb);
/* Unlocked write */
WRITE_ONCE(eb->blocking_writers, 0);
/*
* We need to order modifying blocking_writers above with
* actually waking up the sleepers to ensure they see the
* updated value of blocking_writers
*/
cond_wake_up(&eb->write_lock_wq);
} else {
btrfs_assert_spinning_writers_put(eb);
write_unlock(&eb->lock);
}
}
/*
* Set all locked nodes in the path to blocking locks. This should be done
* before scheduling
*/
void btrfs_set_path_blocking(struct btrfs_path *p)
{
int i;
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
if (!p->nodes[i] || !p->locks[i])
continue;
/*
* If we currently have a spinning reader or writer lock this
* will bump the count of blocking holders and drop the
* spinlock.
*/
if (p->locks[i] == BTRFS_READ_LOCK) {
btrfs_set_lock_blocking_read(p->nodes[i]);
p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
} else if (p->locks[i] == BTRFS_WRITE_LOCK) {
btrfs_set_lock_blocking_write(p->nodes[i]);
p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
}
}
up_write(&eb->lock);
}
/*
@ -564,14 +182,13 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
*
* Return: root extent buffer with read lock held
*/
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
bool recurse)
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL, recurse);
btrfs_tree_read_lock(eb);
if (eb == root->node)
break;
btrfs_tree_read_unlock(eb);

View File

@ -13,8 +13,6 @@
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
#define BTRFS_WRITE_LOCK_BLOCKING 3
#define BTRFS_READ_LOCK_BLOCKING 4
/*
* We are limited in number of subclasses by MAX_LOCKDEP_SUBCLASSES, which at
@ -89,42 +87,28 @@ void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest,
bool recurse);
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest);
void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_tree_read_unlock(struct extent_buffer *eb);
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
void btrfs_set_lock_blocking_read(struct extent_buffer *eb);
void btrfs_set_lock_blocking_write(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *__btrfs_read_lock_root_node(struct btrfs_root *root,
bool recurse);
static inline struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
return __btrfs_read_lock_root_node(root, false);
}
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
BUG_ON(!eb->write_locks);
lockdep_assert_held(&eb->lock);
}
#else
static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
#endif
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_unlock_up_safe(struct btrfs_path *path, int level);
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
if (rw == BTRFS_WRITE_LOCK)
btrfs_tree_unlock(eb);
else if (rw == BTRFS_READ_LOCK_BLOCKING)
btrfs_tree_read_unlock_blocking(eb);
else if (rw == BTRFS_READ_LOCK)
btrfs_tree_read_unlock(eb);
else

View File

@ -854,51 +854,6 @@ btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
return entry;
}
/*
 * Search the ordered extents for the one corresponding to 'offset' and try to
 * copy checksums for 'disk_bytenr' out of the ordered_sum entries attached to
 * it. This is used because we allow pages to be reclaimed before their
 * checksum is actually put into the btree.
 *
 * @inode:       inode whose ordered tree is searched
 * @offset:      file offset used to look up the ordered extent
 * @disk_bytenr: disk byte number the checksums are wanted for; the local copy
 *               is advanced as checksums are copied
 * @sum:         output buffer the checksums are copied into
 * @len:         limit for the copy; the walk stops once the running output
 *               offset equals it
 *
 * Return: 0 if no ordered extent is found for 'offset', otherwise the running
 * output offset ('index') reached when the walk ends.
 *
 * NOTE(review): 'index' advances by num_sectors * csum_size (bytes), yet
 * 'len - index' is clamped against a block count in min_t() below. Confirm
 * which unit callers pass in 'len'.
 */
int btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset,
u64 disk_bytenr, u8 *sum, int len)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_sum *ordered_sum;
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
unsigned long num_sectors;
unsigned long i;
u32 sectorsize = btrfs_inode_sectorsize(inode);
const u8 blocksize_bits = inode->vfs_inode.i_sb->s_blocksize_bits;
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
/* Running offset into 'sum'; also the return value. */
int index = 0;
/* Nothing to copy when no ordered extent is found for this offset. */
ordered = btrfs_lookup_ordered_extent(inode, offset);
if (!ordered)
return 0;
/* The ordered extent's sum list is walked with the ordered tree lock held. */
spin_lock_irq(&tree->lock);
list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
/* Only use entries whose [bytenr, bytenr + len) range contains disk_bytenr. */
if (disk_bytenr >= ordered_sum->bytenr &&
disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
/* Block index of disk_bytenr inside this ordered_sum. */
i = (disk_bytenr - ordered_sum->bytenr) >> blocksize_bits;
/* Blocks described by this entry, then clamped to what is still wanted. */
num_sectors = ordered_sum->len >> blocksize_bits;
num_sectors = min_t(int, len - index, num_sectors - i);
memcpy(sum + index, ordered_sum->sums + i * csum_size,
num_sectors * csum_size);
index += (int)num_sectors * csum_size;
/* Stop as soon as the requested amount has been gathered. */
if (index == len)
goto out;
/* Otherwise continue with the range right after what was just copied. */
disk_bytenr += num_sectors * sectorsize;
}
}
out:
spin_unlock_irq(&tree->lock);
/* Pairs with the btrfs_lookup_ordered_extent() call above. */
btrfs_put_ordered_extent(ordered);
return index;
}
/*
* btrfs_flush_ordered_range - Lock the passed range and ensures all pending
* ordered extents in it are run to completion.

View File

@ -137,9 +137,8 @@ static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
int csum_size = btrfs_super_csum_size(fs_info->super_copy);
return sizeof(struct btrfs_ordered_sum) + num_sectors * csum_size;
return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
}
static inline void
@ -184,8 +183,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
u64 len);
void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
struct list_head *list);
int btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset,
u64 disk_bytenr, u8 *sum, int len);
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,

View File

@ -177,8 +177,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
__le64 subvol_id;
read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
pr_info("\t\tsubvol_id %llu\n",
(unsigned long long)le64_to_cpu(subvol_id));
pr_info("\t\tsubvol_id %llu\n", le64_to_cpu(subvol_id));
item_size -= sizeof(u64);
offset += sizeof(u64);
}
@ -191,15 +190,8 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
static void print_eb_refs_lock(struct extent_buffer *eb)
{
#ifdef CONFIG_BTRFS_DEBUG
btrfs_info(eb->fs_info,
"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
atomic_read(&eb->refs), eb->write_locks,
atomic_read(&eb->read_locks),
eb->blocking_writers,
atomic_read(&eb->blocking_readers),
eb->spinning_writers,
atomic_read(&eb->spinning_readers),
eb->lock_owner, current->pid);
btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
atomic_read(&eb->refs), eb->lock_owner, current->pid);
#endif
}
@ -398,6 +390,7 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
btrfs_node_key_to_cpu(c, &first_key, i);
next = read_tree_block(fs_info, btrfs_node_blockptr(c, i),
btrfs_header_owner(c),
btrfs_node_ptr_generation(c, i),
level - 1, &first_key);
if (IS_ERR(next)) {

View File

@ -894,8 +894,6 @@ static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
key.objectid = 0;
key.offset = 0;
key.type = 0;
@ -1944,34 +1942,22 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
struct btrfs_key dst_key;
if (src_path->nodes[cur_level] == NULL) {
struct btrfs_key first_key;
struct extent_buffer *eb;
int parent_slot;
u64 child_gen;
u64 child_bytenr;
eb = src_path->nodes[cur_level + 1];
parent_slot = src_path->slots[cur_level + 1];
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
eb = read_tree_block(fs_info, child_bytenr, child_gen,
cur_level, &first_key);
eb = btrfs_read_node_slot(eb, parent_slot);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
} else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
}
src_path->nodes[cur_level] = eb;
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_read(eb);
src_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
src_path->locks[cur_level] = BTRFS_READ_LOCK;
}
src_path->slots[cur_level] = dst_path->slots[cur_level];
@ -2066,10 +2052,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
/* Read the tree block if needed */
if (dst_path->nodes[cur_level] == NULL) {
struct btrfs_key first_key;
int parent_slot;
u64 child_gen;
u64 child_bytenr;
/*
* dst_path->nodes[root_level] must be initialized before
@ -2088,31 +2072,23 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
*/
eb = dst_path->nodes[cur_level + 1];
parent_slot = dst_path->slots[cur_level + 1];
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
/* This node is old, no need to trace */
if (child_gen < last_snapshot)
goto out;
eb = read_tree_block(fs_info, child_bytenr, child_gen,
cur_level, &first_key);
eb = btrfs_read_node_slot(eb, parent_slot);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
} else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
}
dst_path->nodes[cur_level] = eb;
dst_path->slots[cur_level] = 0;
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_read(eb);
dst_path->locks[cur_level] = BTRFS_READ_LOCK_BLOCKING;
dst_path->locks[cur_level] = BTRFS_READ_LOCK;
need_cleanup = true;
}
@ -2256,38 +2232,28 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
level = root_level;
while (level >= 0) {
if (path->nodes[level] == NULL) {
struct btrfs_key first_key;
int parent_slot;
u64 child_gen;
u64 child_bytenr;
/*
* We need to get child blockptr/gen from parent before
* we can read it.
* We need to get child blockptr from parent before we
* can read it.
*/
eb = path->nodes[level + 1];
parent_slot = path->slots[level + 1];
child_bytenr = btrfs_node_blockptr(eb, parent_slot);
child_gen = btrfs_node_ptr_generation(eb, parent_slot);
btrfs_node_key_to_cpu(eb, &first_key, parent_slot);
eb = read_tree_block(fs_info, child_bytenr, child_gen,
level, &first_key);
eb = btrfs_read_node_slot(eb, parent_slot);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
goto out;
} else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
}
path->nodes[level] = eb;
path->slots[level] = 0;
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_read(eb);
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
path->locks[level] = BTRFS_READ_LOCK;
ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
fs_info->nodesize,
@ -4242,7 +4208,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
spin_unlock(&blocks->lock);
/* Read out reloc subtree root */
reloc_eb = read_tree_block(fs_info, block->reloc_bytenr,
reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0,
block->reloc_generation, block->level,
&block->first_key);
if (IS_ERR(reloc_eb)) {

View File

@ -1097,7 +1097,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
u64 last_end = (u64)last->bi_iter.bi_sector << 9;
u64 last_end = last->bi_iter.bi_sector << 9;
last_end += last->bi_iter.bi_size;
/*
@ -1163,7 +1163,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
struct bvec_iter iter;
int i = 0;
start = (u64)bio->bi_iter.bi_sector << 9;
start = bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->bbio->raid_map[0];
page_index = stripe_offset >> PAGE_SHIFT;
@ -1374,7 +1374,7 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
struct bio *bio)
{
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 logical = bio->bi_iter.bi_sector << 9;
int i;
for (i = 0; i < rbio->nr_data; i++) {
@ -2150,7 +2150,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
if (rbio->faila == -1) {
btrfs_warn(fs_info,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
__func__, (u64)bio->bi_iter.bi_sector << 9,
__func__, bio->bi_iter.bi_sector << 9,
(u64)bio->bi_iter.bi_size, bbio->map_type);
if (generic_io)
btrfs_put_bbio(bbio);

View File

@ -52,6 +52,7 @@ struct reada_extctl {
struct reada_extent {
u64 logical;
u64 owner_root;
struct btrfs_key top;
struct list_head extctl;
int refcnt;
@ -59,6 +60,7 @@ struct reada_extent {
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
int scheduled;
int level;
};
struct reada_zone {
@ -87,7 +89,8 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
static int reada_add_block(struct reada_control *rc, u64 logical,
struct btrfs_key *top, u64 generation);
struct btrfs_key *top, u64 owner_root,
u64 generation, int level);
/* recurses */
/* in case of err, eb might be NULL */
@ -165,7 +168,9 @@ static void __readahead_hook(struct btrfs_fs_info *fs_info,
if (rec->generation == generation &&
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
reada_add_block(rc, bytenr, &next_key, n_gen);
reada_add_block(rc, bytenr, &next_key,
btrfs_header_owner(eb), n_gen,
btrfs_header_level(eb) - 1);
}
}
@ -298,7 +303,8 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
u64 logical,
struct btrfs_key *top)
struct btrfs_key *top,
u64 owner_root, int level)
{
int ret;
struct reada_extent *re = NULL;
@ -331,6 +337,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&re->extctl);
spin_lock_init(&re->lock);
re->refcnt = 1;
re->owner_root = owner_root;
re->level = level;
/*
* map block
@ -531,6 +539,8 @@ static void reada_zone_release(struct kref *kref)
{
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
lockdep_assert_held(&zone->device->fs_info->reada_lock);
radix_tree_delete(&zone->device->reada_zones,
zone->end >> PAGE_SHIFT);
@ -546,14 +556,15 @@ static void reada_control_release(struct kref *kref)
}
static int reada_add_block(struct reada_control *rc, u64 logical,
struct btrfs_key *top, u64 generation)
struct btrfs_key *top, u64 owner_root,
u64 generation, int level)
{
struct btrfs_fs_info *fs_info = rc->fs_info;
struct reada_extent *re;
struct reada_extctl *rec;
/* takes one ref */
re = reada_find_extent(fs_info, logical, top);
re = reada_find_extent(fs_info, logical, top, owner_root, level);
if (!re)
return -1;
@ -645,12 +656,13 @@ static int reada_pick_zone(struct btrfs_device *dev)
}
static int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
int mirror_num, struct extent_buffer **eb)
u64 owner_root, int level, int mirror_num,
struct extent_buffer **eb)
{
struct extent_buffer *buf = NULL;
int ret;
buf = btrfs_find_create_tree_block(fs_info, bytenr);
buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level);
if (IS_ERR(buf))
return 0;
@ -738,7 +750,8 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
logical = re->logical;
atomic_inc(&dev->reada_in_flight);
ret = reada_tree_block_flagged(fs_info, logical, mirror_num, &eb);
ret = reada_tree_block_flagged(fs_info, logical, re->owner_root,
re->level, mirror_num, &eb);
if (ret)
__readahead_hook(fs_info, re, NULL, ret);
else if (eb)
@ -945,6 +958,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
u64 start;
u64 generation;
int ret;
int level;
struct extent_buffer *node;
static struct btrfs_key max_key = {
.objectid = (u64)-1,
@ -967,9 +981,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
node = btrfs_root_node(root);
start = node->start;
generation = btrfs_header_generation(node);
level = btrfs_header_level(node);
free_extent_buffer(node);
ret = reada_add_block(rc, start, &max_key, generation);
ret = reada_add_block(rc, start, &max_key, root->root_key.objectid,
generation, level);
if (ret) {
kfree(rc);
return ERR_PTR(ret);

View File

@ -551,34 +551,19 @@ static int process_leaf(struct btrfs_root *root,
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
int level, u64 *bytenr, u64 *num_bytes)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *eb;
u64 block_bytenr, gen;
int ret = 0;
while (level >= 0) {
if (level) {
struct btrfs_key first_key;
block_bytenr = btrfs_node_blockptr(path->nodes[level],
path->slots[level]);
gen = btrfs_node_ptr_generation(path->nodes[level],
path->slots[level]);
btrfs_node_key_to_cpu(path->nodes[level], &first_key,
path->slots[level]);
eb = read_tree_block(fs_info, block_bytenr, gen,
level - 1, &first_key);
eb = btrfs_read_node_slot(path->nodes[level],
path->slots[level]);
if (IS_ERR(eb))
return PTR_ERR(eb);
if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_read(eb);
path->nodes[level-1] = eb;
path->slots[level-1] = 0;
path->locks[level-1] = BTRFS_READ_LOCK_BLOCKING;
path->locks[level-1] = BTRFS_READ_LOCK;
} else {
ret = process_leaf(root, path, bytenr, num_bytes);
if (ret)
@ -799,8 +784,7 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
if (!be) {
btrfs_err(fs_info,
"trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!",
action, (unsigned long long)bytenr,
(unsigned long long)num_bytes);
action, bytenr, num_bytes);
dump_ref_action(fs_info, ra);
kfree(ref);
kfree(ra);
@ -1001,11 +985,10 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
return -ENOMEM;
eb = btrfs_read_lock_root_node(fs_info->extent_root);
btrfs_set_lock_blocking_read(eb);
level = btrfs_header_level(eb);
path->nodes[level] = eb;
path->slots[level] = 0;
path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
path->locks[level] = BTRFS_READ_LOCK;
while (1) {
/*

View File

@ -31,10 +31,10 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
endoff = destoff + olen;
if (endoff > inode->i_size) {
i_size_write(inode, endoff);
btrfs_inode_safe_disk_i_size_write(inode, 0);
btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
}
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
@ -163,6 +163,7 @@ static int clone_copy_inline_extent(struct inode *dst,
const u64 aligned_end = ALIGN(new_key->offset + datal,
fs_info->sectorsize);
struct btrfs_trans_handle *trans = NULL;
struct btrfs_drop_extents_args drop_args = { 0 };
int ret;
struct btrfs_key key;
@ -252,7 +253,11 @@ static int clone_copy_inline_extent(struct inode *dst,
trans = NULL;
goto out;
}
ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
drop_args.path = path;
drop_args.start = drop_start;
drop_args.end = aligned_end;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, BTRFS_I(dst), &drop_args);
if (ret)
goto out;
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
@ -263,7 +268,7 @@ static int clone_copy_inline_extent(struct inode *dst,
btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]),
size);
inode_add_bytes(dst, datal);
btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
out:
@ -347,7 +352,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
u64 drop_start;
/* Note the key will change type as we walk through the tree */
path->leave_spinning = 1;
ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
0, 0);
if (ret < 0)
@ -417,7 +421,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
size);
btrfs_release_path(path);
path->leave_spinning = 0;
memcpy(&new_key, &key, sizeof(new_key));
new_key.objectid = btrfs_ino(BTRFS_I(inode));
@ -533,7 +536,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
* mixing buffered and direct IO writes against this file.
*/
btrfs_release_path(path);
path->leave_spinning = 0;
ret = btrfs_replace_file_extents(inode, path, last_dest_end,
destoff + len - 1, NULL, &trans);
@ -652,7 +654,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
if (destoff > inode->i_size) {
const u64 wb_start = ALIGN_DOWN(inode->i_size, bs);
ret = btrfs_cont_expand(inode, inode->i_size, destoff);
ret = btrfs_cont_expand(BTRFS_I(inode), inode->i_size, destoff);
if (ret)
return ret;
/*

View File

@ -18,7 +18,6 @@
#include "btrfs_inode.h"
#include "async-thread.h"
#include "free-space-cache.h"
#include "inode-map.h"
#include "qgroup.h"
#include "print-tree.h"
#include "delalloc-space.h"
@ -783,7 +782,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
btrfs_set_root_refs(root_item, 0);
memset(&root_item->drop_progress, 0,
sizeof(struct btrfs_disk_key));
root_item->drop_level = 0;
btrfs_set_root_drop_level(root_item, 0);
}
btrfs_tree_unlock(eb);
@ -1196,7 +1195,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
eb = btrfs_lock_root_node(dest);
btrfs_set_lock_blocking_write(eb);
level = btrfs_header_level(eb);
if (level < lowest_level) {
@ -1210,7 +1208,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
BTRFS_NESTING_COW);
BUG_ON(ret);
}
btrfs_set_lock_blocking_write(eb);
if (next_key) {
next_key->objectid = (u64)-1;
@ -1220,8 +1217,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
parent = eb;
while (1) {
struct btrfs_key first_key;
level = btrfs_header_level(parent);
BUG_ON(level < lowest_level);
@ -1237,7 +1232,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
old_bytenr = btrfs_node_blockptr(parent, slot);
blocksize = fs_info->nodesize;
old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
btrfs_node_key_to_cpu(parent, &first_key, slot);
if (level <= max_level) {
eb = path->nodes[level];
@ -1262,15 +1256,10 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
break;
}
eb = read_tree_block(fs_info, old_bytenr, old_ptr_gen,
level - 1, &first_key);
eb = btrfs_read_node_slot(parent, slot);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
break;
} else if (!extent_buffer_uptodate(eb)) {
ret = -EIO;
free_extent_buffer(eb);
break;
}
btrfs_tree_lock(eb);
if (cow) {
@ -1279,7 +1268,6 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
BTRFS_NESTING_COW);
BUG_ON(ret);
}
btrfs_set_lock_blocking_write(eb);
btrfs_tree_unlock(parent);
free_extent_buffer(parent);
@ -1418,10 +1406,8 @@ static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
int *level)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *eb = NULL;
int i;
u64 bytenr;
u64 ptr_gen = 0;
u64 last_snapshot;
u32 nritems;
@ -1429,8 +1415,6 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
last_snapshot = btrfs_root_last_snapshot(&root->root_item);
for (i = *level; i > 0; i--) {
struct btrfs_key first_key;
eb = path->nodes[i];
nritems = btrfs_header_nritems(eb);
while (path->slots[i] < nritems) {
@ -1450,16 +1434,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
return 0;
}
bytenr = btrfs_node_blockptr(eb, path->slots[i]);
btrfs_node_key_to_cpu(eb, &first_key, path->slots[i]);
eb = read_tree_block(fs_info, bytenr, ptr_gen, i - 1,
&first_key);
if (IS_ERR(eb)) {
eb = btrfs_read_node_slot(eb, path->slots[i]);
if (IS_ERR(eb))
return PTR_ERR(eb);
} else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
return -EIO;
}
BUG_ON(btrfs_header_level(eb) != i - 1);
path->nodes[i - 1] = eb;
path->slots[i - 1] = 0;
@ -1575,7 +1552,7 @@ static void insert_dirty_subvol(struct btrfs_trans_handle *trans,
reloc_root_item = &reloc_root->root_item;
memset(&reloc_root_item->drop_progress, 0,
sizeof(reloc_root_item->drop_progress));
reloc_root_item->drop_level = 0;
btrfs_set_root_drop_level(reloc_root_item, 0);
btrfs_set_root_refs(reloc_root_item, 0);
btrfs_update_reloc_root(trans, root);
@ -1652,8 +1629,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
int level;
int max_level;
int replaced = 0;
int ret;
int err = 0;
int ret = 0;
u32 min_reserved;
path = btrfs_alloc_path();
@ -1672,7 +1648,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
} else {
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
level = root_item->drop_level;
level = btrfs_root_drop_level(root_item);
BUG_ON(level == 0);
path->lowest_level = level;
ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
@ -1704,13 +1680,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
while (1) {
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_LIMIT);
if (ret) {
err = ret;
if (ret)
goto out;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}
@ -1732,10 +1706,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
max_level = level;
ret = walk_down_reloc_tree(reloc_root, path, &level);
if (ret < 0) {
err = ret;
if (ret < 0)
goto out;
}
if (ret > 0)
break;
@ -1746,11 +1718,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
ret = replace_path(trans, rc, root, reloc_root, path,
&next_key, level, max_level);
}
if (ret < 0) {
err = ret;
if (ret < 0)
goto out;
}
if (ret > 0) {
level = ret;
btrfs_node_key_to_cpu(path->nodes[level], &key,
@ -1769,7 +1738,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
*/
btrfs_node_key(path->nodes[level], &root_item->drop_progress,
path->slots[level]);
root_item->drop_level = level;
btrfs_set_root_drop_level(root_item, level);
btrfs_end_transaction_throttle(trans);
trans = NULL;
@ -1789,12 +1758,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
BTRFS_NESTING_COW);
btrfs_tree_unlock(leaf);
free_extent_buffer(leaf);
if (ret < 0)
err = ret;
out:
btrfs_free_path(path);
if (err == 0)
if (ret == 0)
insert_dirty_subvol(trans, rc, root);
if (trans)
@ -1805,7 +1772,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
return err;
return ret;
}
static noinline_for_stack
@ -2205,7 +2172,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
struct btrfs_key *key,
struct btrfs_path *path, int lowest)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
struct btrfs_backref_node *upper;
struct btrfs_backref_edge *edge;
struct btrfs_backref_edge *edges[BTRFS_MAX_LEVEL - 1];
@ -2213,17 +2179,14 @@ static int do_relocation(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
u32 blocksize;
u64 bytenr;
u64 generation;
int slot;
int ret;
int err = 0;
int ret = 0;
BUG_ON(lowest && node->eb);
path->lowest_level = node->level + 1;
rc->backref_cache.path[node->level] = node;
list_for_each_entry(edge, &node->upper, list[LOWER]) {
struct btrfs_key first_key;
struct btrfs_ref ref = { 0 };
cond_resched();
@ -2235,10 +2198,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (upper->eb && !upper->locked) {
if (!lowest) {
ret = btrfs_bin_search(upper->eb, key, &slot);
if (ret < 0) {
err = ret;
if (ret < 0)
goto next;
}
BUG_ON(ret);
bytenr = btrfs_node_blockptr(upper->eb, slot);
if (node->eb->start == bytenr)
@ -2250,10 +2211,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (!upper->eb) {
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret) {
if (ret < 0)
err = ret;
else
err = -ENOENT;
if (ret > 0)
ret = -ENOENT;
btrfs_release_path(path);
break;
@ -2273,10 +2232,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
} else {
ret = btrfs_bin_search(upper->eb, key, &slot);
if (ret < 0) {
err = ret;
if (ret < 0)
goto next;
}
BUG_ON(ret);
}
@ -2287,7 +2244,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
bytenr, node->bytenr, slot,
upper->eb->start);
err = -EIO;
ret = -EIO;
goto next;
}
} else {
@ -2296,30 +2253,20 @@ static int do_relocation(struct btrfs_trans_handle *trans,
}
blocksize = root->fs_info->nodesize;
generation = btrfs_node_ptr_generation(upper->eb, slot);
btrfs_node_key_to_cpu(upper->eb, &first_key, slot);
eb = read_tree_block(fs_info, bytenr, generation,
upper->level - 1, &first_key);
eb = btrfs_read_node_slot(upper->eb, slot);
if (IS_ERR(eb)) {
err = PTR_ERR(eb);
goto next;
} else if (!extent_buffer_uptodate(eb)) {
free_extent_buffer(eb);
err = -EIO;
ret = PTR_ERR(eb);
goto next;
}
btrfs_tree_lock(eb);
btrfs_set_lock_blocking_write(eb);
if (!node->eb) {
ret = btrfs_cow_block(trans, root, eb, upper->eb,
slot, &eb, BTRFS_NESTING_COW);
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
if (ret < 0) {
err = ret;
if (ret < 0)
goto next;
}
BUG_ON(node->eb != eb);
} else {
btrfs_set_node_blockptr(upper->eb, slot,
@ -2345,19 +2292,19 @@ static int do_relocation(struct btrfs_trans_handle *trans,
btrfs_backref_drop_node_buffer(upper);
else
btrfs_backref_unlock_node_buffer(upper);
if (err)
if (ret)
break;
}
if (!err && node->pending) {
if (!ret && node->pending) {
btrfs_backref_drop_node_buffer(node);
list_move_tail(&node->list, &rc->backref_cache.changed);
node->pending = 0;
}
path->lowest_level = 0;
BUG_ON(err == -ENOSPC);
return err;
BUG_ON(ret == -ENOSPC);
return ret;
}
static int link_to_upper(struct btrfs_trans_handle *trans,
@ -2446,7 +2393,7 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
{
struct extent_buffer *eb;
eb = read_tree_block(fs_info, block->bytenr, block->key.offset,
eb = read_tree_block(fs_info, block->bytenr, 0, block->key.offset,
block->level, NULL);
if (IS_ERR(eb)) {
return PTR_ERR(eb);
@ -2546,7 +2493,8 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
/* Kick in readahead for tree blocks with missing keys */
rbtree_postorder_for_each_entry_safe(block, next, blocks, rb_node) {
if (!block->key_ready)
readahead_tree_block(fs_info, block->bytenr);
btrfs_readahead_tree_block(fs_info, block->bytenr, 0, 0,
block->level);
}
/* Get first keys */
@ -3071,7 +3019,7 @@ int add_data_references(struct reloc_control *rc,
while ((ref_node = ulist_next(leaves, &leaf_uiter))) {
struct extent_buffer *eb;
eb = read_tree_block(fs_info, ref_node->val, 0, 0, NULL);
eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
break;
@ -3694,7 +3642,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
memset(&root->root_item.drop_progress, 0,
sizeof(root->root_item.drop_progress));
root->root_item.drop_level = 0;
btrfs_set_root_drop_level(&root->root_item, 0);
btrfs_set_root_refs(&root->root_item, 0);
ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);

View File

@ -20,6 +20,7 @@
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"
#include "zoned.h"
/*
* This is only the first step towards a full-features scrub. It reads all
@ -71,11 +72,9 @@ struct scrub_page {
u64 physical;
u64 physical_for_dev_replace;
atomic_t refs;
struct {
unsigned int mirror_num:8;
unsigned int have_csum:1;
unsigned int io_error:1;
};
u8 mirror_num;
int have_csum:1;
int io_error:1;
u8 csum[BTRFS_CSUM_SIZE];
struct scrub_recover *recover;
@ -131,7 +130,7 @@ struct scrub_parity {
int nsectors;
u64 stripe_len;
u32 stripe_len;
refcount_t refs;
@ -161,7 +160,6 @@ struct scrub_ctx {
atomic_t workers_pending;
spinlock_t list_lock;
wait_queue_head_t list_wait;
u16 csum_size;
struct list_head csum_list;
atomic_t cancel_req;
int readonly;
@ -235,15 +233,15 @@ static void scrub_parity_get(struct scrub_parity *sparity);
static void scrub_parity_put(struct scrub_parity *sparity);
static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
struct scrub_page *spage);
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u8 *csum, int force,
u64 gen, int mirror_num, u8 *csum,
u64 physical_for_dev_replace);
static void scrub_bio_end_io(struct bio *bio);
static void scrub_bio_end_io_worker(struct btrfs_work *work);
static void scrub_block_complete(struct scrub_block *sblock);
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 extent_logical, u64 extent_len,
u64 extent_logical, u32 extent_len,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num);
@ -256,10 +254,10 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
static void scrub_put_ctx(struct scrub_ctx *sctx);
static inline int scrub_is_page_on_raid56(struct scrub_page *page)
static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
{
return page->recover &&
(page->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
return spage->recover &&
(spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}
static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@ -610,7 +608,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
atomic_set(&sctx->bios_in_flight, 0);
atomic_set(&sctx->workers_pending, 0);
atomic_set(&sctx->cancel_req, 0);
sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy);
spin_lock_init(&sctx->list_lock);
spin_lock_init(&sctx->stat_lock);
@ -1092,11 +1089,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
success = 1;
for (page_num = 0; page_num < sblock_bad->page_count;
page_num++) {
struct scrub_page *page_bad = sblock_bad->pagev[page_num];
struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
struct scrub_block *sblock_other = NULL;
/* skip no-io-error page in scrub */
if (!page_bad->io_error && !sctx->is_dev_replace)
if (!spage_bad->io_error && !sctx->is_dev_replace)
continue;
if (scrub_is_page_on_raid56(sblock_bad->pagev[0])) {
@ -1108,7 +1105,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* sblock_for_recheck array to target device.
*/
sblock_other = NULL;
} else if (page_bad->io_error) {
} else if (spage_bad->io_error) {
/* try to find no-io-error page in mirrors */
for (mirror_index = 0;
mirror_index < BTRFS_MAX_MIRRORS &&
@ -1147,7 +1144,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sblock_other,
page_num, 0);
if (0 == ret)
page_bad->io_error = 0;
spage_bad->io_error = 0;
else
success = 0;
}
@ -1325,13 +1322,13 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
for (mirror_index = 0; mirror_index < nmirrors;
mirror_index++) {
struct scrub_block *sblock;
struct scrub_page *page;
struct scrub_page *spage;
sblock = sblocks_for_recheck + mirror_index;
sblock->sctx = sctx;
page = kzalloc(sizeof(*page), GFP_NOFS);
if (!page) {
spage = kzalloc(sizeof(*spage), GFP_NOFS);
if (!spage) {
leave_nomem:
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@ -1339,17 +1336,17 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
scrub_put_recover(fs_info, recover);
return -ENOMEM;
}
scrub_page_get(page);
sblock->pagev[page_index] = page;
page->sblock = sblock;
page->flags = flags;
page->generation = generation;
page->logical = logical;
page->have_csum = have_csum;
scrub_page_get(spage);
sblock->pagev[page_index] = spage;
spage->sblock = sblock;
spage->flags = flags;
spage->generation = generation;
spage->logical = logical;
spage->have_csum = have_csum;
if (have_csum)
memcpy(page->csum,
memcpy(spage->csum,
original_sblock->pagev[0]->csum,
sctx->csum_size);
sctx->fs_info->csum_size);
scrub_stripe_index_and_offset(logical,
bbio->map_type,
@ -1360,23 +1357,23 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
mirror_index,
&stripe_index,
&stripe_offset);
page->physical = bbio->stripes[stripe_index].physical +
spage->physical = bbio->stripes[stripe_index].physical +
stripe_offset;
page->dev = bbio->stripes[stripe_index].dev;
spage->dev = bbio->stripes[stripe_index].dev;
BUG_ON(page_index >= original_sblock->page_count);
page->physical_for_dev_replace =
spage->physical_for_dev_replace =
original_sblock->pagev[page_index]->
physical_for_dev_replace;
/* for missing devices, dev->bdev is NULL */
page->mirror_num = mirror_index + 1;
spage->mirror_num = mirror_index + 1;
sblock->page_count++;
page->page = alloc_page(GFP_NOFS);
if (!page->page)
spage->page = alloc_page(GFP_NOFS);
if (!spage->page)
goto leave_nomem;
scrub_get_recover(recover);
page->recover = recover;
spage->recover = recover;
}
scrub_put_recover(fs_info, recover);
length -= sublen;
@ -1394,19 +1391,19 @@ static void scrub_bio_wait_endio(struct bio *bio)
static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
struct bio *bio,
struct scrub_page *page)
struct scrub_page *spage)
{
DECLARE_COMPLETION_ONSTACK(done);
int ret;
int mirror_num;
bio->bi_iter.bi_sector = page->logical >> 9;
bio->bi_iter.bi_sector = spage->logical >> 9;
bio->bi_private = &done;
bio->bi_end_io = scrub_bio_wait_endio;
mirror_num = page->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, page->recover->bbio,
page->recover->map_length,
mirror_num = spage->sblock->pagev[0]->mirror_num;
ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
spage->recover->map_length,
mirror_num, 0);
if (ret)
return ret;
@ -1431,10 +1428,10 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
bio_set_dev(bio, first_page->dev->bdev);
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct scrub_page *page = sblock->pagev[page_num];
struct scrub_page *spage = sblock->pagev[page_num];
WARN_ON(!page->page);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
WARN_ON(!spage->page);
bio_add_page(bio, spage->page, PAGE_SIZE, 0);
}
if (scrub_submit_raid56_bio_wait(fs_info, bio, first_page)) {
@ -1475,24 +1472,24 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
for (page_num = 0; page_num < sblock->page_count; page_num++) {
struct bio *bio;
struct scrub_page *page = sblock->pagev[page_num];
struct scrub_page *spage = sblock->pagev[page_num];
if (page->dev->bdev == NULL) {
page->io_error = 1;
if (spage->dev->bdev == NULL) {
spage->io_error = 1;
sblock->no_io_error_seen = 0;
continue;
}
WARN_ON(!page->page);
WARN_ON(!spage->page);
bio = btrfs_io_bio_alloc(1);
bio_set_dev(bio, page->dev->bdev);
bio_set_dev(bio, spage->dev->bdev);
bio_add_page(bio, page->page, PAGE_SIZE, 0);
bio->bi_iter.bi_sector = page->physical >> 9;
bio_add_page(bio, spage->page, PAGE_SIZE, 0);
bio->bi_iter.bi_sector = spage->physical >> 9;
bio->bi_opf = REQ_OP_READ;
if (btrfsic_submit_bio_wait(bio)) {
page->io_error = 1;
spage->io_error = 1;
sblock->no_io_error_seen = 0;
}
@ -1548,36 +1545,36 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
struct scrub_block *sblock_good,
int page_num, int force_write)
{
struct scrub_page *page_bad = sblock_bad->pagev[page_num];
struct scrub_page *page_good = sblock_good->pagev[page_num];
struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
struct scrub_page *spage_good = sblock_good->pagev[page_num];
struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
BUG_ON(page_bad->page == NULL);
BUG_ON(page_good->page == NULL);
BUG_ON(spage_bad->page == NULL);
BUG_ON(spage_good->page == NULL);
if (force_write || sblock_bad->header_error ||
sblock_bad->checksum_error || page_bad->io_error) {
sblock_bad->checksum_error || spage_bad->io_error) {
struct bio *bio;
int ret;
if (!page_bad->dev->bdev) {
if (!spage_bad->dev->bdev) {
btrfs_warn_rl(fs_info,
"scrub_repair_page_from_good_copy(bdev == NULL) is unexpected");
return -EIO;
}
bio = btrfs_io_bio_alloc(1);
bio_set_dev(bio, page_bad->dev->bdev);
bio->bi_iter.bi_sector = page_bad->physical >> 9;
bio_set_dev(bio, spage_bad->dev->bdev);
bio->bi_iter.bi_sector = spage_bad->physical >> 9;
bio->bi_opf = REQ_OP_WRITE;
ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
ret = bio_add_page(bio, spage_good->page, PAGE_SIZE, 0);
if (PAGE_SIZE != ret) {
bio_put(bio);
return -EIO;
}
if (btrfsic_submit_bio_wait(bio)) {
btrfs_dev_stat_inc_and_print(page_bad->dev,
btrfs_dev_stat_inc_and_print(spage_bad->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
atomic64_inc(&fs_info->dev_replace.num_write_errors);
bio_put(bio);
@ -1798,11 +1795,15 @@ static int scrub_checksum_data(struct scrub_block *sblock)
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
if (memcmp(csum, spage->csum, sctx->csum_size))
/*
* In scrub_pages() and scrub_pages_for_parity() we ensure each spage
* only contains one sector of data.
*/
crypto_shash_digest(shash, kaddr, fs_info->sectorsize, csum);
if (memcmp(csum, spage->csum, fs_info->csum_size))
sblock->checksum_error = 1;
return sblock->checksum_error;
}
@ -1814,16 +1815,26 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
u8 calculated_csum[BTRFS_CSUM_SIZE];
u8 on_disk_csum[BTRFS_CSUM_SIZE];
const int num_pages = sctx->fs_info->nodesize >> PAGE_SHIFT;
/*
* This is done in sectorsize steps even for metadata as there's a
* constraint for nodesize to be aligned to sectorsize. This will need
* to change so we don't misuse data and metadata units like that.
*/
const u32 sectorsize = sctx->fs_info->sectorsize;
const int num_sectors = fs_info->nodesize >> fs_info->sectorsize_bits;
int i;
struct scrub_page *spage;
char *kaddr;
BUG_ON(sblock->page_count < 1);
/* Each member in pagev is just one block, not a full page */
ASSERT(sblock->page_count == num_sectors);
spage = sblock->pagev[0];
kaddr = page_address(spage->page);
h = (struct btrfs_header *)kaddr;
memcpy(on_disk_csum, h->csum, sctx->csum_size);
memcpy(on_disk_csum, h->csum, sctx->fs_info->csum_size);
/*
* we don't use the getter functions here, as we
@ -1848,15 +1859,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
shash->tfm = fs_info->csum_shash;
crypto_shash_init(shash);
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
PAGE_SIZE - BTRFS_CSUM_SIZE);
sectorsize - BTRFS_CSUM_SIZE);
for (i = 1; i < num_pages; i++) {
for (i = 1; i < num_sectors; i++) {
kaddr = page_address(sblock->pagev[i]->page);
crypto_shash_update(shash, kaddr, PAGE_SIZE);
crypto_shash_update(shash, kaddr, sectorsize);
}
crypto_shash_final(shash, calculated_csum);
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
if (memcmp(calculated_csum, on_disk_csum, sctx->fs_info->csum_size))
sblock->checksum_error = 1;
return sblock->header_error || sblock->checksum_error;
@ -1893,7 +1904,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
crypto_shash_digest(shash, kaddr + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, calculated_csum);
if (memcmp(calculated_csum, s->csum, sctx->csum_size))
if (memcmp(calculated_csum, s->csum, sctx->fs_info->csum_size))
++fail_cor;
if (fail_cor + fail_gen) {
@ -2150,12 +2161,13 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
spin_unlock(&sctx->stat_lock);
}
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u8 *csum, int force,
u64 gen, int mirror_num, u8 *csum,
u64 physical_for_dev_replace)
{
struct scrub_block *sblock;
const u32 sectorsize = sctx->fs_info->sectorsize;
int index;
sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
@ -2174,7 +2186,12 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
for (index = 0; len > 0; index++) {
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
/*
* Here we will allocate one page for one sector to scrub.
* This is fine if PAGE_SIZE == sectorsize, but will cost
* more memory for PAGE_SIZE > sectorsize case.
*/
u32 l = min(sectorsize, len);
spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
@ -2198,7 +2215,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
spage->mirror_num = mirror_num;
if (csum) {
spage->have_csum = 1;
memcpy(spage->csum, csum, sctx->csum_size);
memcpy(spage->csum, csum, sctx->fs_info->csum_size);
} else {
spage->have_csum = 0;
}
@ -2231,7 +2248,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
}
}
if (force)
if (flags & BTRFS_EXTENT_FLAG_SUPER)
scrub_submit(sctx);
}
@ -2295,12 +2312,11 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
unsigned long *bitmap,
u64 start, u64 len)
u64 start, u32 len)
{
u64 offset;
u64 nsectors64;
u32 nsectors;
int sectorsize = sparity->sctx->fs_info->sectorsize;
u32 sectorsize_bits = sparity->sctx->fs_info->sectorsize_bits;
if (len >= sparity->stripe_len) {
bitmap_set(bitmap, 0, sparity->nsectors);
@ -2309,11 +2325,8 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
start -= sparity->logic_start;
start = div64_u64_rem(start, sparity->stripe_len, &offset);
offset = div_u64(offset, sectorsize);
nsectors64 = div_u64(len, sectorsize);
ASSERT(nsectors64 < UINT_MAX);
nsectors = (u32)nsectors64;
offset = offset >> sectorsize_bits;
nsectors = len >> sectorsize_bits;
if (offset + nsectors <= sparity->nsectors) {
bitmap_set(bitmap, offset, nsectors);
@ -2325,13 +2338,13 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
}
static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
u64 start, u64 len)
u64 start, u32 len)
{
__scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
}
static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
u64 start, u64 len)
u64 start, u32 len)
{
__scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
}
@ -2359,48 +2372,77 @@ static void scrub_block_complete(struct scrub_block *sblock)
u64 end = sblock->pagev[sblock->page_count - 1]->logical +
PAGE_SIZE;
ASSERT(end - start <= U32_MAX);
scrub_parity_mark_sectors_error(sblock->sparity,
start, end - start);
}
}
/*
 * Remove @sum from the list it is linked on and free it, adding the number
 * of sectors it covered (sum->len shifted by sectorsize_bits) to the
 * csum_discards statistic of @sctx.
 */
static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *sum)
{
	/* Unlink before accounting; @sum stays valid until the kfree() below */
	list_del(&sum->list);
	sctx->stat.csum_discards += sum->len >> sctx->fs_info->sectorsize_bits;
	kfree(sum);
}
/*
* Find the desired csum for range [logical, logical + sectorsize), and store
* the csum into @csum.
*
* The search source is sctx->csum_list, which is a pre-populated list
* storing bytenr ordered csum ranges. We're responsible for cleaning up any
* range that is before @logical.
*
* Return 0 if there is no csum for the range.
* Return 1 if there is csum for the range and copied to @csum.
*/
static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
{
struct btrfs_ordered_sum *sum = NULL;
unsigned long index;
unsigned long num_sectors;
bool found = false;
while (!list_empty(&sctx->csum_list)) {
struct btrfs_ordered_sum *sum = NULL;
unsigned long index;
unsigned long num_sectors;
sum = list_first_entry(&sctx->csum_list,
struct btrfs_ordered_sum, list);
/* The current csum range is beyond our range, no csum found */
if (sum->bytenr > logical)
return 0;
if (sum->bytenr + sum->len > logical)
break;
++sctx->stat.csum_discards;
list_del(&sum->list);
kfree(sum);
sum = NULL;
/*
* The current sum is before our bytenr. Since scrub is always
* done in bytenr order, the csum will never be used again, so
* clean it up so that later calls won't bother with the range,
* and continue searching with the next range.
*/
if (sum->bytenr + sum->len <= logical) {
drop_csum_range(sctx, sum);
continue;
}
/* Now the csum range covers our bytenr, copy the csum */
found = true;
index = (logical - sum->bytenr) >> sctx->fs_info->sectorsize_bits;
num_sectors = sum->len >> sctx->fs_info->sectorsize_bits;
memcpy(csum, sum->sums + index * sctx->fs_info->csum_size,
sctx->fs_info->csum_size);
/* Cleanup the range if we're at the end of the csum range */
if (index == num_sectors - 1)
drop_csum_range(sctx, sum);
break;
}
if (!sum)
if (!found)
return 0;
index = div_u64(logical - sum->bytenr, sctx->fs_info->sectorsize);
ASSERT(index < UINT_MAX);
num_sectors = sum->len / sctx->fs_info->sectorsize;
memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
if (index == num_sectors - 1) {
list_del(&sum->list);
kfree(sum);
}
return 1;
}
/* scrub extent tries to collect up to 64 kB for each bio */
static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
u64 logical, u64 len,
u64 logical, u32 len,
u64 physical, struct btrfs_device *dev, u64 flags,
u64 gen, int mirror_num, u64 physical_for_dev_replace)
{
@ -2432,7 +2474,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
}
while (len) {
u64 l = min_t(u64, len, blocksize);
u32 l = min(len, blocksize);
int have_csum = 0;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
@ -2442,7 +2484,7 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
++sctx->stat.no_csum;
}
ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen,
mirror_num, have_csum ? csum : NULL, 0,
mirror_num, have_csum ? csum : NULL,
physical_for_dev_replace);
if (ret)
return ret;
@ -2455,14 +2497,17 @@ static int scrub_extent(struct scrub_ctx *sctx, struct map_lookup *map,
}
static int scrub_pages_for_parity(struct scrub_parity *sparity,
u64 logical, u64 len,
u64 logical, u32 len,
u64 physical, struct btrfs_device *dev,
u64 flags, u64 gen, int mirror_num, u8 *csum)
{
struct scrub_ctx *sctx = sparity->sctx;
struct scrub_block *sblock;
const u32 sectorsize = sctx->fs_info->sectorsize;
int index;
ASSERT(IS_ALIGNED(len, sectorsize));
sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
if (!sblock) {
spin_lock(&sctx->stat_lock);
@ -2481,7 +2526,6 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
for (index = 0; len > 0; index++) {
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
@ -2508,7 +2552,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
spage->mirror_num = mirror_num;
if (csum) {
spage->have_csum = 1;
memcpy(spage->csum, csum, sctx->csum_size);
memcpy(spage->csum, csum, sctx->fs_info->csum_size);
} else {
spage->have_csum = 0;
}
@ -2516,9 +2560,12 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
spage->page = alloc_page(GFP_KERNEL);
if (!spage->page)
goto leave_nomem;
len -= l;
logical += l;
physical += l;
/* Iterate over the stripe range in sectorsize steps */
len -= sectorsize;
logical += sectorsize;
physical += sectorsize;
}
WARN_ON(sblock->page_count == 0);
@ -2539,7 +2586,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
}
static int scrub_extent_for_parity(struct scrub_parity *sparity,
u64 logical, u64 len,
u64 logical, u32 len,
u64 physical, struct btrfs_device *dev,
u64 flags, u64 gen, int mirror_num)
{
@ -2563,7 +2610,7 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
}
while (len) {
u64 l = min_t(u64, len, blocksize);
u32 l = min(len, blocksize);
int have_csum = 0;
if (flags & BTRFS_EXTENT_FLAG_DATA) {
@ -2767,7 +2814,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
u64 generation;
u64 extent_logical;
u64 extent_physical;
u64 extent_len;
/* Check the comment in scrub_stripe() for why u32 is enough here */
u32 extent_len;
u64 mapped_length;
struct btrfs_device *extent_dev;
struct scrub_parity *sparity;
@ -2776,7 +2824,8 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
int extent_mirror_num;
int stop_loop = 0;
nsectors = div_u64(map->stripe_len, fs_info->sectorsize);
ASSERT(map->stripe_len <= U32_MAX);
nsectors = map->stripe_len >> fs_info->sectorsize_bits;
bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
GFP_NOFS);
@ -2787,6 +2836,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
return -ENOMEM;
}
ASSERT(map->stripe_len <= U32_MAX);
sparity->stripe_len = map->stripe_len;
sparity->nsectors = nsectors;
sparity->sctx = sctx;
@ -2881,6 +2931,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
}
again:
extent_logical = key.objectid;
ASSERT(bytes <= U32_MAX);
extent_len = bytes;
if (extent_logical < logic_start) {
@ -2959,9 +3010,11 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
logic_start += map->stripe_len;
}
out:
if (ret < 0)
if (ret < 0) {
ASSERT(logic_end - logic_start <= U32_MAX);
scrub_parity_mark_sectors_error(sparity, logic_start,
logic_end - logic_start);
}
scrub_parity_put(sparity);
scrub_submit(sctx);
mutex_lock(&sctx->wr_lock);
@ -3003,7 +3056,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 offset;
u64 extent_logical;
u64 extent_physical;
u64 extent_len;
/*
* Unlike chunk length, extent length should never go beyond
* BTRFS_MAX_EXTENT_SIZE, thus u32 is enough here.
*/
u32 extent_len;
u64 stripe_logical;
u64 stripe_end;
struct btrfs_device *extent_dev;
@ -3084,17 +3141,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
key_end.offset = (u64)-1;
reada1 = btrfs_reada_add(root, &key, &key_end);
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = logical;
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key_end.type = BTRFS_EXTENT_CSUM_KEY;
key_end.offset = logic_end;
reada2 = btrfs_reada_add(csum_root, &key, &key_end);
if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.type = BTRFS_EXTENT_CSUM_KEY;
key.offset = logical;
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key_end.type = BTRFS_EXTENT_CSUM_KEY;
key_end.offset = logic_end;
reada2 = btrfs_reada_add(csum_root, &key, &key_end);
} else {
reada2 = NULL;
}
if (!IS_ERR(reada1))
btrfs_reada_wait(reada1);
if (!IS_ERR(reada2))
if (!IS_ERR_OR_NULL(reada2))
btrfs_reada_wait(reada2);
@ -3248,6 +3309,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
again:
extent_logical = key.objectid;
ASSERT(bytes <= U32_MAX);
extent_len = bytes;
/*
@ -3704,10 +3766,12 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
if (!btrfs_check_super_location(scrub_dev, bytenr))
continue;
ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
NULL, 1, bytenr);
NULL, bytenr);
if (ret)
return ret;
}
@ -3821,14 +3885,6 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EINVAL;
}
if (fs_info->sectorsize != PAGE_SIZE) {
/* not supported for data w/o checksums */
btrfs_err_rl(fs_info,
"scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails",
fs_info->sectorsize, PAGE_SIZE);
return -EINVAL;
}
if (fs_info->nodesize >
PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK ||
fs_info->sectorsize > PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) {
@ -3855,7 +3911,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
goto out_free_ctx;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@ -4032,7 +4088,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct scrub_ctx *sctx = NULL;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
if (dev)
sctx = dev->scrub_ctx;
if (sctx)
@ -4043,7 +4099,7 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
}
static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
u64 extent_logical, u64 extent_len,
u64 extent_logical, u32 extent_len,
u64 *extent_physical,
struct btrfs_device **extent_dev,
int *extent_mirror_num)

View File

@ -2410,7 +2410,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
sctx->send_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
le64_to_cpu(sctx->send_root->root_item.ctransid));
btrfs_root_ctransid(&sctx->send_root->root_item));
if (parent_root) {
if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
@ -2419,7 +2419,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
parent_root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(sctx->parent_root->root_item.ctransid));
btrfs_root_ctransid(&sctx->parent_root->root_item));
}
ret = send_cmd(sctx);
@ -5101,7 +5101,7 @@ static int send_clone(struct send_ctx *sctx,
TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
clone_root->root->root_item.uuid);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
le64_to_cpu(clone_root->root->root_item.ctransid));
btrfs_root_ctransid(&clone_root->root->root_item));
TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET,
clone_root->offset);

View File

@ -57,8 +57,9 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
const void *ptr, unsigned long off) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = get_eb_page_index(member_offset); \
const unsigned long oip = get_eb_offset_in_page(token->eb, \
member_offset); \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = PAGE_SIZE - oip; \
@ -85,8 +86,8 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
const void *ptr, unsigned long off) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
const unsigned long idx = get_eb_page_index(member_offset); \
char *kaddr = page_address(eb->pages[idx]); \
const int size = sizeof(u##bits); \
const int part = PAGE_SIZE - oip; \
@ -106,8 +107,9 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
u##bits val) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = get_eb_page_index(member_offset); \
const unsigned long oip = get_eb_offset_in_page(token->eb, \
member_offset); \
const int size = sizeof(u##bits); \
u8 lebytes[sizeof(u##bits)]; \
const int part = PAGE_SIZE - oip; \
@ -136,8 +138,8 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
unsigned long off, u##bits val) \
{ \
const unsigned long member_offset = (unsigned long)ptr + off; \
const unsigned long oip = offset_in_page(member_offset); \
const unsigned long idx = member_offset >> PAGE_SHIFT; \
const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
const unsigned long idx = get_eb_page_index(member_offset); \
char *kaddr = page_address(eb->pages[idx]); \
const int size = sizeof(u##bits); \
const int part = PAGE_SIZE - oip; \

View File

@ -44,6 +44,7 @@
#include "backref.h"
#include "space-info.h"
#include "sysfs.h"
#include "zoned.h"
#include "tests/btrfs-tests.h"
#include "block-group.h"
#include "discard.h"
@ -240,9 +241,13 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, .
vaf.fmt = fmt;
vaf.va = &args;
if (__ratelimit(ratelimit))
printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
fs_info ? fs_info->sb->s_id : "<unknown>", &vaf);
if (__ratelimit(ratelimit)) {
if (fs_info)
printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
fs_info->sb->s_id, &vaf);
else
printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
}
va_end(args);
}
@ -333,7 +338,6 @@ enum {
Opt_device,
Opt_fatal_errors,
Opt_flushoncommit, Opt_noflushoncommit,
Opt_inode_cache, Opt_noinode_cache,
Opt_max_inline,
Opt_barrier, Opt_nobarrier,
Opt_datacow, Opt_nodatacow,
@ -360,9 +364,13 @@ enum {
Opt_rescue,
Opt_usebackuproot,
Opt_nologreplay,
Opt_ignorebadroots,
Opt_ignoredatacsums,
Opt_rescue_all,
/* Deprecated options */
Opt_recovery,
Opt_inode_cache, Opt_noinode_cache,
/* Debugging options */
Opt_check_integrity,
@ -455,9 +463,25 @@ static const match_table_t tokens = {
/*
 * Token table for the rescue options. ignorebadroots and ignoredatacsums
 * each have a short alias (ibadroots, idatacsums) that maps to the same
 * token, and "all" maps to Opt_rescue_all. The last entry (Opt_err with a
 * NULL pattern) is presumably the end-of-table sentinel, as is usual for
 * match_table_t tables.
 */
static const match_table_t rescue_tokens = {
{Opt_usebackuproot, "usebackuproot"},
{Opt_nologreplay, "nologreplay"},
{Opt_ignorebadroots, "ignorebadroots"},
{Opt_ignorebadroots, "ibadroots"},
{Opt_ignoredatacsums, "ignoredatacsums"},
{Opt_ignoredatacsums, "idatacsums"},
{Opt_rescue_all, "all"},
{Opt_err, NULL},
};
/*
 * Report an error if mount option bit(s) @opt are set in fs_info->mount_opt.
 *
 * @opt_name is only used in the error message. Returns true (after logging
 * the error) when the option is set, false otherwise.
 */
static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt,
			    const char *opt_name)
{
	/* Option not set, nothing to complain about */
	if (!(fs_info->mount_opt & opt))
		return false;

	btrfs_err(fs_info, "%s must be used with ro mount option", opt_name);
	return true;
}
static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
{
char *opts;
@ -487,6 +511,23 @@ static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_ignorebadroots:
btrfs_set_and_info(info, IGNOREBADROOTS,
"ignoring bad roots");
break;
case Opt_ignoredatacsums:
btrfs_set_and_info(info, IGNOREDATACSUMS,
"ignoring data csums");
break;
case Opt_rescue_all:
btrfs_info(info, "enabling all of the rescue options");
btrfs_set_and_info(info, IGNOREDATACSUMS,
"ignoring data csums");
btrfs_set_and_info(info, IGNOREBADROOTS,
"ignoring bad roots");
btrfs_set_and_info(info, NOLOGREPLAY,
"disabling log replay at mount time");
break;
case Opt_err:
btrfs_info(info, "unrecognized rescue option '%s'", p);
ret = -EINVAL;
@ -511,7 +552,6 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
{
substring_t args[MAX_OPT_ARGS];
char *p, *num;
u64 cache_gen;
int intarg;
int ret = 0;
char *compress_type;
@ -521,11 +561,17 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
bool saved_compress_force;
int no_compress = 0;
cache_gen = btrfs_super_cache_generation(info->super_copy);
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
else if (cache_gen)
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
else if (btrfs_free_space_cache_v1_active(info)) {
if (btrfs_is_zoned(info)) {
btrfs_info(info,
"zoned: clearing existing space cache");
btrfs_set_super_cache_generation(info->super_copy, 0);
} else {
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
}
}
/*
* Even the options are empty, we still need to do extra check
@ -832,14 +878,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
}
break;
case Opt_inode_cache:
btrfs_warn(info,
"the 'inode_cache' option is deprecated and will have no effect from 5.11");
btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
"enabling inode map caching");
break;
case Opt_noinode_cache:
btrfs_clear_pending_and_info(info, INODE_MAP_CACHE,
"disabling inode map caching");
btrfs_warn(info,
"the 'inode_cache' option is deprecated and has no effect since 5.11");
break;
case Opt_clear_cache:
btrfs_set_and_info(info, CLEAR_CACHE,
@ -968,14 +1009,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
}
}
check:
/*
* Extra check for current option against current flag
*/
if (btrfs_test_opt(info, NOLOGREPLAY) && !(new_flags & SB_RDONLY)) {
btrfs_err(info,
"nologreplay must be used with ro mount option");
/* We're read-only, don't have to check. */
if (new_flags & SB_RDONLY)
goto out;
if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))
ret = -EINVAL;
}
out:
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
!btrfs_test_opt(info, FREE_SPACE_TREE) &&
@ -984,6 +1025,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
ret = -EINVAL;
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
if (!ret && btrfs_test_opt(info, SPACE_CACHE))
btrfs_info(info, "disk space caching is enabled");
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
@ -1127,7 +1170,6 @@ char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
ret = -ENOMEM;
goto err;
}
path->leave_spinning = 1;
name = kmalloc(PATH_MAX, GFP_KERNEL);
if (!name) {
@ -1256,7 +1298,6 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
path->leave_spinning = 1;
/*
* Find the "default" dir item which points to the root item that we
@ -1383,11 +1424,18 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return btrfs_commit_transaction(trans);
}
/*
 * Emit one rescue option name @s into @seq.
 *
 * The first option printed is prefixed with ",rescue="; every later one is
 * prefixed with ":". @printed tracks whether an option was already emitted
 * and is set to true on return.
 */
static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
{
	const char *prefix = ",rescue=";

	if (*printed)
		prefix = ":";

	seq_printf(seq, "%s%s", prefix, s);
	*printed = true;
}
static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
const char *compress_type;
const char *subvol_name;
bool printed = false;
if (btrfs_test_opt(info, DEGRADED))
seq_puts(seq, ",degraded");
@ -1420,7 +1468,13 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (btrfs_test_opt(info, NOTREELOG))
seq_puts(seq, ",notreelog");
if (btrfs_test_opt(info, NOLOGREPLAY))
seq_puts(seq, ",rescue=nologreplay");
print_rescue_option(seq, "nologreplay", &printed);
if (btrfs_test_opt(info, USEBACKUPROOT))
print_rescue_option(seq, "usebackuproot", &printed);
if (btrfs_test_opt(info, IGNOREBADROOTS))
print_rescue_option(seq, "ignorebadroots", &printed);
if (btrfs_test_opt(info, IGNOREDATACSUMS))
print_rescue_option(seq, "ignoredatacsums", &printed);
if (btrfs_test_opt(info, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(info, DISCARD_SYNC))
@ -1429,9 +1483,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",discard=async");
if (!(info->sb->s_flags & SB_POSIXACL))
seq_puts(seq, ",noacl");
if (btrfs_test_opt(info, SPACE_CACHE))
if (btrfs_free_space_cache_v1_active(info))
seq_puts(seq, ",space_cache");
else if (btrfs_test_opt(info, FREE_SPACE_TREE))
else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
seq_puts(seq, ",space_cache=v2");
else
seq_puts(seq, ",nospace_cache");
@ -1445,8 +1499,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",enospc_debug");
if (btrfs_test_opt(info, AUTO_DEFRAG))
seq_puts(seq, ",autodefrag");
if (btrfs_test_opt(info, INODE_MAP_CACHE))
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(info, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
@ -1810,6 +1862,8 @@ static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
unsigned long old_opts)
{
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
/*
* We need to clean up all defraggable inodes if autodefrag has been
* turned off or the filesystem is read-only.
@ -1826,12 +1880,15 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
!btrfs_test_opt(fs_info, DISCARD_ASYNC))
btrfs_discard_cleanup(fs_info);
/* If we toggled space cache */
if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
}
static int btrfs_remount(struct super_block *sb, int *flags, char *data)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_root *root = fs_info->tree_root;
unsigned old_flags = sb->s_flags;
unsigned long old_opts = fs_info->mount_opt;
unsigned long old_compress_type = fs_info->compress_type;
@ -1862,6 +1919,22 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
(!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
btrfs_warn(fs_info,
"remount supports changing free space tree only from ro to rw");
/* Make sure free space cache options match the state on disk */
if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
}
if (btrfs_free_space_cache_v1_active(fs_info)) {
btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
}
}
if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
goto out;
@ -1924,39 +1997,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
ret = btrfs_cleanup_fs_roots(fs_info);
/*
* NOTE: when remounting with a change that does writes, don't
* put it anywhere above this point, as we are not sure to be
* safe to write until we pass the above checks.
*/
ret = btrfs_start_pre_rw_mount(fs_info);
if (ret)
goto restore;
/* recover relocation */
mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(root);
mutex_unlock(&fs_info->cleaner_mutex);
if (ret)
goto restore;
ret = btrfs_resume_balance_async(fs_info);
if (ret)
goto restore;
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
btrfs_warn(fs_info, "failed to resume dev_replace");
goto restore;
}
btrfs_qgroup_rescan_resume(fs_info);
if (!fs_info->uuid_root) {
btrfs_info(fs_info, "creating UUID tree");
ret = btrfs_create_uuid_tree(fs_info);
if (ret) {
btrfs_warn(fs_info,
"failed to create the UUID tree %d",
ret);
goto restore;
}
}
sb->s_flags &= ~SB_RDONLY;
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
@ -1970,6 +2019,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
wake_up_process(fs_info->transaction_kthread);
btrfs_remount_cleanup(fs_info, old_opts);
btrfs_clear_oneshot_options(fs_info);
clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
return 0;
@ -2156,7 +2206,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 total_used = 0;
u64 total_free_data = 0;
u64 total_free_meta = 0;
int bits = dentry->d_sb->s_blocksize_bits;
u32 bits = fs_info->sectorsize_bits;
__be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
unsigned factor = 1;
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
@ -2462,6 +2512,11 @@ static void __init btrfs_print_mod_info(void)
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
", ref-verify=on"
#endif
#ifdef CONFIG_BLK_DEV_ZONED
", zoned=yes"
#else
", zoned=no"
#endif
;
pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options);
@ -2523,8 +2578,6 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_end_io_wq;
btrfs_init_lockdep();
btrfs_print_mod_info();
err = btrfs_run_sanity_tests();

View File

@ -263,6 +263,10 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
/* Remove once support for zoned allocation is feature complete */
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
#endif
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
@ -278,6 +282,9 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(metadata_uuid),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
BTRFS_FEAT_ATTR_PTR(raid1c34),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_FEAT_ATTR_PTR(zoned),
#endif
NULL
};
@ -329,10 +336,35 @@ static ssize_t send_stream_version_show(struct kobject *kobj,
}
BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);
/*
 * Option names reported, space separated and in this order, by the
 * supported_rescue_options sysfs attribute.
 */
static const char *rescue_opts[] = {
"usebackuproot",
"nologreplay",
"ignorebadroots",
"ignoredatacsums",
"all",
};
/*
 * sysfs show handler: write every entry of rescue_opts[] into @buf on a
 * single line, separated by single spaces and terminated by a newline.
 *
 * Returns the number of bytes placed in @buf.  Output is bounded by
 * PAGE_SIZE through scnprintf().
 */
static ssize_t supported_rescue_options_show(struct kobject *kobj,
					     struct kobj_attribute *a,
					     char *buf)
{
	ssize_t len = 0;
	int idx = 0;

	while (idx < ARRAY_SIZE(rescue_opts)) {
		/* No separator in front of the very first name */
		const char *sep = idx ? " " : "";

		len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
				 sep, rescue_opts[idx]);
		idx++;
	}
	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");

	return len;
}
BTRFS_ATTR(static_feature, supported_rescue_options,
	   supported_rescue_options_show);
/*
 * NULL-terminated list of the "static_feature" sysfs attributes, including
 * the supported_rescue_options attribute.
 */
static struct attribute *btrfs_supported_static_feature_attrs[] = {
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
BTRFS_ATTR_PTR(static_feature, supported_checksums),
BTRFS_ATTR_PTR(static_feature, send_stream_version),
BTRFS_ATTR_PTR(static_feature, supported_rescue_options),
NULL
};
@ -433,7 +465,8 @@ static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
return -EINVAL;
WRITE_ONCE(discard_ctl->iops_limit, iops_limit);
btrfs_discard_calc_delay(discard_ctl);
btrfs_discard_schedule_work(discard_ctl, true);
return len;
}
BTRFS_ATTR_RW(discard, iops_limit, btrfs_discard_iops_limit_show,
@ -463,7 +496,7 @@ static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
return -EINVAL;
WRITE_ONCE(discard_ctl->kbps_limit, kbps_limit);
btrfs_discard_schedule_work(discard_ctl, true);
return len;
}
BTRFS_ATTR_RW(discard, kbps_limit, btrfs_discard_kbps_limit_show,
@ -854,6 +887,82 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
}
BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show);
/*
 * sysfs show handler: print fs_info->generation of the filesystem that
 * owns @kobj as an unsigned decimal number followed by a newline.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t btrfs_generation_show(struct kobject *kobj,
				     struct kobj_attribute *a, char *buf)
{
	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
	ssize_t written;

	written = scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->generation);

	return written;
}
BTRFS_ATTR(, generation, btrfs_generation_show);
/*
 * Check whether @buffer consists of exactly @string, optionally preceded
 * and/or followed by whitespace.
 *
 * Returns true on such a match, false otherwise.
 */
static bool strmatch(const char *buffer, const char *string)
{
	const size_t want = strlen(string);
	const char *rest;

	/* Ignore whatever whitespace comes before the candidate token */
	buffer = skip_spaces(buffer);

	/* The token itself has to match character for character */
	if (strncmp(string, buffer, want) != 0)
		return false;

	/* Only whitespace (or nothing at all) may follow the token */
	rest = skip_spaces(buffer + want);

	return strlen(rest) == 0;
}
/* Names of the read policies, indexed by the fs_devices->read_policy value. */
static const char * const btrfs_read_policy_name[] = { "pid" };
/*
 * sysfs show handler: list every known read policy name on one line,
 * separated by single spaces, with the policy currently stored in
 * fs_devices->read_policy wrapped in square brackets.  The line is
 * terminated by a newline.
 *
 * Returns the number of bytes written to @buf.
 */
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
				      struct kobj_attribute *a, char *buf)
{
	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
	ssize_t len = 0;
	int policy;

	for (policy = 0; policy < BTRFS_NR_READ_POLICY; policy++) {
		/* A separator is needed once anything has been written */
		const char *sep = len ? " " : "";
		const char *name = btrfs_read_policy_name[policy];

		if (fs_devices->read_policy != policy) {
			len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
					 sep, name);
			continue;
		}

		/* Active policy: highlight it with brackets */
		len += scnprintf(buf + len, PAGE_SIZE - len, "%s[%s]",
				 sep, name);
	}

	len += scnprintf(buf + len, PAGE_SIZE - len, "\n");

	return len;
}
/*
 * sysfs store handler: select the read policy whose name matches @buf
 * (surrounding whitespace is tolerated, see strmatch()).
 *
 * Returns @len when @buf names a known policy, -EINVAL otherwise.  The
 * stored policy is only rewritten, and a message only logged, when the
 * requested policy differs from the current one.
 */
static ssize_t btrfs_read_policy_store(struct kobject *kobj,
				       struct kobj_attribute *a,
				       const char *buf, size_t len)
{
	struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
	int policy;

	for (policy = 0; policy < BTRFS_NR_READ_POLICY; policy++) {
		if (!strmatch(buf, btrfs_read_policy_name[policy]))
			continue;

		/* Nothing to do, and nothing to report, if already selected */
		if (fs_devices->read_policy != policy) {
			fs_devices->read_policy = policy;
			btrfs_info(fs_devices->fs_info,
				   "read policy set to '%s'",
				   btrfs_read_policy_name[policy]);
		}

		return len;
	}

	return -EINVAL;
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@ -863,6 +972,8 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, metadata_uuid),
BTRFS_ATTR_PTR(, checksum),
BTRFS_ATTR_PTR(, exclusive_operation),
BTRFS_ATTR_PTR(, generation),
BTRFS_ATTR_PTR(, read_policy),
NULL,
};
@ -1207,7 +1318,7 @@ static const char *alloc_name(u64 flags)
default:
WARN_ON(1);
return "invalid-combination";
};
}
}
/*

View File

@ -134,6 +134,7 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
fs_info->nodesize = nodesize;
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
test_mnt->mnt_sb->s_fs_info = fs_info;
@ -224,7 +225,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->bg_list);
btrfs_init_free_space_ctl(cache);
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
mutex_init(&cache->free_space_lock);
return cache;

View File

@ -379,54 +379,50 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
{
struct btrfs_fs_info *fs_info;
unsigned long len;
unsigned long *bitmap = NULL;
struct extent_buffer *eb = NULL;
int ret;
test_msg("running extent buffer bitmap tests");
/*
* In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
* BTRFS_MAX_METADATA_BLOCKSIZE.
*/
len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
? sectorsize * 4 : sectorsize;
fs_info = btrfs_alloc_dummy_fs_info(len, len);
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
test_std_err(TEST_ALLOC_FS_INFO);
return -ENOMEM;
}
bitmap = kmalloc(len, GFP_KERNEL);
bitmap = kmalloc(nodesize, GFP_KERNEL);
if (!bitmap) {
test_err("couldn't allocate test bitmap");
ret = -ENOMEM;
goto out;
}
eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
eb = __alloc_dummy_extent_buffer(fs_info, 0, nodesize);
if (!eb) {
test_std_err(TEST_ALLOC_ROOT);
ret = -ENOMEM;
goto out;
}
ret = __test_eb_bitmaps(bitmap, eb, len);
ret = __test_eb_bitmaps(bitmap, eb, nodesize);
if (ret)
goto out;
/* Do it over again with an extent buffer which isn't page-aligned. */
free_extent_buffer(eb);
eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
/*
* Test again for case where the tree block is sectorsize aligned but
* not nodesize aligned.
*/
eb = __alloc_dummy_extent_buffer(fs_info, sectorsize, nodesize);
if (!eb) {
test_std_err(TEST_ALLOC_ROOT);
ret = -ENOMEM;
goto out;
}
ret = __test_eb_bitmaps(bitmap, eb, len);
ret = __test_eb_bitmaps(bitmap, eb, nodesize);
out:
free_extent_buffer(eb);
kfree(bitmap);

View File

@ -399,7 +399,6 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
u64 offset;
u64 max_extent_size;
const struct btrfs_free_space_op test_free_space_ops = {
.recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds,
.use_bitmap = test_use_bitmap,
};
const struct btrfs_free_space_op *orig_free_space_ops;

View File

@ -36,7 +36,6 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(&trans, root, path, &ins, size);
if (ret) {
test_err("couldn't insert ref %d", ret);
@ -86,7 +85,6 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_err("couldn't find extent ref");
@ -135,7 +133,6 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
test_std_err(TEST_ALLOC_ROOT);
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, -1, 1);
if (ret) {
@ -170,7 +167,6 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_search_slot(&trans, root, &key, path, 0, 1);
if (ret) {
test_err("couldn't find extent ref");

View File

@ -16,7 +16,6 @@
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#include "dev-replace.h"
#include "qgroup.h"
@ -155,6 +154,7 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root, *tmp;
struct btrfs_caching_control *caching_ctl, *next;
down_write(&fs_info->commit_root_sem);
list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
@ -162,8 +162,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
list_del_init(&root->dirty_list);
free_extent_buffer(root->commit_root);
root->commit_root = btrfs_root_node(root);
if (is_fstree(root->root_key.objectid))
btrfs_unpin_free_ino(root);
extent_io_tree_release(&root->dirty_log_pages);
btrfs_qgroup_clean_swapped_blocks(root);
}
@ -180,6 +178,47 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
spin_lock(&cur_trans->dropped_roots_lock);
}
spin_unlock(&cur_trans->dropped_roots_lock);
/*
* We have to update the last_byte_to_unpin under the commit_root_sem,
* at the same time we swap out the commit roots.
*
* This is because we must have a real view of the last spot the caching
* kthreads were while caching. Consider the following views of the
* extent tree for a block group
*
* commit root
* +----+----+----+----+----+----+----+
* |\\\\| |\\\\|\\\\| |\\\\|\\\\|
* +----+----+----+----+----+----+----+
* 0 1 2 3 4 5 6 7
*
* new commit root
* +----+----+----+----+----+----+----+
* | | | |\\\\| | |\\\\|
* +----+----+----+----+----+----+----+
* 0 1 2 3 4 5 6 7
*
* If the caching_ctl->progress was at 3, then we are only allowed to
* unpin [0,1) and [2,3), because the caching thread has already
* processed those extents. We are not allowed to unpin [5,6), because
* the caching thread will re-start its search from 3, and thus find
* the hole from [4,6) to add to the free space cache.
*/
spin_lock(&fs_info->block_group_cache_lock);
list_for_each_entry_safe(caching_ctl, next,
&fs_info->caching_block_groups, list) {
struct btrfs_block_group *cache = caching_ctl->block_group;
if (btrfs_block_group_done(cache)) {
cache->last_byte_to_unpin = (u64)-1;
list_del_init(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
} else {
cache->last_byte_to_unpin = caching_ctl->progress;
}
}
spin_unlock(&fs_info->block_group_cache_lock);
up_write(&fs_info->commit_root_sem);
}
@ -856,24 +895,24 @@ void btrfs_throttle(struct btrfs_fs_info *fs_info)
wait_current_trans(fs_info);
}
static int should_end_transaction(struct btrfs_trans_handle *trans)
static bool should_end_transaction(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
if (btrfs_check_space_for_delayed_refs(fs_info))
return 1;
return true;
return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5);
}
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
{
struct btrfs_transaction *cur_trans = trans->transaction;
smp_mb();
if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
cur_trans->delayed_refs.flushing)
return 1;
return true;
return should_end_transaction(trans);
}
@ -1300,8 +1339,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
btrfs_free_log(trans, root);
btrfs_update_reloc_root(trans, root);
btrfs_save_ino_cache(root, trans);
/* see comments in should_cow_block() */
clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_mb__after_atomic();
@ -1598,8 +1635,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto fail;
}
btrfs_set_lock_blocking_write(old);
ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
/* clean up in any case */
btrfs_tree_unlock(old);
@ -1681,7 +1716,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
dentry->d_name.len * 2);
parent_inode->i_mtime = parent_inode->i_ctime =
current_time(parent_inode);
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@ -1761,6 +1796,8 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->root_level = root_item->level;
if (btrfs_test_opt(fs_info, SPACE_CACHE))
super->cache_generation = root_item->generation;
else if (test_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags))
super->cache_generation = 0;
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
super->uuid_tree_generation = root_item->generation;
}
@ -1956,10 +1993,8 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
}
}
static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
/*
* We use writeback_inodes_sb here because if we used
* btrfs_start_delalloc_roots we would deadlock with fs freeze.
@ -1969,50 +2004,15 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
* from already being in a transaction and our join_transaction doesn't
* have to re-take the fs freeze lock.
*/
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
} else {
struct btrfs_pending_snapshot *pending;
struct list_head *head = &trans->transaction->pending_snapshots;
/*
* Flush dellaloc for any root that is going to be snapshotted.
* This is done to avoid a corrupted version of files, in the
* snapshots, that had both buffered and direct IO writes (even
* if they were done sequentially) due to an unordered update of
* the inode's size on disk.
*/
list_for_each_entry(pending, head, list) {
int ret;
ret = btrfs_start_delalloc_snapshot(pending->root);
if (ret)
return ret;
}
}
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
} else {
struct btrfs_pending_snapshot *pending;
struct list_head *head = &trans->transaction->pending_snapshots;
/*
* Wait for any dellaloc that we started previously for the roots
* that are going to be snapshotted. This is to avoid a corrupted
* version of files in the snapshots that had both buffered and
* direct IO writes (even if they were done sequentially).
*/
list_for_each_entry(pending, head, list)
btrfs_wait_ordered_extents(pending->root,
U64_MAX, 0, U64_MAX);
}
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
@ -2150,7 +2150,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
extwriter_counter_dec(cur_trans, trans->type);
ret = btrfs_start_delalloc_flush(trans);
ret = btrfs_start_delalloc_flush(fs_info);
if (ret)
goto cleanup_transaction;
@ -2166,7 +2166,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (ret)
goto cleanup_transaction;
btrfs_wait_delalloc_flush(trans);
btrfs_wait_delalloc_flush(fs_info);
/*
* Wait for all ordered extents started by a fast fsync that joined this
@ -2293,8 +2293,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto unlock_tree_log;
}
btrfs_prepare_extent_commit(fs_info);
cur_trans = fs_info->running_transaction;
btrfs_set_root_node(&fs_info->tree_root->root_item,
@ -2435,10 +2433,6 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
btrfs_kill_all_delayed_nodes(root);
if (root->ino_cache_inode) {
iput(root->ino_cache_inode);
root->ino_cache_inode = NULL;
}
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
@ -2459,16 +2453,6 @@ void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info)
if (!prev)
return;
bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE;
if (prev & bit)
btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE);
prev &= ~bit;
bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE;
if (prev & bit)
btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE);
prev &= ~bit;
bit = 1 << BTRFS_PENDING_COMMIT;
if (prev & bit)
btrfs_debug(fs_info, "pending commit done");

View File

@ -112,7 +112,6 @@ struct btrfs_transaction {
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
#define BTRFS_SEND_TRANS_STUB ((void *)1)
#define BTRFS_DIO_SYNC_STUB ((void *)2)
struct btrfs_trans_handle {
u64 transid;
@ -219,7 +218,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
int wait_for_unblock);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
int btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
void btrfs_throttle(struct btrfs_fs_info *fs_info);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);

File diff suppressed because it is too large Load Diff

View File

@ -52,7 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
u32 nritems;
root_node = btrfs_lock_root_node(root);
btrfs_set_lock_blocking_write(root_node);
nritems = btrfs_header_nritems(root_node);
root->defrag_max.objectid = 0;
/* from above we know this is not a leaf */

View File

@ -17,7 +17,6 @@
#include "backref.h"
#include "compression.h"
#include "qgroup.h"
#include "inode-map.h"
#include "block-group.h"
#include "space-info.h"
@ -139,8 +138,25 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *tree_root = fs_info->tree_root;
int ret = 0;
/*
* First check if the log root tree was already created. If not, create
* it before locking the root's log_mutex, just to keep lockdep happy.
*/
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state)) {
mutex_lock(&tree_root->log_mutex);
if (!fs_info->log_root_tree) {
ret = btrfs_init_log_root_tree(trans, fs_info);
if (!ret)
set_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state);
}
mutex_unlock(&tree_root->log_mutex);
if (ret)
return ret;
}
mutex_lock(&root->log_mutex);
if (root->log_root) {
@ -156,13 +172,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
}
} else {
mutex_lock(&fs_info->tree_log_mutex);
if (!fs_info->log_root_tree)
ret = btrfs_init_log_root_tree(trans, fs_info);
mutex_unlock(&fs_info->tree_log_mutex);
if (ret)
goto out;
ret = btrfs_add_log_tree(trans, root);
if (ret)
goto out;
@ -172,7 +181,6 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
root->log_start_pid = current->pid;
}
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
if (ctx && !ctx->logging_new_name) {
int index = root->log_transid % 2;
@ -576,6 +584,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_fs_info *fs_info = root->fs_info;
int found_type;
u64 extent_end;
@ -653,7 +662,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
drop_args.start = start;
drop_args.end = extent_end;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, BTRFS_I(inode), &drop_args);
if (ret)
goto out;
@ -828,9 +840,9 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
if (ret)
goto out;
inode_add_bytes(inode, nbytes);
update_inode:
ret = btrfs_update_inode(trans, root, inode);
btrfs_update_inode_bytes(BTRFS_I(inode), nbytes, drop_args.bytes_found);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
out:
if (inode)
iput(inode);
@ -1529,7 +1541,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
if (ret)
goto out;
btrfs_update_inode(trans, root, inode);
btrfs_update_inode(trans, root, BTRFS_I(inode));
}
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
@ -1564,18 +1576,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
return ret;
}
static int insert_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 ino)
{
int ret;
ret = btrfs_insert_orphan_item(trans, root, ino);
if (ret == -EEXIST)
ret = 0;
return ret;
}
static int count_inode_extrefs(struct btrfs_root *root,
struct btrfs_inode *inode, struct btrfs_path *path)
{
@ -1716,7 +1716,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (nlink != inode->i_nlink) {
set_nlink(inode, nlink);
btrfs_update_inode(trans, root, inode);
btrfs_update_inode(trans, root, BTRFS_I(inode));
}
BTRFS_I(inode)->index_cnt = (u64)-1;
@ -1727,7 +1727,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (ret)
goto out;
}
ret = insert_orphan_item(trans, root, ino);
ret = btrfs_insert_orphan_item(trans, root, ino);
if (ret == -EEXIST)
ret = 0;
}
out:
@ -1820,7 +1822,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
set_nlink(inode, 1);
else
inc_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
} else if (ret == -EEXIST) {
ret = 0;
} else {
@ -1973,7 +1975,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (!ret && update_size) {
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
ret = btrfs_update_inode(trans, root, dir);
ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
}
kfree(name);
iput(dir);
@ -2586,6 +2588,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
* those prealloc extents just after replaying them.
*/
if (S_ISREG(mode)) {
struct btrfs_drop_extents_args drop_args = { 0 };
struct inode *inode;
u64 from;
@ -2596,12 +2599,18 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
}
from = ALIGN(i_size_read(inode),
root->fs_info->sectorsize);
ret = btrfs_drop_extents(wc->trans, root, inode,
from, (u64)-1, 1);
drop_args.start = from;
drop_args.end = (u64)-1;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(wc->trans, root,
BTRFS_I(inode),
&drop_args);
if (!ret) {
inode_sub_bytes(inode,
drop_args.bytes_found);
/* Update the inode's nbytes. */
ret = btrfs_update_inode(wc->trans,
root, inode);
root, BTRFS_I(inode));
}
iput(inode);
if (ret)
@ -2709,7 +2718,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
blocksize = fs_info->nodesize;
next = btrfs_find_create_tree_block(fs_info, bytenr);
next = btrfs_find_create_tree_block(fs_info, bytenr,
btrfs_header_owner(cur),
*level - 1);
if (IS_ERR(next))
return PTR_ERR(next);
@ -2732,7 +2743,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@ -2801,7 +2811,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@ -2883,7 +2892,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
btrfs_tree_lock(next);
btrfs_set_lock_blocking_write(next);
btrfs_clean_tree_block(next);
btrfs_wait_tree_block_writeback(next);
btrfs_tree_unlock(next);
@ -3023,6 +3031,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
int log_transid = 0;
struct btrfs_log_ctx root_log_ctx;
struct blk_plug plug;
u64 log_root_start;
u64 log_root_level;
mutex_lock(&root->log_mutex);
log_transid = ctx->log_transid;
@ -3200,22 +3210,31 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
btrfs_set_super_log_root(fs_info->super_for_commit,
log_root_tree->node->start);
btrfs_set_super_log_root_level(fs_info->super_for_commit,
btrfs_header_level(log_root_tree->node));
log_root_start = log_root_tree->node->start;
log_root_level = btrfs_header_level(log_root_tree->node);
log_root_tree->log_transid++;
mutex_unlock(&log_root_tree->log_mutex);
/*
* Nobody else is going to jump in and write the ctree
* super here because the log_commit atomic below is protecting
* us. We must be called with a transaction handle pinning
* the running transaction open, so a full commit can't hop
* in and cause problems either.
* Here we are guaranteed that nobody is going to write the superblock
* for the current transaction before us and that we do not write
* our superblock before the previous transaction finishes its commit
* and writes its superblock, because:
*
* 1) We are holding a handle on the current transaction, so nobody
* can commit it until we release the handle;
*
* 2) Before writing our superblock we acquire the tree_log_mutex, so
* if the previous transaction is still committing, and hasn't yet
* written its superblock, we wait for it to do it, because a
* transaction commit acquires the tree_log_mutex when the commit
* begins and releases it only after writing its superblock.
*/
mutex_lock(&fs_info->tree_log_mutex);
btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start);
btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
ret = write_all_supers(fs_info, 1);
mutex_unlock(&fs_info->tree_log_mutex);
if (ret) {
btrfs_set_log_full_commit(trans);
btrfs_abort_transaction(trans, ret);
@ -3300,6 +3319,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
if (fs_info->log_root_tree) {
free_log_tree(trans, fs_info->log_root_tree);
fs_info->log_root_tree = NULL;
clear_bit(BTRFS_ROOT_HAS_LOG_TREE, &fs_info->tree_root->state);
}
return 0;
}
@ -4196,6 +4216,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_log_ctx *ctx)
{
struct btrfs_drop_extents_args drop_args = { 0 };
struct btrfs_root *log = root->log_root;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
@ -4204,19 +4225,21 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
u64 extent_offset = em->start - em->orig_start;
u64 block_len;
int ret;
int extent_inserted = 0;
ret = log_extent_csums(trans, inode, log, em, ctx);
if (ret)
return ret;
ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
em->start + em->len, NULL, 0, 1,
sizeof(*fi), &extent_inserted);
drop_args.path = path;
drop_args.start = em->start;
drop_args.end = em->start + em->len;
drop_args.replace_extent = true;
drop_args.extent_item_size = sizeof(*fi);
ret = btrfs_drop_extents(trans, log, inode, &drop_args);
if (ret)
return ret;
if (!extent_inserted) {
if (!drop_args.extent_inserted) {
key.objectid = btrfs_ino(inode);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = em->start;
@ -4375,8 +4398,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
do {
ret = btrfs_truncate_inode_items(trans,
root->log_root,
&inode->vfs_inode,
truncate_offset,
inode, truncate_offset,
BTRFS_EXTENT_DATA_KEY);
} while (ret == -EAGAIN);
if (ret)
@ -4415,14 +4437,12 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct extent_map *em, *n;
struct list_head extents;
struct extent_map_tree *tree = &inode->extent_tree;
u64 test_gen;
int ret = 0;
int num = 0;
INIT_LIST_HEAD(&extents);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
list_del_init(&em->list);
@ -4438,7 +4458,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
goto process;
}
if (em->generation <= test_gen)
if (em->generation < trans->transid)
continue;
/* We log prealloc extents beyond eof later. */
@ -4571,6 +4591,10 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
const u64 ino = btrfs_ino(inode);
int ins_nr = 0;
int start_slot = 0;
bool found_xattrs = false;
if (test_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags))
return 0;
key.objectid = ino;
key.type = BTRFS_XATTR_ITEM_KEY;
@ -4609,6 +4633,7 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
start_slot = slot;
ins_nr++;
path->slots[0]++;
found_xattrs = true;
cond_resched();
}
if (ins_nr > 0) {
@ -4618,6 +4643,9 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
return ret;
}
if (!found_xattrs)
set_bit(BTRFS_INODE_NO_XATTRS, &inode->runtime_flags);
return 0;
}
@ -5303,7 +5331,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
&inode->runtime_flags);
while(1) {
ret = btrfs_truncate_inode_items(trans,
log, &inode->vfs_inode, 0, 0);
log, inode, 0, 0);
if (ret != -EAGAIN)
break;
}
@ -5442,11 +5470,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
bool ret = false;
mutex_lock(&inode->log_mutex);
if (inode->last_unlink_trans > fs_info->last_trans_committed) {
if (inode->last_unlink_trans >= trans->transid) {
/*
* Make sure any commits to the log are forced to be full
* commits.
@ -5468,8 +5495,7 @@ static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct dentry *parent,
struct super_block *sb,
u64 last_committed)
struct super_block *sb)
{
int ret = 0;
struct dentry *old_parent = NULL;
@ -5481,8 +5507,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
* and other fun in this file.
*/
if (S_ISREG(inode->vfs_inode.i_mode) &&
inode->generation <= last_committed &&
inode->last_unlink_trans <= last_committed)
inode->generation < trans->transid &&
inode->last_unlink_trans < trans->transid)
goto out;
if (!S_ISDIR(inode->vfs_inode.i_mode)) {
@ -5828,7 +5854,6 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
while (true) {
struct btrfs_fs_info *fs_info = root->fs_info;
const u64 last_committed = fs_info->last_trans_committed;
struct extent_buffer *leaf = path->nodes[0];
int slot = path->slots[0];
struct btrfs_key search_key;
@ -5847,7 +5872,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
if (IS_ERR(inode))
return PTR_ERR(inode);
if (BTRFS_I(inode)->generation > last_committed)
if (BTRFS_I(inode)->generation >= trans->transid)
ret = btrfs_log_inode(trans, root, BTRFS_I(inode),
LOG_INODE_EXISTS, ctx);
btrfs_add_delayed_iput(inode);
@ -5888,7 +5913,6 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct dentry *old_parent = NULL;
struct super_block *sb = inode->vfs_inode.i_sb;
int ret = 0;
@ -5902,7 +5926,7 @@ static int log_new_ancestors_fast(struct btrfs_trans_handle *trans,
if (root != inode->root)
break;
if (inode->generation > fs_info->last_trans_committed) {
if (inode->generation >= trans->transid) {
ret = btrfs_log_inode(trans, root, inode,
LOG_INODE_EXISTS, ctx);
if (ret)
@ -6019,7 +6043,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct super_block *sb;
int ret = 0;
u64 last_committed = fs_info->last_trans_committed;
bool log_dentries = false;
sb = inode->vfs_inode.i_sb;
@ -6029,23 +6052,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}
/*
* The prev transaction commit doesn't complete, we need do
* full commit by ourselves.
*/
if (fs_info->last_trans_log_full_commit >
fs_info->last_trans_committed) {
ret = 1;
goto end_no_trans;
}
if (btrfs_root_refs(&root->root_item) == 0) {
ret = 1;
goto end_no_trans;
}
ret = check_parent_dirs_for_sync(trans, inode, parent, sb,
last_committed);
ret = check_parent_dirs_for_sync(trans, inode, parent, sb);
if (ret)
goto end_no_trans;
@ -6075,8 +6087,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* and other fun in this file.
*/
if (S_ISREG(inode->vfs_inode.i_mode) &&
inode->generation <= last_committed &&
inode->last_unlink_trans <= last_committed) {
inode->generation < trans->transid &&
inode->last_unlink_trans < trans->transid) {
ret = 0;
goto end_trans;
}
@ -6125,7 +6137,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* but the file inode does not have a matching BTRFS_INODE_REF_KEY item
* and has a link count of 2.
*/
if (inode->last_unlink_trans > last_committed) {
if (inode->last_unlink_trans >= trans->transid) {
ret = btrfs_log_all_parents(trans, inode, ctx);
if (ret)
goto end_trans;
@ -6434,7 +6446,6 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode, struct btrfs_inode *old_dir,
struct dentry *parent)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_log_ctx ctx;
/*
@ -6448,8 +6459,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* if this inode hasn't been logged and directory we're renaming it
* from hasn't been logged, we don't need to log it
*/
if (inode->logged_trans <= fs_info->last_trans_committed &&
(!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
if (inode->logged_trans < trans->transid &&
(!old_dir || old_dir->logged_trans < trans->transid))
return;
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);

View File

@ -129,8 +129,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
} else {
btrfs_warn(fs_info,
"insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!",
ret, (unsigned long long)key.objectid,
(unsigned long long)key.offset, type);
ret, key.objectid, key.offset, type);
goto out;
}

View File

@ -31,6 +31,7 @@
#include "space-info.h"
#include "block-group.h"
#include "discard.h"
#include "zoned.h"
const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
@ -374,6 +375,7 @@ void btrfs_free_device(struct btrfs_device *device)
rcu_string_free(device->name);
extent_io_tree_release(&device->alloc_state);
bio_put(device->flush_bio);
btrfs_destroy_dev_zone_info(device);
kfree(device);
}
@ -667,6 +669,10 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
device->mode = flags;
ret = btrfs_get_dev_zone_info(device);
if (ret != 0)
goto error_free_page;
fs_devices->open_devices++;
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
device->devid != BTRFS_DEV_REPLACE_DEVID) {
@ -822,7 +828,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
} else {
mutex_lock(&fs_devices->device_list_mutex);
device = btrfs_find_device(fs_devices, devid,
disk_super->dev_item.uuid, NULL, false);
disk_super->dev_item.uuid, NULL);
/*
* If this disk has been pulled into an fs devices created by
@ -1044,7 +1050,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
}
static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
int step, struct btrfs_device **latest_dev)
struct btrfs_device **latest_dev)
{
struct btrfs_device *device, *next;
@ -1089,16 +1095,16 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices,
* After we have read the system tree and know devids belonging to this
* filesystem, remove the device which does not belong there.
*/
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step)
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *latest_dev = NULL;
struct btrfs_fs_devices *seed_dev;
mutex_lock(&uuid_mutex);
__btrfs_free_extra_devids(fs_devices, step, &latest_dev);
__btrfs_free_extra_devids(fs_devices, &latest_dev);
list_for_each_entry(seed_dev, &fs_devices->seed_list, seed_list)
__btrfs_free_extra_devids(seed_dev, step, &latest_dev);
__btrfs_free_extra_devids(seed_dev, &latest_dev);
fs_devices->latest_bdev = latest_dev->bdev;
@ -1137,6 +1143,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
device->bdev = NULL;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
btrfs_destroy_dev_zone_info(device);
device->fs_info = NULL;
atomic_set(&device->dev_stats_ccnt, 0);
@ -1217,6 +1224,7 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->latest_bdev = latest_dev->bdev;
fs_devices->total_rw_bytes = 0;
fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_REGULAR;
fs_devices->read_policy = BTRFS_READ_POLICY_PID;
return 0;
}
@ -1268,7 +1276,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
}
static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
u64 bytenr)
u64 bytenr, u64 bytenr_orig)
{
struct btrfs_super_block *disk_super;
struct page *page;
@ -1299,7 +1307,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
/* align our pointer to the offset of the super block */
disk_super = p + offset_in_page(bytenr);
if (btrfs_super_bytenr(disk_super) != bytenr ||
if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
btrfs_release_disk_super(p);
return ERR_PTR(-EINVAL);
@ -1334,7 +1342,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct block_device *bdev;
u64 bytenr;
u64 bytenr, bytenr_orig;
int ret;
lockdep_assert_held(&uuid_mutex);
@ -1344,14 +1353,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
bytenr = btrfs_sb_offset(0);
flags |= FMODE_EXCL;
bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
return ERR_CAST(bdev);
disk_super = btrfs_read_disk_super(bdev, bytenr);
bytenr_orig = btrfs_sb_offset(0);
ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
if (ret)
return ERR_PTR(ret);
disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
@ -2015,6 +2028,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
if (IS_ERR(disk_super))
continue;
if (bdev_is_zoned(bdev)) {
btrfs_reset_sb_log_zones(bdev, copy_num);
continue;
}
memset(&disk_super->magic, 0, sizeof(disk_super->magic));
page = virt_to_page(disk_super);
@ -2293,10 +2311,10 @@ static struct btrfs_device *btrfs_find_device_by_path(
dev_uuid = disk_super->dev_item.uuid;
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
disk_super->metadata_uuid, true);
disk_super->metadata_uuid);
else
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
disk_super->fsid, true);
disk_super->fsid);
btrfs_release_disk_super(disk_super);
if (!device)
@ -2316,7 +2334,7 @@ struct btrfs_device *btrfs_find_device_by_devspec(
if (devid) {
device = btrfs_find_device(fs_info->fs_devices, devid, NULL,
NULL, true);
NULL);
if (!device)
return ERR_PTR(-ENOENT);
return device;
@ -2465,7 +2483,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
BTRFS_FSID_SIZE);
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
fs_uuid, true);
fs_uuid);
BUG_ON(!device); /* Logic error */
if (device->fs_devices->seeding) {
@ -2507,6 +2525,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
if (IS_ERR(bdev))
return PTR_ERR(bdev);
if (!btrfs_check_device_zone_type(fs_info, bdev)) {
ret = -EINVAL;
goto error;
}
if (fs_devices->seeding) {
seeding_dev = 1;
down_write(&sb->s_umount);
@ -2540,10 +2563,17 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
rcu_assign_pointer(device->name, name);
device->fs_info = fs_info;
device->bdev = bdev;
ret = btrfs_get_dev_zone_info(device);
if (ret)
goto error_free_device;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto error_free_device;
goto error_free_zone;
}
q = bdev_get_queue(bdev);
@ -2556,8 +2586,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
fs_info->sectorsize);
device->disk_total_bytes = device->total_bytes;
device->commit_total_bytes = device->total_bytes;
device->fs_info = fs_info;
device->bdev = bdev;
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
device->mode = FMODE_EXCL;
@ -2704,6 +2732,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
sb->s_flags |= SB_RDONLY;
if (trans)
btrfs_end_transaction(trans);
error_free_zone:
btrfs_destroy_dev_zone_info(device);
error_free_device:
btrfs_free_device(device);
error:
@ -5479,7 +5509,18 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
else
num_stripes = map->num_stripes;
preferred_mirror = first + current->pid % num_stripes;
switch (fs_info->fs_devices->read_policy) {
default:
/* Shouldn't happen, just warn and use pid instead of failing */
btrfs_warn_rl(fs_info,
"unknown read_policy type %u, reset to pid",
fs_info->fs_devices->read_policy);
fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID;
fallthrough;
case BTRFS_READ_POLICY_PID:
preferred_mirror = first + (current->pid % num_stripes);
break;
}
if (dev_replace_is_ongoing &&
fs_info->dev_replace.cont_reading_from_srcdev_mode ==
@ -6335,7 +6376,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
bio->bi_iter.bi_sector = physical >> 9;
btrfs_debug_in_rcu(fs_info,
"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
bio_op(bio), bio->bi_opf, (u64)bio->bi_iter.bi_sector,
bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
dev->devid, bio->bi_iter.bi_size);
bio_set_dev(bio, dev->bdev);
@ -6367,7 +6408,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
{
struct btrfs_device *dev;
struct bio *first_bio = bio;
u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
@ -6447,8 +6488,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
* If @seed is true, traverse through the seed devices.
*/
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, u8 *uuid, u8 *fsid,
bool seed)
u64 devid, u8 *uuid, u8 *fsid)
{
struct btrfs_device *device;
struct btrfs_fs_devices *seed_devs;
@ -6655,7 +6695,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
btrfs_stripe_dev_uuid_nr(chunk, i),
BTRFS_UUID_SIZE);
map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices,
devid, uuid, NULL, true);
devid, uuid, NULL);
if (!map->stripes[i].dev &&
!btrfs_test_opt(fs_info, DEGRADED)) {
free_extent_map(em);
@ -6794,7 +6834,7 @@ static int read_one_dev(struct extent_buffer *leaf,
}
device = btrfs_find_device(fs_info->fs_devices, devid, dev_uuid,
fs_uuid, true);
fs_uuid);
if (!device) {
if (!btrfs_test_opt(fs_info, DEGRADED)) {
btrfs_report_missing_device(fs_info, devid,
@ -6857,6 +6897,16 @@ static int read_one_dev(struct extent_buffer *leaf,
}
fill_device_from_item(leaf, dev_item, device);
if (device->bdev) {
u64 max_total_bytes = i_size_read(device->bdev->bd_inode);
if (device->total_bytes > max_total_bytes) {
btrfs_err(fs_info,
"device total_bytes should be at most %llu but found %llu",
max_total_bytes, device->total_bytes);
return -EINVAL;
}
}
set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
@ -6891,11 +6941,11 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
* fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will
* overallocate but we can keep it as-is, only the first page is used.
*/
sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET,
root->root_key.objectid, 0);
if (IS_ERR(sb))
return PTR_ERR(sb);
set_extent_buffer_uptodate(sb);
btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
/*
* The sb extent buffer is artificial and just used to read the system array.
* set_extent_buffer_uptodate() call does not properly mark all its
@ -7059,12 +7109,8 @@ static void readahead_tree_node_children(struct extent_buffer *node)
int i;
const int nr_items = btrfs_header_nritems(node);
for (i = 0; i < nr_items; i++) {
u64 start;
start = btrfs_node_blockptr(node, i);
readahead_tree_block(node->fs_info, start);
}
for (i = 0; i < nr_items; i++)
btrfs_readahead_node_child(node, i);
}
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
@ -7451,8 +7497,7 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
int i;
mutex_lock(&fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL,
true);
dev = btrfs_find_device(fs_info->fs_devices, stats->devid, NULL, NULL);
mutex_unlock(&fs_devices->device_list_mutex);
if (!dev) {
@ -7583,28 +7628,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
}
/* Make sure no dev extent is beyond device boundary */
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
if (!dev) {
btrfs_err(fs_info, "failed to find devid %llu", devid);
ret = -EUCLEAN;
goto out;
}
/* It's possible this device is a dummy for seed device */
if (dev->disk_total_bytes == 0) {
struct btrfs_fs_devices *devs;
devs = list_first_entry(&fs_info->fs_devices->seed_list,
struct btrfs_fs_devices, seed_list);
dev = btrfs_find_device(devs, devid, NULL, NULL, false);
if (!dev) {
btrfs_err(fs_info, "failed to find seed devid %llu",
devid);
ret = -EUCLEAN;
goto out;
}
}
if (physical_offset + physical_len > dev->disk_total_bytes) {
btrfs_err(fs_info,
"dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
@ -7659,6 +7689,19 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
u64 prev_dev_ext_end = 0;
int ret = 0;
/*
* We don't have a dev_root because we mounted with ignorebadroots and
* failed to load the root, so we want to skip the verification in this
* case for sure.
*
* However if the dev root is fine, but the tree itself is corrupted
* we'd still fail to mount. This verification is only to make sure
* writes can happen safely, so instead just bypass this check
* completely in the case of IGNOREBADROOTS.
*/
if (btrfs_test_opt(fs_info, IGNOREBADROOTS))
return 0;
key.objectid = 1;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = 0;

View File

@ -52,6 +52,8 @@ struct btrfs_io_geometry {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)
struct btrfs_zoned_device_info;
struct btrfs_device {
struct list_head dev_list; /* device_list_mutex */
struct list_head dev_alloc_list; /* chunk mutex */
@ -65,6 +67,8 @@ struct btrfs_device {
struct block_device *bdev;
struct btrfs_zoned_device_info *zone_info;
/* the mode sent to blkdev_get */
fmode_t mode;
@ -211,6 +215,16 @@ enum btrfs_chunk_allocation_policy {
BTRFS_CHUNK_ALLOC_REGULAR,
};
/*
* Read policies for mirrored block group profiles, read picks the stripe based
* on these policies.
*/
enum btrfs_read_policy {
/* Use process PID to choose the stripe */
BTRFS_READ_POLICY_PID,
/* Count of the policies listed above; keep as the last enumerator */
BTRFS_NR_READ_POLICY,
};
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
u8 metadata_uuid[BTRFS_FSID_SIZE];
@ -264,6 +278,9 @@ struct btrfs_fs_devices {
struct completion kobj_unregister;
enum btrfs_chunk_allocation_policy chunk_alloc_policy;
/* Policy used to read the mirrored stripes */
enum btrfs_read_policy read_policy;
};
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
@ -436,7 +453,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path,
fmode_t flags, void *holder);
int btrfs_forget_devices(const char *path);
void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
void btrfs_assign_next_active_device(struct btrfs_device *device,
struct btrfs_device *this_dev);
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
@ -453,7 +470,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
int btrfs_grow_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 new_size);
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
u64 devid, u8 *uuid, u8 *fsid, bool seed);
u64 devid, u8 *uuid, u8 *fsid);
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
int btrfs_balance(struct btrfs_fs_info *fs_info,

View File

@ -213,9 +213,11 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
}
out:
btrfs_free_path(path);
if (!ret)
if (!ret) {
set_bit(BTRFS_INODE_COPY_EVERYTHING,
&BTRFS_I(inode)->runtime_flags);
clear_bit(BTRFS_INODE_NO_XATTRS, &BTRFS_I(inode)->runtime_flags);
}
return ret;
}
@ -239,7 +241,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
BUG_ON(ret);
out:
btrfs_end_transaction(trans);
@ -391,7 +393,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
if (!ret) {
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
ret = btrfs_update_inode(trans, root, inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
BUG_ON(ret);
}

Some files were not shown because too many files have changed in this diff Show More